sidedata: register the flag processors if the repository allows for it...

marmoute - r43305:142deb53 default
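In brief, this revision makes revlog._loadindex() register the side-data flag processors whenever the repository's opener options carry 'side-data'. Because each revlog starts from a private copy of the global flagutil.flagprocessors table, the registration is per-instance and leaves repositories without the requirement untouched. A minimal, self-contained sketch of the mechanism follows; the helper bodies and the flag value are stand-ins invented for illustration (the real processor triple lives in mercurial/revlogutils/sidedata.py), while the two registration lines are the ones this patch adds:

    # Stand-in processor triple following the (read, write, raw) shape used
    # by the ellipsis processors in revlog.py; the bodies are illustrative.
    def sidedata_read(rl, text):
        return text, True, {}          # (text, validatehash, sidedata)

    def sidedata_write(rl, text, sidedata):
        return text, True              # (text, validatehash)

    def sidedata_raw(rl, text):
        return True                    # rawtext is already hash-valid

    processors = (sidedata_read, sidedata_write, sidedata_raw)

    REVIDX_SIDEDATA = 1 << 12          # stand-in value for this sketch

    class toyrevlog(object):
        def __init__(self, opts, globalprocessors):
            # per-instance copy, as in revlog.__init__
            self._flagprocessors = dict(globalprocessors)
            self.hassidedata = bool(opts.get('side-data', False))
            if self.hassidedata:
                # the registration added by this changeset
                self._flagprocessors[REVIDX_SIDEDATA] = processors

With that in place, revisions carrying the REVIDX_SIDEDATA flag are routed through sidedatautil.processors rather than being rejected as an unprocessed flag.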
@@ -1,2671 +1,2676 @@
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import collections
16 import collections
17 import contextlib
17 import contextlib
18 import errno
18 import errno
19 import io
19 import io
20 import os
20 import os
21 import struct
21 import struct
22 import zlib
22 import zlib
23
23
24 # import stuff from node for others to import from revlog
24 # import stuff from node for others to import from revlog
25 from .node import (
25 from .node import (
26 bin,
26 bin,
27 hex,
27 hex,
28 nullhex,
28 nullhex,
29 nullid,
29 nullid,
30 nullrev,
30 nullrev,
31 short,
31 short,
32 wdirfilenodeids,
32 wdirfilenodeids,
33 wdirhex,
33 wdirhex,
34 wdirid,
34 wdirid,
35 wdirrev,
35 wdirrev,
36 )
36 )
37 from .i18n import _
37 from .i18n import _
38 from .revlogutils.constants import (
38 from .revlogutils.constants import (
39 FLAG_GENERALDELTA,
39 FLAG_GENERALDELTA,
40 FLAG_INLINE_DATA,
40 FLAG_INLINE_DATA,
41 REVLOGV0,
41 REVLOGV0,
42 REVLOGV1,
42 REVLOGV1,
43 REVLOGV1_FLAGS,
43 REVLOGV1_FLAGS,
44 REVLOGV2,
44 REVLOGV2,
45 REVLOGV2_FLAGS,
45 REVLOGV2_FLAGS,
46 REVLOG_DEFAULT_FLAGS,
46 REVLOG_DEFAULT_FLAGS,
47 REVLOG_DEFAULT_FORMAT,
47 REVLOG_DEFAULT_FORMAT,
48 REVLOG_DEFAULT_VERSION,
48 REVLOG_DEFAULT_VERSION,
49 )
49 )
50 from .revlogutils.flagutil import (
50 from .revlogutils.flagutil import (
51 REVIDX_DEFAULT_FLAGS,
51 REVIDX_DEFAULT_FLAGS,
52 REVIDX_ELLIPSIS,
52 REVIDX_ELLIPSIS,
53 REVIDX_EXTSTORED,
53 REVIDX_EXTSTORED,
54 REVIDX_FLAGS_ORDER,
54 REVIDX_FLAGS_ORDER,
55 REVIDX_ISCENSORED,
55 REVIDX_ISCENSORED,
56 REVIDX_RAWTEXT_CHANGING_FLAGS,
56 REVIDX_RAWTEXT_CHANGING_FLAGS,
57 REVIDX_SIDEDATA,
57 )
58 )
58 from .thirdparty import (
59 from .thirdparty import (
59 attr,
60 attr,
60 )
61 )
61 from . import (
62 from . import (
62 ancestor,
63 ancestor,
63 dagop,
64 dagop,
64 error,
65 error,
65 mdiff,
66 mdiff,
66 policy,
67 policy,
67 pycompat,
68 pycompat,
68 templatefilters,
69 templatefilters,
69 util,
70 util,
70 )
71 )
71 from .interfaces import (
72 from .interfaces import (
72 repository,
73 repository,
73 util as interfaceutil,
74 util as interfaceutil,
74 )
75 )
75 from .revlogutils import (
76 from .revlogutils import (
76 deltas as deltautil,
77 deltas as deltautil,
77 flagutil,
78 flagutil,
79 sidedata as sidedatautil,
78 )
80 )
79 from .utils import (
81 from .utils import (
80 storageutil,
82 storageutil,
81 stringutil,
83 stringutil,
82 )
84 )
83
85
84 # blanked usage of all the name to prevent pyflakes constraints
86 # blanked usage of all the name to prevent pyflakes constraints
85 # We need these name available in the module for extensions.
87 # We need these name available in the module for extensions.
86 REVLOGV0
88 REVLOGV0
87 REVLOGV1
89 REVLOGV1
88 REVLOGV2
90 REVLOGV2
89 FLAG_INLINE_DATA
91 FLAG_INLINE_DATA
90 FLAG_GENERALDELTA
92 FLAG_GENERALDELTA
91 REVLOG_DEFAULT_FLAGS
93 REVLOG_DEFAULT_FLAGS
92 REVLOG_DEFAULT_FORMAT
94 REVLOG_DEFAULT_FORMAT
93 REVLOG_DEFAULT_VERSION
95 REVLOG_DEFAULT_VERSION
94 REVLOGV1_FLAGS
96 REVLOGV1_FLAGS
95 REVLOGV2_FLAGS
97 REVLOGV2_FLAGS
96 REVIDX_ISCENSORED
98 REVIDX_ISCENSORED
97 REVIDX_ELLIPSIS
99 REVIDX_ELLIPSIS
100 REVIDX_SIDEDATA
98 REVIDX_EXTSTORED
101 REVIDX_EXTSTORED
99 REVIDX_DEFAULT_FLAGS
102 REVIDX_DEFAULT_FLAGS
100 REVIDX_FLAGS_ORDER
103 REVIDX_FLAGS_ORDER
101 REVIDX_RAWTEXT_CHANGING_FLAGS
104 REVIDX_RAWTEXT_CHANGING_FLAGS
102
105
103 parsers = policy.importmod(r'parsers')
106 parsers = policy.importmod(r'parsers')
104 rustancestor = policy.importrust(r'ancestor')
107 rustancestor = policy.importrust(r'ancestor')
105 rustdagop = policy.importrust(r'dagop')
108 rustdagop = policy.importrust(r'dagop')
106
109
107 # Aliased for performance.
110 # Aliased for performance.
108 _zlibdecompress = zlib.decompress
111 _zlibdecompress = zlib.decompress
109
112
110 # max size of revlog with inline data
113 # max size of revlog with inline data
111 _maxinline = 131072
114 _maxinline = 131072
112 _chunksize = 1048576
115 _chunksize = 1048576
113
116
114 # Flag processors for REVIDX_ELLIPSIS.
117 # Flag processors for REVIDX_ELLIPSIS.
115 def ellipsisreadprocessor(rl, text):
118 def ellipsisreadprocessor(rl, text):
116 return text, False, {}
119 return text, False, {}
117
120
118 def ellipsiswriteprocessor(rl, text, sidedata):
121 def ellipsiswriteprocessor(rl, text, sidedata):
119 return text, False
122 return text, False
120
123
121 def ellipsisrawprocessor(rl, text):
124 def ellipsisrawprocessor(rl, text):
122 return False
125 return False
123
126
124 ellipsisprocessor = (
127 ellipsisprocessor = (
125 ellipsisreadprocessor,
128 ellipsisreadprocessor,
126 ellipsiswriteprocessor,
129 ellipsiswriteprocessor,
127 ellipsisrawprocessor,
130 ellipsisrawprocessor,
128 )
131 )
129
132
130 def getoffset(q):
133 def getoffset(q):
131 return int(q >> 16)
134 return int(q >> 16)
132
135
133 def gettype(q):
136 def gettype(q):
134 return int(q & 0xFFFF)
137 return int(q & 0xFFFF)
135
138
136 def offset_type(offset, type):
139 def offset_type(offset, type):
137 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
140 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
138 raise ValueError('unknown revlog index flags')
141 raise ValueError('unknown revlog index flags')
139 return int(int(offset) << 16 | type)
142 return int(int(offset) << 16 | type)
140
143
141 @attr.s(slots=True, frozen=True)
144 @attr.s(slots=True, frozen=True)
142 class _revisioninfo(object):
145 class _revisioninfo(object):
143 """Information about a revision that allows building its fulltext
146 """Information about a revision that allows building its fulltext
144 node: expected hash of the revision
147 node: expected hash of the revision
145 p1, p2: parent revs of the revision
148 p1, p2: parent revs of the revision
146 btext: built text cache consisting of a one-element list
149 btext: built text cache consisting of a one-element list
147 cachedelta: (baserev, uncompressed_delta) or None
150 cachedelta: (baserev, uncompressed_delta) or None
148 flags: flags associated to the revision storage
151 flags: flags associated to the revision storage
149
152
150 One of btext[0] or cachedelta must be set.
153 One of btext[0] or cachedelta must be set.
151 """
154 """
152 node = attr.ib()
155 node = attr.ib()
153 p1 = attr.ib()
156 p1 = attr.ib()
154 p2 = attr.ib()
157 p2 = attr.ib()
155 btext = attr.ib()
158 btext = attr.ib()
156 textlen = attr.ib()
159 textlen = attr.ib()
157 cachedelta = attr.ib()
160 cachedelta = attr.ib()
158 flags = attr.ib()
161 flags = attr.ib()
159
162
160 @interfaceutil.implementer(repository.irevisiondelta)
163 @interfaceutil.implementer(repository.irevisiondelta)
161 @attr.s(slots=True)
164 @attr.s(slots=True)
162 class revlogrevisiondelta(object):
165 class revlogrevisiondelta(object):
163 node = attr.ib()
166 node = attr.ib()
164 p1node = attr.ib()
167 p1node = attr.ib()
165 p2node = attr.ib()
168 p2node = attr.ib()
166 basenode = attr.ib()
169 basenode = attr.ib()
167 flags = attr.ib()
170 flags = attr.ib()
168 baserevisionsize = attr.ib()
171 baserevisionsize = attr.ib()
169 revision = attr.ib()
172 revision = attr.ib()
170 delta = attr.ib()
173 delta = attr.ib()
171 linknode = attr.ib(default=None)
174 linknode = attr.ib(default=None)
172
175
173 @interfaceutil.implementer(repository.iverifyproblem)
176 @interfaceutil.implementer(repository.iverifyproblem)
174 @attr.s(frozen=True)
177 @attr.s(frozen=True)
175 class revlogproblem(object):
178 class revlogproblem(object):
176 warning = attr.ib(default=None)
179 warning = attr.ib(default=None)
177 error = attr.ib(default=None)
180 error = attr.ib(default=None)
178 node = attr.ib(default=None)
181 node = attr.ib(default=None)
179
182
180 # index v0:
183 # index v0:
181 # 4 bytes: offset
184 # 4 bytes: offset
182 # 4 bytes: compressed length
185 # 4 bytes: compressed length
183 # 4 bytes: base rev
186 # 4 bytes: base rev
184 # 4 bytes: link rev
187 # 4 bytes: link rev
185 # 20 bytes: parent 1 nodeid
188 # 20 bytes: parent 1 nodeid
186 # 20 bytes: parent 2 nodeid
189 # 20 bytes: parent 2 nodeid
187 # 20 bytes: nodeid
190 # 20 bytes: nodeid
188 indexformatv0 = struct.Struct(">4l20s20s20s")
191 indexformatv0 = struct.Struct(">4l20s20s20s")
189 indexformatv0_pack = indexformatv0.pack
192 indexformatv0_pack = indexformatv0.pack
190 indexformatv0_unpack = indexformatv0.unpack
193 indexformatv0_unpack = indexformatv0.unpack
191
194
192 class revlogoldindex(list):
195 class revlogoldindex(list):
193 def __getitem__(self, i):
196 def __getitem__(self, i):
194 if i == -1:
197 if i == -1:
195 return (0, 0, 0, -1, -1, -1, -1, nullid)
198 return (0, 0, 0, -1, -1, -1, -1, nullid)
196 return list.__getitem__(self, i)
199 return list.__getitem__(self, i)
197
200
198 class revlogoldio(object):
201 class revlogoldio(object):
199 def __init__(self):
202 def __init__(self):
200 self.size = indexformatv0.size
203 self.size = indexformatv0.size
201
204
202 def parseindex(self, data, inline):
205 def parseindex(self, data, inline):
203 s = self.size
206 s = self.size
204 index = []
207 index = []
205 nodemap = {nullid: nullrev}
208 nodemap = {nullid: nullrev}
206 n = off = 0
209 n = off = 0
207 l = len(data)
210 l = len(data)
208 while off + s <= l:
211 while off + s <= l:
209 cur = data[off:off + s]
212 cur = data[off:off + s]
210 off += s
213 off += s
211 e = indexformatv0_unpack(cur)
214 e = indexformatv0_unpack(cur)
212 # transform to revlogv1 format
215 # transform to revlogv1 format
213 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
216 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
214 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
217 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
215 index.append(e2)
218 index.append(e2)
216 nodemap[e[6]] = n
219 nodemap[e[6]] = n
217 n += 1
220 n += 1
218
221
219 return revlogoldindex(index), nodemap, None
222 return revlogoldindex(index), nodemap, None
220
223
221 def packentry(self, entry, node, version, rev):
224 def packentry(self, entry, node, version, rev):
222 if gettype(entry[0]):
225 if gettype(entry[0]):
223 raise error.RevlogError(_('index entry flags need revlog '
226 raise error.RevlogError(_('index entry flags need revlog '
224 'version 1'))
227 'version 1'))
225 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
228 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
226 node(entry[5]), node(entry[6]), entry[7])
229 node(entry[5]), node(entry[6]), entry[7])
227 return indexformatv0_pack(*e2)
230 return indexformatv0_pack(*e2)
228
231
229 # index ng:
232 # index ng:
230 # 6 bytes: offset
233 # 6 bytes: offset
231 # 2 bytes: flags
234 # 2 bytes: flags
232 # 4 bytes: compressed length
235 # 4 bytes: compressed length
233 # 4 bytes: uncompressed length
236 # 4 bytes: uncompressed length
234 # 4 bytes: base rev
237 # 4 bytes: base rev
235 # 4 bytes: link rev
238 # 4 bytes: link rev
236 # 4 bytes: parent 1 rev
239 # 4 bytes: parent 1 rev
237 # 4 bytes: parent 2 rev
240 # 4 bytes: parent 2 rev
238 # 32 bytes: nodeid
241 # 32 bytes: nodeid
239 indexformatng = struct.Struct(">Qiiiiii20s12x")
242 indexformatng = struct.Struct(">Qiiiiii20s12x")
240 indexformatng_pack = indexformatng.pack
243 indexformatng_pack = indexformatng.pack
241 versionformat = struct.Struct(">I")
244 versionformat = struct.Struct(">I")
242 versionformat_pack = versionformat.pack
245 versionformat_pack = versionformat.pack
243 versionformat_unpack = versionformat.unpack
246 versionformat_unpack = versionformat.unpack
244
247
245 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
248 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
246 # signed integer)
249 # signed integer)
247 _maxentrysize = 0x7fffffff
250 _maxentrysize = 0x7fffffff
248
251
249 class revlogio(object):
252 class revlogio(object):
250 def __init__(self):
253 def __init__(self):
251 self.size = indexformatng.size
254 self.size = indexformatng.size
252
255
253 def parseindex(self, data, inline):
256 def parseindex(self, data, inline):
254 # call the C implementation to parse the index data
257 # call the C implementation to parse the index data
255 index, cache = parsers.parse_index2(data, inline)
258 index, cache = parsers.parse_index2(data, inline)
256 return index, getattr(index, 'nodemap', None), cache
259 return index, getattr(index, 'nodemap', None), cache
257
260
258 def packentry(self, entry, node, version, rev):
261 def packentry(self, entry, node, version, rev):
259 p = indexformatng_pack(*entry)
262 p = indexformatng_pack(*entry)
260 if rev == 0:
263 if rev == 0:
261 p = versionformat_pack(version) + p[4:]
264 p = versionformat_pack(version) + p[4:]
262 return p
265 return p
263
266
264 class revlog(object):
267 class revlog(object):
265 """
268 """
266 the underlying revision storage object
269 the underlying revision storage object
267
270
268 A revlog consists of two parts, an index and the revision data.
271 A revlog consists of two parts, an index and the revision data.
269
272
270 The index is a file with a fixed record size containing
273 The index is a file with a fixed record size containing
271 information on each revision, including its nodeid (hash), the
274 information on each revision, including its nodeid (hash), the
272 nodeids of its parents, the position and offset of its data within
275 nodeids of its parents, the position and offset of its data within
273 the data file, and the revision it's based on. Finally, each entry
276 the data file, and the revision it's based on. Finally, each entry
274 contains a linkrev entry that can serve as a pointer to external
277 contains a linkrev entry that can serve as a pointer to external
275 data.
278 data.
276
279
277 The revision data itself is a linear collection of data chunks.
280 The revision data itself is a linear collection of data chunks.
278 Each chunk represents a revision and is usually represented as a
281 Each chunk represents a revision and is usually represented as a
279 delta against the previous chunk. To bound lookup time, runs of
282 delta against the previous chunk. To bound lookup time, runs of
280 deltas are limited to about 2 times the length of the original
283 deltas are limited to about 2 times the length of the original
281 version data. This makes retrieval of a version proportional to
284 version data. This makes retrieval of a version proportional to
282 its size, or O(1) relative to the number of revisions.
285 its size, or O(1) relative to the number of revisions.
283
286
284 Both pieces of the revlog are written to in an append-only
287 Both pieces of the revlog are written to in an append-only
285 fashion, which means we never need to rewrite a file to insert or
288 fashion, which means we never need to rewrite a file to insert or
286 remove data, and can use some simple techniques to avoid the need
289 remove data, and can use some simple techniques to avoid the need
287 for locking while reading.
290 for locking while reading.
288
291
289 If checkambig, indexfile is opened with checkambig=True at
292 If checkambig, indexfile is opened with checkambig=True at
290 writing, to avoid file stat ambiguity.
293 writing, to avoid file stat ambiguity.
291
294
292 If mmaplargeindex is True, and an mmapindexthreshold is set, the
295 If mmaplargeindex is True, and an mmapindexthreshold is set, the
293 index will be mmapped rather than read if it is larger than the
296 index will be mmapped rather than read if it is larger than the
294 configured threshold.
297 configured threshold.
295
298
296 If censorable is True, the revlog can have censored revisions.
299 If censorable is True, the revlog can have censored revisions.
297
300
298 If `upperboundcomp` is not None, this is the expected maximal gain from
301 If `upperboundcomp` is not None, this is the expected maximal gain from
299 compression for the data content.
302 compression for the data content.
300 """
303 """
301
304
302 _flagserrorclass = error.RevlogError
305 _flagserrorclass = error.RevlogError
303
306
304 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
307 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
305 mmaplargeindex=False, censorable=False,
308 mmaplargeindex=False, censorable=False,
306 upperboundcomp=None):
309 upperboundcomp=None):
307 """
310 """
308 create a revlog object
311 create a revlog object
309
312
310 opener is a function that abstracts the file opening operation
313 opener is a function that abstracts the file opening operation
311 and can be used to implement COW semantics or the like.
314 and can be used to implement COW semantics or the like.
312
315
313 """
316 """
314 self.upperboundcomp = upperboundcomp
317 self.upperboundcomp = upperboundcomp
315 self.indexfile = indexfile
318 self.indexfile = indexfile
316 self.datafile = datafile or (indexfile[:-2] + ".d")
319 self.datafile = datafile or (indexfile[:-2] + ".d")
317 self.opener = opener
320 self.opener = opener
318 # When True, indexfile is opened with checkambig=True at writing, to
321 # When True, indexfile is opened with checkambig=True at writing, to
319 # avoid file stat ambiguity.
322 # avoid file stat ambiguity.
320 self._checkambig = checkambig
323 self._checkambig = checkambig
321 self._mmaplargeindex = mmaplargeindex
324 self._mmaplargeindex = mmaplargeindex
322 self._censorable = censorable
325 self._censorable = censorable
323 # 3-tuple of (node, rev, text) for a raw revision.
326 # 3-tuple of (node, rev, text) for a raw revision.
324 self._revisioncache = None
327 self._revisioncache = None
325 # Maps rev to chain base rev.
328 # Maps rev to chain base rev.
326 self._chainbasecache = util.lrucachedict(100)
329 self._chainbasecache = util.lrucachedict(100)
327 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
330 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
328 self._chunkcache = (0, '')
331 self._chunkcache = (0, '')
329 # How much data to read and cache into the raw revlog data cache.
332 # How much data to read and cache into the raw revlog data cache.
330 self._chunkcachesize = 65536
333 self._chunkcachesize = 65536
331 self._maxchainlen = None
334 self._maxchainlen = None
332 self._deltabothparents = True
335 self._deltabothparents = True
333 self.index = []
336 self.index = []
334 # Mapping of partial identifiers to full nodes.
337 # Mapping of partial identifiers to full nodes.
335 self._pcache = {}
338 self._pcache = {}
336 # Mapping of revision integer to full node.
339 # Mapping of revision integer to full node.
337 self._nodecache = {nullid: nullrev}
340 self._nodecache = {nullid: nullrev}
338 self._nodepos = None
341 self._nodepos = None
339 self._compengine = 'zlib'
342 self._compengine = 'zlib'
340 self._compengineopts = {}
343 self._compengineopts = {}
341 self._maxdeltachainspan = -1
344 self._maxdeltachainspan = -1
342 self._withsparseread = False
345 self._withsparseread = False
343 self._sparserevlog = False
346 self._sparserevlog = False
344 self._srdensitythreshold = 0.50
347 self._srdensitythreshold = 0.50
345 self._srmingapsize = 262144
348 self._srmingapsize = 262144
346
349
347 # Make copy of flag processors so each revlog instance can support
350 # Make copy of flag processors so each revlog instance can support
348 # custom flags.
351 # custom flags.
349 self._flagprocessors = dict(flagutil.flagprocessors)
352 self._flagprocessors = dict(flagutil.flagprocessors)
350
353
351 # 2-tuple of file handles being used for active writing.
354 # 2-tuple of file handles being used for active writing.
352 self._writinghandles = None
355 self._writinghandles = None
353
356
354 self._loadindex()
357 self._loadindex()
355
358
356 def _loadindex(self):
359 def _loadindex(self):
357 mmapindexthreshold = None
360 mmapindexthreshold = None
358 opts = self.opener.options
361 opts = self.opener.options
359
362
360 if 'revlogv2' in opts:
363 if 'revlogv2' in opts:
361 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
364 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
362 elif 'revlogv1' in opts:
365 elif 'revlogv1' in opts:
363 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
366 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
364 if 'generaldelta' in opts:
367 if 'generaldelta' in opts:
365 newversionflags |= FLAG_GENERALDELTA
368 newversionflags |= FLAG_GENERALDELTA
366 elif 'revlogv0' in self.opener.options:
369 elif 'revlogv0' in self.opener.options:
367 newversionflags = REVLOGV0
370 newversionflags = REVLOGV0
368 else:
371 else:
369 newversionflags = REVLOG_DEFAULT_VERSION
372 newversionflags = REVLOG_DEFAULT_VERSION
370
373
371 if 'chunkcachesize' in opts:
374 if 'chunkcachesize' in opts:
372 self._chunkcachesize = opts['chunkcachesize']
375 self._chunkcachesize = opts['chunkcachesize']
373 if 'maxchainlen' in opts:
376 if 'maxchainlen' in opts:
374 self._maxchainlen = opts['maxchainlen']
377 self._maxchainlen = opts['maxchainlen']
375 if 'deltabothparents' in opts:
378 if 'deltabothparents' in opts:
376 self._deltabothparents = opts['deltabothparents']
379 self._deltabothparents = opts['deltabothparents']
377 self._lazydelta = bool(opts.get('lazydelta', True))
380 self._lazydelta = bool(opts.get('lazydelta', True))
378 self._lazydeltabase = False
381 self._lazydeltabase = False
379 if self._lazydelta:
382 if self._lazydelta:
380 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
383 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
381 if 'compengine' in opts:
384 if 'compengine' in opts:
382 self._compengine = opts['compengine']
385 self._compengine = opts['compengine']
383 if 'zlib.level' in opts:
386 if 'zlib.level' in opts:
384 self._compengineopts['zlib.level'] = opts['zlib.level']
387 self._compengineopts['zlib.level'] = opts['zlib.level']
385 if 'zstd.level' in opts:
388 if 'zstd.level' in opts:
386 self._compengineopts['zstd.level'] = opts['zstd.level']
389 self._compengineopts['zstd.level'] = opts['zstd.level']
387 if 'maxdeltachainspan' in opts:
390 if 'maxdeltachainspan' in opts:
388 self._maxdeltachainspan = opts['maxdeltachainspan']
391 self._maxdeltachainspan = opts['maxdeltachainspan']
389 if self._mmaplargeindex and 'mmapindexthreshold' in opts:
392 if self._mmaplargeindex and 'mmapindexthreshold' in opts:
390 mmapindexthreshold = opts['mmapindexthreshold']
393 mmapindexthreshold = opts['mmapindexthreshold']
391 self.hassidedata = bool(opts.get('side-data', False))
394 self.hassidedata = bool(opts.get('side-data', False))
395 if self.hassidedata:
396 self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
392 self._sparserevlog = bool(opts.get('sparse-revlog', False))
397 self._sparserevlog = bool(opts.get('sparse-revlog', False))
393 withsparseread = bool(opts.get('with-sparse-read', False))
398 withsparseread = bool(opts.get('with-sparse-read', False))
394 # sparse-revlog forces sparse-read
399 # sparse-revlog forces sparse-read
395 self._withsparseread = self._sparserevlog or withsparseread
400 self._withsparseread = self._sparserevlog or withsparseread
396 if 'sparse-read-density-threshold' in opts:
401 if 'sparse-read-density-threshold' in opts:
397 self._srdensitythreshold = opts['sparse-read-density-threshold']
402 self._srdensitythreshold = opts['sparse-read-density-threshold']
398 if 'sparse-read-min-gap-size' in opts:
403 if 'sparse-read-min-gap-size' in opts:
399 self._srmingapsize = opts['sparse-read-min-gap-size']
404 self._srmingapsize = opts['sparse-read-min-gap-size']
400 if opts.get('enableellipsis'):
405 if opts.get('enableellipsis'):
401 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
406 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
402
407
403 # revlog v0 doesn't have flag processors
408 # revlog v0 doesn't have flag processors
404 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
409 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
405 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
410 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
406
411
407 if self._chunkcachesize <= 0:
412 if self._chunkcachesize <= 0:
408 raise error.RevlogError(_('revlog chunk cache size %r is not '
413 raise error.RevlogError(_('revlog chunk cache size %r is not '
409 'greater than 0') % self._chunkcachesize)
414 'greater than 0') % self._chunkcachesize)
410 elif self._chunkcachesize & (self._chunkcachesize - 1):
415 elif self._chunkcachesize & (self._chunkcachesize - 1):
411 raise error.RevlogError(_('revlog chunk cache size %r is not a '
416 raise error.RevlogError(_('revlog chunk cache size %r is not a '
412 'power of 2') % self._chunkcachesize)
417 'power of 2') % self._chunkcachesize)
413
418
414 indexdata = ''
419 indexdata = ''
415 self._initempty = True
420 self._initempty = True
416 try:
421 try:
417 with self._indexfp() as f:
422 with self._indexfp() as f:
418 if (mmapindexthreshold is not None and
423 if (mmapindexthreshold is not None and
419 self.opener.fstat(f).st_size >= mmapindexthreshold):
424 self.opener.fstat(f).st_size >= mmapindexthreshold):
420 # TODO: should .close() to release resources without
425 # TODO: should .close() to release resources without
421 # relying on Python GC
426 # relying on Python GC
422 indexdata = util.buffer(util.mmapread(f))
427 indexdata = util.buffer(util.mmapread(f))
423 else:
428 else:
424 indexdata = f.read()
429 indexdata = f.read()
425 if len(indexdata) > 0:
430 if len(indexdata) > 0:
426 versionflags = versionformat_unpack(indexdata[:4])[0]
431 versionflags = versionformat_unpack(indexdata[:4])[0]
427 self._initempty = False
432 self._initempty = False
428 else:
433 else:
429 versionflags = newversionflags
434 versionflags = newversionflags
430 except IOError as inst:
435 except IOError as inst:
431 if inst.errno != errno.ENOENT:
436 if inst.errno != errno.ENOENT:
432 raise
437 raise
433
438
434 versionflags = newversionflags
439 versionflags = newversionflags
435
440
436 self.version = versionflags
441 self.version = versionflags
437
442
438 flags = versionflags & ~0xFFFF
443 flags = versionflags & ~0xFFFF
439 fmt = versionflags & 0xFFFF
444 fmt = versionflags & 0xFFFF
440
445
441 if fmt == REVLOGV0:
446 if fmt == REVLOGV0:
442 if flags:
447 if flags:
443 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
448 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
444 'revlog %s') %
449 'revlog %s') %
445 (flags >> 16, fmt, self.indexfile))
450 (flags >> 16, fmt, self.indexfile))
446
451
447 self._inline = False
452 self._inline = False
448 self._generaldelta = False
453 self._generaldelta = False
449
454
450 elif fmt == REVLOGV1:
455 elif fmt == REVLOGV1:
451 if flags & ~REVLOGV1_FLAGS:
456 if flags & ~REVLOGV1_FLAGS:
452 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
457 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
453 'revlog %s') %
458 'revlog %s') %
454 (flags >> 16, fmt, self.indexfile))
459 (flags >> 16, fmt, self.indexfile))
455
460
456 self._inline = versionflags & FLAG_INLINE_DATA
461 self._inline = versionflags & FLAG_INLINE_DATA
457 self._generaldelta = versionflags & FLAG_GENERALDELTA
462 self._generaldelta = versionflags & FLAG_GENERALDELTA
458
463
459 elif fmt == REVLOGV2:
464 elif fmt == REVLOGV2:
460 if flags & ~REVLOGV2_FLAGS:
465 if flags & ~REVLOGV2_FLAGS:
461 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
466 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
462 'revlog %s') %
467 'revlog %s') %
463 (flags >> 16, fmt, self.indexfile))
468 (flags >> 16, fmt, self.indexfile))
464
469
465 self._inline = versionflags & FLAG_INLINE_DATA
470 self._inline = versionflags & FLAG_INLINE_DATA
466 # generaldelta implied by version 2 revlogs.
471 # generaldelta implied by version 2 revlogs.
467 self._generaldelta = True
472 self._generaldelta = True
468
473
469 else:
474 else:
470 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
475 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
471 (fmt, self.indexfile))
476 (fmt, self.indexfile))
472 # sparse-revlog can't be on without general-delta (issue6056)
477 # sparse-revlog can't be on without general-delta (issue6056)
473 if not self._generaldelta:
478 if not self._generaldelta:
474 self._sparserevlog = False
479 self._sparserevlog = False
475
480
476 self._storedeltachains = True
481 self._storedeltachains = True
477
482
478 self._io = revlogio()
483 self._io = revlogio()
479 if self.version == REVLOGV0:
484 if self.version == REVLOGV0:
480 self._io = revlogoldio()
485 self._io = revlogoldio()
481 try:
486 try:
482 d = self._io.parseindex(indexdata, self._inline)
487 d = self._io.parseindex(indexdata, self._inline)
483 except (ValueError, IndexError):
488 except (ValueError, IndexError):
484 raise error.RevlogError(_("index %s is corrupted") %
489 raise error.RevlogError(_("index %s is corrupted") %
485 self.indexfile)
490 self.indexfile)
486 self.index, nodemap, self._chunkcache = d
491 self.index, nodemap, self._chunkcache = d
487 if nodemap is not None:
492 if nodemap is not None:
488 self.nodemap = self._nodecache = nodemap
493 self.nodemap = self._nodecache = nodemap
489 if not self._chunkcache:
494 if not self._chunkcache:
490 self._chunkclear()
495 self._chunkclear()
491 # revnum -> (chain-length, sum-delta-length)
496 # revnum -> (chain-length, sum-delta-length)
492 self._chaininfocache = {}
497 self._chaininfocache = {}
493 # revlog header -> revlog compressor
498 # revlog header -> revlog compressor
494 self._decompressors = {}
499 self._decompressors = {}
495
500
496 @util.propertycache
501 @util.propertycache
497 def _compressor(self):
502 def _compressor(self):
498 engine = util.compengines[self._compengine]
503 engine = util.compengines[self._compengine]
499 return engine.revlogcompressor(self._compengineopts)
504 return engine.revlogcompressor(self._compengineopts)
500
505
501 def _indexfp(self, mode='r'):
506 def _indexfp(self, mode='r'):
502 """file object for the revlog's index file"""
507 """file object for the revlog's index file"""
503 args = {r'mode': mode}
508 args = {r'mode': mode}
504 if mode != 'r':
509 if mode != 'r':
505 args[r'checkambig'] = self._checkambig
510 args[r'checkambig'] = self._checkambig
506 if mode == 'w':
511 if mode == 'w':
507 args[r'atomictemp'] = True
512 args[r'atomictemp'] = True
508 return self.opener(self.indexfile, **args)
513 return self.opener(self.indexfile, **args)
509
514
510 def _datafp(self, mode='r'):
515 def _datafp(self, mode='r'):
511 """file object for the revlog's data file"""
516 """file object for the revlog's data file"""
512 return self.opener(self.datafile, mode=mode)
517 return self.opener(self.datafile, mode=mode)
513
518
514 @contextlib.contextmanager
519 @contextlib.contextmanager
515 def _datareadfp(self, existingfp=None):
520 def _datareadfp(self, existingfp=None):
516 """file object suitable to read data"""
521 """file object suitable to read data"""
517 # Use explicit file handle, if given.
522 # Use explicit file handle, if given.
518 if existingfp is not None:
523 if existingfp is not None:
519 yield existingfp
524 yield existingfp
520
525
521 # Use a file handle being actively used for writes, if available.
526 # Use a file handle being actively used for writes, if available.
522 # There is some danger to doing this because reads will seek the
527 # There is some danger to doing this because reads will seek the
523 # file. However, _writeentry() performs a SEEK_END before all writes,
528 # file. However, _writeentry() performs a SEEK_END before all writes,
524 # so we should be safe.
529 # so we should be safe.
525 elif self._writinghandles:
530 elif self._writinghandles:
526 if self._inline:
531 if self._inline:
527 yield self._writinghandles[0]
532 yield self._writinghandles[0]
528 else:
533 else:
529 yield self._writinghandles[1]
534 yield self._writinghandles[1]
530
535
531 # Otherwise open a new file handle.
536 # Otherwise open a new file handle.
532 else:
537 else:
533 if self._inline:
538 if self._inline:
534 func = self._indexfp
539 func = self._indexfp
535 else:
540 else:
536 func = self._datafp
541 func = self._datafp
537 with func() as fp:
542 with func() as fp:
538 yield fp
543 yield fp
539
544
540 def tip(self):
545 def tip(self):
541 return self.node(len(self.index) - 1)
546 return self.node(len(self.index) - 1)
542 def __contains__(self, rev):
547 def __contains__(self, rev):
543 return 0 <= rev < len(self)
548 return 0 <= rev < len(self)
544 def __len__(self):
549 def __len__(self):
545 return len(self.index)
550 return len(self.index)
546 def __iter__(self):
551 def __iter__(self):
547 return iter(pycompat.xrange(len(self)))
552 return iter(pycompat.xrange(len(self)))
548 def revs(self, start=0, stop=None):
553 def revs(self, start=0, stop=None):
549 """iterate over all rev in this revlog (from start to stop)"""
554 """iterate over all rev in this revlog (from start to stop)"""
550 return storageutil.iterrevs(len(self), start=start, stop=stop)
555 return storageutil.iterrevs(len(self), start=start, stop=stop)
551
556
552 @util.propertycache
557 @util.propertycache
553 def nodemap(self):
558 def nodemap(self):
554 if self.index:
559 if self.index:
555 # populate mapping down to the initial node
560 # populate mapping down to the initial node
556 node0 = self.index[0][7] # get around changelog filtering
561 node0 = self.index[0][7] # get around changelog filtering
557 self.rev(node0)
562 self.rev(node0)
558 return self._nodecache
563 return self._nodecache
559
564
560 def hasnode(self, node):
565 def hasnode(self, node):
561 try:
566 try:
562 self.rev(node)
567 self.rev(node)
563 return True
568 return True
564 except KeyError:
569 except KeyError:
565 return False
570 return False
566
571
567 def candelta(self, baserev, rev):
572 def candelta(self, baserev, rev):
568 """whether two revisions (baserev, rev) can be delta-ed or not"""
573 """whether two revisions (baserev, rev) can be delta-ed or not"""
569 # Disable delta if either rev requires a content-changing flag
574 # Disable delta if either rev requires a content-changing flag
570 # processor (ex. LFS). This is because such flag processor can alter
575 # processor (ex. LFS). This is because such flag processor can alter
571 # the rawtext content that the delta will be based on, and two clients
576 # the rawtext content that the delta will be based on, and two clients
572 # could have a same revlog node with different flags (i.e. different
577 # could have a same revlog node with different flags (i.e. different
573 # rawtext contents) and the delta could be incompatible.
578 # rawtext contents) and the delta could be incompatible.
574 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
579 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
575 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
580 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
576 return False
581 return False
577 return True
582 return True
578
583
579 def clearcaches(self):
584 def clearcaches(self):
580 self._revisioncache = None
585 self._revisioncache = None
581 self._chainbasecache.clear()
586 self._chainbasecache.clear()
582 self._chunkcache = (0, '')
587 self._chunkcache = (0, '')
583 self._pcache = {}
588 self._pcache = {}
584
589
585 try:
590 try:
586 # If we are using the native C version, you are in a fun case
591 # If we are using the native C version, you are in a fun case
587 # where self.index, self.nodemap and self._nodecaches is the same
592 # where self.index, self.nodemap and self._nodecaches is the same
588 # object.
593 # object.
589 self._nodecache.clearcaches()
594 self._nodecache.clearcaches()
590 except AttributeError:
595 except AttributeError:
591 self._nodecache = {nullid: nullrev}
596 self._nodecache = {nullid: nullrev}
592 self._nodepos = None
597 self._nodepos = None
593
598
594 def rev(self, node):
599 def rev(self, node):
595 try:
600 try:
596 return self._nodecache[node]
601 return self._nodecache[node]
597 except TypeError:
602 except TypeError:
598 raise
603 raise
599 except error.RevlogError:
604 except error.RevlogError:
600 # parsers.c radix tree lookup failed
605 # parsers.c radix tree lookup failed
601 if node == wdirid or node in wdirfilenodeids:
606 if node == wdirid or node in wdirfilenodeids:
602 raise error.WdirUnsupported
607 raise error.WdirUnsupported
603 raise error.LookupError(node, self.indexfile, _('no node'))
608 raise error.LookupError(node, self.indexfile, _('no node'))
604 except KeyError:
609 except KeyError:
605 # pure python cache lookup failed
610 # pure python cache lookup failed
606 n = self._nodecache
611 n = self._nodecache
607 i = self.index
612 i = self.index
608 p = self._nodepos
613 p = self._nodepos
609 if p is None:
614 if p is None:
610 p = len(i) - 1
615 p = len(i) - 1
611 else:
616 else:
612 assert p < len(i)
617 assert p < len(i)
613 for r in pycompat.xrange(p, -1, -1):
618 for r in pycompat.xrange(p, -1, -1):
614 v = i[r][7]
619 v = i[r][7]
615 n[v] = r
620 n[v] = r
616 if v == node:
621 if v == node:
617 self._nodepos = r - 1
622 self._nodepos = r - 1
618 return r
623 return r
619 if node == wdirid or node in wdirfilenodeids:
624 if node == wdirid or node in wdirfilenodeids:
620 raise error.WdirUnsupported
625 raise error.WdirUnsupported
621 raise error.LookupError(node, self.indexfile, _('no node'))
626 raise error.LookupError(node, self.indexfile, _('no node'))
622
627
623 # Accessors for index entries.
628 # Accessors for index entries.
624
629
625 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
630 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
626 # are flags.
631 # are flags.
627 def start(self, rev):
632 def start(self, rev):
628 return int(self.index[rev][0] >> 16)
633 return int(self.index[rev][0] >> 16)
629
634
630 def flags(self, rev):
635 def flags(self, rev):
631 return self.index[rev][0] & 0xFFFF
636 return self.index[rev][0] & 0xFFFF
632
637
633 def length(self, rev):
638 def length(self, rev):
634 return self.index[rev][1]
639 return self.index[rev][1]
635
640
636 def rawsize(self, rev):
641 def rawsize(self, rev):
637 """return the length of the uncompressed text for a given revision"""
642 """return the length of the uncompressed text for a given revision"""
638 l = self.index[rev][2]
643 l = self.index[rev][2]
639 if l >= 0:
644 if l >= 0:
640 return l
645 return l
641
646
642 t = self.rawdata(rev)
647 t = self.rawdata(rev)
643 return len(t)
648 return len(t)
644
649
645 def size(self, rev):
650 def size(self, rev):
646 """length of non-raw text (processed by a "read" flag processor)"""
651 """length of non-raw text (processed by a "read" flag processor)"""
647 # fast path: if no "read" flag processor could change the content,
652 # fast path: if no "read" flag processor could change the content,
648 # size is rawsize. note: ELLIPSIS is known to not change the content.
653 # size is rawsize. note: ELLIPSIS is known to not change the content.
649 flags = self.flags(rev)
654 flags = self.flags(rev)
650 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
655 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
651 return self.rawsize(rev)
656 return self.rawsize(rev)
652
657
653 return len(self.revision(rev, raw=False))
658 return len(self.revision(rev, raw=False))
654
659
655 def chainbase(self, rev):
660 def chainbase(self, rev):
656 base = self._chainbasecache.get(rev)
661 base = self._chainbasecache.get(rev)
657 if base is not None:
662 if base is not None:
658 return base
663 return base
659
664
660 index = self.index
665 index = self.index
661 iterrev = rev
666 iterrev = rev
662 base = index[iterrev][3]
667 base = index[iterrev][3]
663 while base != iterrev:
668 while base != iterrev:
664 iterrev = base
669 iterrev = base
665 base = index[iterrev][3]
670 base = index[iterrev][3]
666
671
667 self._chainbasecache[rev] = base
672 self._chainbasecache[rev] = base
668 return base
673 return base
669
674
670 def linkrev(self, rev):
675 def linkrev(self, rev):
671 return self.index[rev][4]
676 return self.index[rev][4]
672
677
673 def parentrevs(self, rev):
678 def parentrevs(self, rev):
674 try:
679 try:
675 entry = self.index[rev]
680 entry = self.index[rev]
676 except IndexError:
681 except IndexError:
677 if rev == wdirrev:
682 if rev == wdirrev:
678 raise error.WdirUnsupported
683 raise error.WdirUnsupported
679 raise
684 raise
680
685
681 return entry[5], entry[6]
686 return entry[5], entry[6]
682
687
683 # fast parentrevs(rev) where rev isn't filtered
688 # fast parentrevs(rev) where rev isn't filtered
684 _uncheckedparentrevs = parentrevs
689 _uncheckedparentrevs = parentrevs
685
690
686 def node(self, rev):
691 def node(self, rev):
687 try:
692 try:
688 return self.index[rev][7]
693 return self.index[rev][7]
689 except IndexError:
694 except IndexError:
690 if rev == wdirrev:
695 if rev == wdirrev:
691 raise error.WdirUnsupported
696 raise error.WdirUnsupported
692 raise
697 raise
693
698
694 # Derived from index values.
699 # Derived from index values.
695
700
696 def end(self, rev):
701 def end(self, rev):
697 return self.start(rev) + self.length(rev)
702 return self.start(rev) + self.length(rev)
698
703
699 def parents(self, node):
704 def parents(self, node):
700 i = self.index
705 i = self.index
701 d = i[self.rev(node)]
706 d = i[self.rev(node)]
702 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
707 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
703
708
704 def chainlen(self, rev):
709 def chainlen(self, rev):
705 return self._chaininfo(rev)[0]
710 return self._chaininfo(rev)[0]
706
711
707 def _chaininfo(self, rev):
712 def _chaininfo(self, rev):
708 chaininfocache = self._chaininfocache
713 chaininfocache = self._chaininfocache
709 if rev in chaininfocache:
714 if rev in chaininfocache:
710 return chaininfocache[rev]
715 return chaininfocache[rev]
711 index = self.index
716 index = self.index
712 generaldelta = self._generaldelta
717 generaldelta = self._generaldelta
713 iterrev = rev
718 iterrev = rev
714 e = index[iterrev]
719 e = index[iterrev]
715 clen = 0
720 clen = 0
716 compresseddeltalen = 0
721 compresseddeltalen = 0
717 while iterrev != e[3]:
722 while iterrev != e[3]:
718 clen += 1
723 clen += 1
719 compresseddeltalen += e[1]
724 compresseddeltalen += e[1]
720 if generaldelta:
725 if generaldelta:
721 iterrev = e[3]
726 iterrev = e[3]
722 else:
727 else:
723 iterrev -= 1
728 iterrev -= 1
724 if iterrev in chaininfocache:
729 if iterrev in chaininfocache:
725 t = chaininfocache[iterrev]
730 t = chaininfocache[iterrev]
726 clen += t[0]
731 clen += t[0]
727 compresseddeltalen += t[1]
732 compresseddeltalen += t[1]
728 break
733 break
729 e = index[iterrev]
734 e = index[iterrev]
730 else:
735 else:
731 # Add text length of base since decompressing that also takes
736 # Add text length of base since decompressing that also takes
732 # work. For cache hits the length is already included.
737 # work. For cache hits the length is already included.
733 compresseddeltalen += e[1]
738 compresseddeltalen += e[1]
734 r = (clen, compresseddeltalen)
739 r = (clen, compresseddeltalen)
735 chaininfocache[rev] = r
740 chaininfocache[rev] = r
736 return r
741 return r
737
742
738 def _deltachain(self, rev, stoprev=None):
743 def _deltachain(self, rev, stoprev=None):
739 """Obtain the delta chain for a revision.
744 """Obtain the delta chain for a revision.
740
745
741 ``stoprev`` specifies a revision to stop at. If not specified, we
746 ``stoprev`` specifies a revision to stop at. If not specified, we
742 stop at the base of the chain.
747 stop at the base of the chain.
743
748
744 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
749 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
745 revs in ascending order and ``stopped`` is a bool indicating whether
750 revs in ascending order and ``stopped`` is a bool indicating whether
746 ``stoprev`` was hit.
751 ``stoprev`` was hit.
747 """
752 """
748 # Try C implementation.
753 # Try C implementation.
749 try:
754 try:
750 return self.index.deltachain(rev, stoprev, self._generaldelta)
755 return self.index.deltachain(rev, stoprev, self._generaldelta)
751 except AttributeError:
756 except AttributeError:
752 pass
757 pass
753
758
754 chain = []
759 chain = []
755
760
756 # Alias to prevent attribute lookup in tight loop.
761 # Alias to prevent attribute lookup in tight loop.
757 index = self.index
762 index = self.index
758 generaldelta = self._generaldelta
763 generaldelta = self._generaldelta
759
764
760 iterrev = rev
765 iterrev = rev
761 e = index[iterrev]
766 e = index[iterrev]
762 while iterrev != e[3] and iterrev != stoprev:
767 while iterrev != e[3] and iterrev != stoprev:
763 chain.append(iterrev)
768 chain.append(iterrev)
764 if generaldelta:
769 if generaldelta:
765 iterrev = e[3]
770 iterrev = e[3]
766 else:
771 else:
767 iterrev -= 1
772 iterrev -= 1
768 e = index[iterrev]
773 e = index[iterrev]
769
774
770 if iterrev == stoprev:
775 if iterrev == stoprev:
771 stopped = True
776 stopped = True
772 else:
777 else:
773 chain.append(iterrev)
778 chain.append(iterrev)
774 stopped = False
779 stopped = False
775
780
776 chain.reverse()
781 chain.reverse()
777 return chain, stopped
782 return chain, stopped
778
783
779 def ancestors(self, revs, stoprev=0, inclusive=False):
784 def ancestors(self, revs, stoprev=0, inclusive=False):
780 """Generate the ancestors of 'revs' in reverse revision order.
785 """Generate the ancestors of 'revs' in reverse revision order.
781 Does not generate revs lower than stoprev.
786 Does not generate revs lower than stoprev.
782
787
783 See the documentation for ancestor.lazyancestors for more details."""
788 See the documentation for ancestor.lazyancestors for more details."""
784
789
785 # first, make sure start revisions aren't filtered
790 # first, make sure start revisions aren't filtered
786 revs = list(revs)
791 revs = list(revs)
787 checkrev = self.node
792 checkrev = self.node
788 for r in revs:
793 for r in revs:
789 checkrev(r)
794 checkrev(r)
790 # and we're sure ancestors aren't filtered as well
795 # and we're sure ancestors aren't filtered as well
791
796
792 if rustancestor is not None:
797 if rustancestor is not None:
793 lazyancestors = rustancestor.LazyAncestors
798 lazyancestors = rustancestor.LazyAncestors
794 arg = self.index
799 arg = self.index
795 elif util.safehasattr(parsers, 'rustlazyancestors'):
800 elif util.safehasattr(parsers, 'rustlazyancestors'):
796 lazyancestors = ancestor.rustlazyancestors
801 lazyancestors = ancestor.rustlazyancestors
797 arg = self.index
802 arg = self.index
798 else:
803 else:
799 lazyancestors = ancestor.lazyancestors
804 lazyancestors = ancestor.lazyancestors
800 arg = self._uncheckedparentrevs
805 arg = self._uncheckedparentrevs
801 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
806 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
802
807
803 def descendants(self, revs):
808 def descendants(self, revs):
804 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
809 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
805
810
806 def findcommonmissing(self, common=None, heads=None):
811 def findcommonmissing(self, common=None, heads=None):
807 """Return a tuple of the ancestors of common and the ancestors of heads
812 """Return a tuple of the ancestors of common and the ancestors of heads
808 that are not ancestors of common. In revset terminology, we return the
813 that are not ancestors of common. In revset terminology, we return the
809 tuple:
814 tuple:
810
815
811 ::common, (::heads) - (::common)
816 ::common, (::heads) - (::common)
812
817
813 The list is sorted by revision number, meaning it is
818 The list is sorted by revision number, meaning it is
814 topologically sorted.
819 topologically sorted.
815
820
816 'heads' and 'common' are both lists of node IDs. If heads is
821 'heads' and 'common' are both lists of node IDs. If heads is
817 not supplied, uses all of the revlog's heads. If common is not
822 not supplied, uses all of the revlog's heads. If common is not
818 supplied, uses nullid."""
823 supplied, uses nullid."""
819 if common is None:
824 if common is None:
820 common = [nullid]
825 common = [nullid]
821 if heads is None:
826 if heads is None:
822 heads = self.heads()
827 heads = self.heads()
823
828
824 common = [self.rev(n) for n in common]
829 common = [self.rev(n) for n in common]
825 heads = [self.rev(n) for n in heads]
830 heads = [self.rev(n) for n in heads]
826
831
827 # we want the ancestors, but inclusive
832 # we want the ancestors, but inclusive
828 class lazyset(object):
833 class lazyset(object):
829 def __init__(self, lazyvalues):
834 def __init__(self, lazyvalues):
830 self.addedvalues = set()
835 self.addedvalues = set()
831 self.lazyvalues = lazyvalues
836 self.lazyvalues = lazyvalues
832
837
833 def __contains__(self, value):
838 def __contains__(self, value):
834 return value in self.addedvalues or value in self.lazyvalues
839 return value in self.addedvalues or value in self.lazyvalues
835
840
836 def __iter__(self):
841 def __iter__(self):
837 added = self.addedvalues
842 added = self.addedvalues
838 for r in added:
843 for r in added:
839 yield r
844 yield r
840 for r in self.lazyvalues:
845 for r in self.lazyvalues:
841 if not r in added:
846 if not r in added:
842 yield r
847 yield r
843
848
844 def add(self, value):
849 def add(self, value):
845 self.addedvalues.add(value)
850 self.addedvalues.add(value)
846
851
847 def update(self, values):
852 def update(self, values):
848 self.addedvalues.update(values)
853 self.addedvalues.update(values)
849
854
850 has = lazyset(self.ancestors(common))
855 has = lazyset(self.ancestors(common))
851 has.add(nullrev)
856 has.add(nullrev)
852 has.update(common)
857 has.update(common)
853
858
854 # take all ancestors from heads that aren't in has
859 # take all ancestors from heads that aren't in has
855 missing = set()
860 missing = set()
856 visit = collections.deque(r for r in heads if r not in has)
861 visit = collections.deque(r for r in heads if r not in has)
857 while visit:
862 while visit:
858 r = visit.popleft()
863 r = visit.popleft()
859 if r in missing:
864 if r in missing:
860 continue
865 continue
861 else:
866 else:
862 missing.add(r)
867 missing.add(r)
863 for p in self.parentrevs(r):
868 for p in self.parentrevs(r):
864 if p not in has:
869 if p not in has:
865 visit.append(p)
870 visit.append(p)
866 missing = list(missing)
871 missing = list(missing)
867 missing.sort()
872 missing.sort()
868 return has, [self.node(miss) for miss in missing]
873 return has, [self.node(miss) for miss in missing]
869
874
870 def incrementalmissingrevs(self, common=None):
875 def incrementalmissingrevs(self, common=None):
871 """Return an object that can be used to incrementally compute the
876 """Return an object that can be used to incrementally compute the
872 revision numbers of the ancestors of arbitrary sets that are not
877 revision numbers of the ancestors of arbitrary sets that are not
873 ancestors of common. This is an ancestor.incrementalmissingancestors
878 ancestors of common. This is an ancestor.incrementalmissingancestors
874 object.
879 object.
875
880
876 'common' is a list of revision numbers. If common is not supplied, uses
881 'common' is a list of revision numbers. If common is not supplied, uses
877 nullrev.
882 nullrev.
878 """
883 """
879 if common is None:
884 if common is None:
880 common = [nullrev]
885 common = [nullrev]
881
886
882 if rustancestor is not None:
887 if rustancestor is not None:
883 return rustancestor.MissingAncestors(self.index, common)
888 return rustancestor.MissingAncestors(self.index, common)
884 return ancestor.incrementalmissingancestors(self.parentrevs, common)
889 return ancestor.incrementalmissingancestors(self.parentrevs, common)
885
890
886 def findmissingrevs(self, common=None, heads=None):
891 def findmissingrevs(self, common=None, heads=None):
887 """Return the revision numbers of the ancestors of heads that
892 """Return the revision numbers of the ancestors of heads that
888 are not ancestors of common.
893 are not ancestors of common.
889
894
890 More specifically, return a list of revision numbers corresponding to
895 More specifically, return a list of revision numbers corresponding to
891 nodes N such that every N satisfies the following constraints:
896 nodes N such that every N satisfies the following constraints:
892
897
893 1. N is an ancestor of some node in 'heads'
898 1. N is an ancestor of some node in 'heads'
894 2. N is not an ancestor of any node in 'common'
899 2. N is not an ancestor of any node in 'common'
895
900
896 The list is sorted by revision number, meaning it is
901 The list is sorted by revision number, meaning it is
897 topologically sorted.
902 topologically sorted.
898
903
899 'heads' and 'common' are both lists of revision numbers. If heads is
904 'heads' and 'common' are both lists of revision numbers. If heads is
900 not supplied, uses all of the revlog's heads. If common is not
905 not supplied, uses all of the revlog's heads. If common is not
901 supplied, uses nullid."""
906 supplied, uses nullid."""
902 if common is None:
907 if common is None:
903 common = [nullrev]
908 common = [nullrev]
904 if heads is None:
909 if heads is None:
905 heads = self.headrevs()
910 heads = self.headrevs()
906
911
907 inc = self.incrementalmissingrevs(common=common)
912 inc = self.incrementalmissingrevs(common=common)
908 return inc.missingancestors(heads)
913 return inc.missingancestors(heads)
909
914
910 def findmissing(self, common=None, heads=None):
915 def findmissing(self, common=None, heads=None):
911 """Return the ancestors of heads that are not ancestors of common.
916 """Return the ancestors of heads that are not ancestors of common.
912
917
913 More specifically, return a list of nodes N such that every N
918 More specifically, return a list of nodes N such that every N
914 satisfies the following constraints:
919 satisfies the following constraints:
915
920
916 1. N is an ancestor of some node in 'heads'
921 1. N is an ancestor of some node in 'heads'
917 2. N is not an ancestor of any node in 'common'
922 2. N is not an ancestor of any node in 'common'
918
923
919 The list is sorted by revision number, meaning it is
924 The list is sorted by revision number, meaning it is
920 topologically sorted.
925 topologically sorted.
921
926
922 'heads' and 'common' are both lists of node IDs. If heads is
927 'heads' and 'common' are both lists of node IDs. If heads is
923 not supplied, uses all of the revlog's heads. If common is not
928 not supplied, uses all of the revlog's heads. If common is not
924 supplied, uses nullid."""
929 supplied, uses nullid."""
925 if common is None:
930 if common is None:
926 common = [nullid]
931 common = [nullid]
927 if heads is None:
932 if heads is None:
928 heads = self.heads()
933 heads = self.heads()
929
934
930 common = [self.rev(n) for n in common]
935 common = [self.rev(n) for n in common]
931 heads = [self.rev(n) for n in heads]
936 heads = [self.rev(n) for n in heads]
932
937
933 inc = self.incrementalmissingrevs(common=common)
938 inc = self.incrementalmissingrevs(common=common)
934 return [self.node(r) for r in inc.missingancestors(heads)]
939 return [self.node(r) for r in inc.missingancestors(heads)]
935
940
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is; real heads should not be ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

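    # Illustrative sketch (hypothetical nodes, not from the original file):
    # for a linear history a -> b -> c stored in revlog `rl`,
    #
    #   nodes, outroots, outheads = rl.nodesbetween(roots=[a], heads=[c])
    #
    # yields nodes == [a, b, c], outroots == [a] and outheads == [c], since
    # every node counts as both its own ancestor and its own descendant.
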
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

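    # Note on _headrevs() (added): `ishead` has len(self) + 1 slots on
    # purpose.  The extra trailing slot absorbs the writes done for nullrev
    # (-1) parents, since ishead[-1] aliases it; that slot is never set to 1,
    # so it never appears in the result.
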
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
                                    stoprevs=stoprevs)

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

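    # Note on children() (added): the scan can start at p + 1 because a
    # revlog is append-only -- a child is always written after its parents,
    # so no revision at or below p can name p as a parent.
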
    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

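    # Note on isancestorrev() (added): the early returns rely on revision
    # numbers being topologically sorted -- an ancestor always has a lower
    # number than its descendants -- so a > b can never be an ancestor
    # relationship.  The remaining case asks reachableroots() whether `a`
    # is reached when walking back from `b`.
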
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::<roots> and <roots>::<heads>))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(minroot, heads, roots,
                                              includepath)
        except AttributeError:
            return dagop._reachablerootspure(self.parentrevs,
                                             minroot, roots, heads,
                                             includepath)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except error.LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

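    # Note on _match() (added): it resolves, in order, an integer revision,
    # a 20-byte binary node, a decimal revision string (negative values
    # count from the end), and a 40-character hex nodeid.  Anything else
    # returns None, and lookup() then falls back to _partialmatch() below.
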
    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they are always full hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _('no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _('partial read of revlog %s; expected %d bytes from '
                      'offset %d, got %d') %
                    (self.indexfile if self._inline else self.datafile,
                     length, realoffset, len(d) - startoffset))

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _('partial read of revlog %s; expected %d bytes from offset '
                  '%d, got %d') %
                (self.indexfile if self._inline else self.datafile,
                 length, offset, len(d)))

        return d

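    # Worked example (added; assumes a power-of-two _chunkcachesize such as
    # 65536): a request for offset=70000, length=100 is widened to
    # realoffset=65536 (70000 & ~65535) and a 65536-byte read covering
    # [65536, 131072), so nearby reads in either direction are then served
    # from the cache instead of the file.
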
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

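    # Note on _getsegmentforrevs() (added): `istart[0] >> 16` drops the flag
    # bits packed into the low 16 bits of the first index field, leaving the
    # byte offset of the revision's data.  For inline revlogs the data is
    # interleaved with index entries, hence the (rev + 1) * self._io.size
    # correction applied to both ends of the segment.
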
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(self, revs,
                                                targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

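    # Note on _chunks() (added): each slice is fetched as one contiguous
    # segment and every revision's compressed chunk is then cut out of that
    # buffer by offset, which is much cheaper than one read per revision.
    # With sparse-read enabled, deltautil.slicechunk() splits the revisions
    # into dense groups so a single large read (bounded by targetsize) is
    # not wasted on big gaps.
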
    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot
        """
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, 'issnapshot'):
            # directly assign the method so later calls skip the attribute
            # testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1),
                              self.rawdata(rev2))

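    # Note on revdiff() (added): the first branch is a fast path -- when
    # rev2 is stored as a delta against rev1, the stored chunk already *is*
    # the requested delta and is returned without running mdiff.textdiff.
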
    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = ('_processflags(...) use the specialized variant')
        util.nouideprecwarn(msg, '5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == 'read':
            return flagutil.processflagsread(self, text, flags)
        else: # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = ('revlog.revision(..., raw=True) is deprecated, '
                   'use revlog.rawdata(...)')
            util.nouideprecwarn(msg, '5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == nullid:
            return "", {}

        # The text as stored inside the revlog. Might be the revision or might
        # need to be processed to retrieve the revision.
        rawtext = None

        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and the raw text is
            # cached, we can exit early.
            return rawtext, {}
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, {}

        sidedata = {}
        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            try:
                r = flagutil.processflagsread(self, rawtext, flags)
            except error.SidedataHashError as exc:
                msg = _("integrity check failed on %s:%s sidedata key %d")
                msg %= (self.indexfile, pycompat.bytestr(rev),
                        exc.sidedatakey)
                raise error.RevlogError(msg)
            text, validatehash, sidedata = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext # let us have a chance to free memory early
        return (rev, rawtext, False)

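    # Note on _rawtext() (added): _deltachain() returns the revisions whose
    # chunks must be applied, oldest first.  When the cached revision lies
    # on the chain (`stopped` is true), the cached text serves as the base
    # and only the deltas past it are fetched; otherwise the first chunk is
    # the base text itself and the remaining chunks are deltas.
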
    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(_("integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode)))
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(_("%s not found in the transaction")
                                    % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover
            # data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp('r') as ifh, self._datafp('w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

1832 def _nodeduplicatecallback(self, transaction, node):
1837 def _nodeduplicatecallback(self, transaction, node):
1833 """called when trying to add a node already stored.
1838 """called when trying to add a node already stored.
1834 """
1839 """
1835
1840
1836 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1841 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1837 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None,
1842 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None,
1838 sidedata=None):
1843 sidedata=None):
1839 """add a revision to the log
1844 """add a revision to the log
1840
1845
1841 text - the revision data to add
1846 text - the revision data to add
1842 transaction - the transaction object used for rollback
1847 transaction - the transaction object used for rollback
1843 link - the linkrev data to add
1848 link - the linkrev data to add
1844 p1, p2 - the parent nodeids of the revision
1849 p1, p2 - the parent nodeids of the revision
1845 cachedelta - an optional precomputed delta
1850 cachedelta - an optional precomputed delta
1846 node - nodeid of revision; typically node is not specified, and it is
1851 node - nodeid of revision; typically node is not specified, and it is
1847 computed by default as hash(text, p1, p2), however subclasses might
1852 computed by default as hash(text, p1, p2), however subclasses might
1848 use a different hashing method (and override checkhash() in that case)
1853 use a different hashing method (and override checkhash() in that case)
1849 flags - the known flags to set on the revision
1854 flags - the known flags to set on the revision
1850 deltacomputer - an optional deltacomputer instance shared between
1855 deltacomputer - an optional deltacomputer instance shared between
1851 multiple calls
1856 multiple calls
1852 """
1857 """
1853 if link == nullrev:
1858 if link == nullrev:
1854 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1859 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1855 % self.indexfile)
1860 % self.indexfile)
1856
1861
1857 if sidedata is None:
1862 if sidedata is None:
1858 sidedata = {}
1863 sidedata = {}
1859 elif not self.hassidedata:
1864 elif not self.hassidedata:
1860 raise error.ProgrammingError(
1865 raise error.ProgrammingError(
1861 _("trying to add sidedata to a revlog who don't support them")
1866 _("trying to add sidedata to a revlog who don't support them")
1862 )
1867 )
1863
1868
1864 if flags:
1869 if flags:
1865 node = node or self.hash(text, p1, p2)
1870 node = node or self.hash(text, p1, p2)
1866
1871
1867 rawtext, validatehash = flagutil.processflagswrite(self, text, flags,
1872 rawtext, validatehash = flagutil.processflagswrite(self, text, flags,
1868 sidedata=sidedata)
1873 sidedata=sidedata)
1869
1874
1870 # If the flag processor modifies the revision data, ignore any provided
1875 # If the flag processor modifies the revision data, ignore any provided
1871 # cachedelta.
1876 # cachedelta.
1872 if rawtext != text:
1877 if rawtext != text:
1873 cachedelta = None
1878 cachedelta = None
1874
1879
1875 if len(rawtext) > _maxentrysize:
1880 if len(rawtext) > _maxentrysize:
1876 raise error.RevlogError(
1881 raise error.RevlogError(
1877 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1882 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1878 % (self.indexfile, len(rawtext)))
1883 % (self.indexfile, len(rawtext)))
1879
1884
1880 node = node or self.hash(rawtext, p1, p2)
1885 node = node or self.hash(rawtext, p1, p2)
1881 if node in self.nodemap:
1886 if node in self.nodemap:
1882 return node
1887 return node
1883
1888
1884 if validatehash:
1889 if validatehash:
1885 self.checkhash(rawtext, node, p1=p1, p2=p2)
1890 self.checkhash(rawtext, node, p1=p1, p2=p2)
1886
1891
1887 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1892 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1888 flags, cachedelta=cachedelta,
1893 flags, cachedelta=cachedelta,
1889 deltacomputer=deltacomputer)
1894 deltacomputer=deltacomputer)
1890
1895
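A hedged usage sketch of the addrevision() API above; `rl` (a revlog
instance), `tr` (an open transaction) and the contents are assumptions,
and nullid is the "no parent" node:

    node1 = rl.addrevision(b'content v1\n', tr, 0, nullid, nullid)
    node2 = rl.addrevision(b'content v2\n', tr, 1, node1, nullid)
    # adding an already-stored node is a no-op that returns the
    # existing node, per the nodemap check above
    assert rl.addrevision(b'content v2\n', tr, 1, node1, nullid) == node2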
1891 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1896 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1892 cachedelta=None, deltacomputer=None):
1897 cachedelta=None, deltacomputer=None):
1893 """add a raw revision with known flags, node and parents
1898 """add a raw revision with known flags, node and parents
1894 useful when reusing a revision not stored in this revlog (e.g. received
1899 useful when reusing a revision not stored in this revlog (e.g. received
1895 over the wire, or read from an external bundle).
1900 over the wire, or read from an external bundle).
1896 """
1901 """
1897 dfh = None
1902 dfh = None
1898 if not self._inline:
1903 if not self._inline:
1899 dfh = self._datafp("a+")
1904 dfh = self._datafp("a+")
1900 ifh = self._indexfp("a+")
1905 ifh = self._indexfp("a+")
1901 try:
1906 try:
1902 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1907 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1903 flags, cachedelta, ifh, dfh,
1908 flags, cachedelta, ifh, dfh,
1904 deltacomputer=deltacomputer)
1909 deltacomputer=deltacomputer)
1905 finally:
1910 finally:
1906 if dfh:
1911 if dfh:
1907 dfh.close()
1912 dfh.close()
1908 ifh.close()
1913 ifh.close()
1909
1914
1910 def compress(self, data):
1915 def compress(self, data):
1911 """Generate a possibly-compressed representation of data."""
1916 """Generate a possibly-compressed representation of data."""
1912 if not data:
1917 if not data:
1913 return '', data
1918 return '', data
1914
1919
1915 compressed = self._compressor.compress(data)
1920 compressed = self._compressor.compress(data)
1916
1921
1917 if compressed:
1922 if compressed:
1918 # The revlog compressor added the header in the returned data.
1923 # The revlog compressor added the header in the returned data.
1919 return '', compressed
1924 return '', compressed
1920
1925
1921 if data[0:1] == '\0':
1926 if data[0:1] == '\0':
1922 return '', data
1927 return '', data
1923 return 'u', data
1928 return 'u', data
1924
1929
1925 def decompress(self, data):
1930 def decompress(self, data):
1926 """Decompress a revlog chunk.
1931 """Decompress a revlog chunk.
1927
1932
1928 The chunk is expected to begin with a header identifying the
1933 The chunk is expected to begin with a header identifying the
1929 format type so it can be routed to an appropriate decompressor.
1934 format type so it can be routed to an appropriate decompressor.
1930 """
1935 """
1931 if not data:
1936 if not data:
1932 return data
1937 return data
1933
1938
1934 # Revlogs are read much more frequently than they are written and many
1939 # Revlogs are read much more frequently than they are written and many
1935 # chunks only take microseconds to decompress, so performance is
1940 # chunks only take microseconds to decompress, so performance is
1936 # important here.
1941 # important here.
1937 #
1942 #
1938 # We can make a few assumptions about revlogs:
1943 # We can make a few assumptions about revlogs:
1939 #
1944 #
1940 # 1) the majority of chunks will be compressed (as opposed to inline
1945 # 1) the majority of chunks will be compressed (as opposed to inline
1941 # raw data).
1946 # raw data).
1942 # 2) decompressing *any* data will likely be at least 10x slower than
1947 # 2) decompressing *any* data will likely be at least 10x slower than
1943 # returning raw inline data.
1948 # returning raw inline data.
1944 # 3) we want to prioritize common and officially supported compression
1949 # 3) we want to prioritize common and officially supported compression
1945 # engines
1950 # engines
1946 #
1951 #
1947 # It follows that we want to optimize for "decompress compressed data
1952 # It follows that we want to optimize for "decompress compressed data
1948 # when encoded with common and officially supported compression engines"
1953 # when encoded with common and officially supported compression engines"
1949 # case over "raw data" and "data encoded by less common or non-official
1954 # case over "raw data" and "data encoded by less common or non-official
1950 # compression engines." That is why we have the inline lookup first
1955 # compression engines." That is why we have the inline lookup first
1951 # followed by the compengines lookup.
1956 # followed by the compengines lookup.
1952 #
1957 #
1953 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1958 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1954 # compressed chunks. And this matters for changelog and manifest reads.
1959 # compressed chunks. And this matters for changelog and manifest reads.
1955 t = data[0:1]
1960 t = data[0:1]
1956
1961
1957 if t == 'x':
1962 if t == 'x':
1958 try:
1963 try:
1959 return _zlibdecompress(data)
1964 return _zlibdecompress(data)
1960 except zlib.error as e:
1965 except zlib.error as e:
1961 raise error.RevlogError(_('revlog decompress error: %s') %
1966 raise error.RevlogError(_('revlog decompress error: %s') %
1962 stringutil.forcebytestr(e))
1967 stringutil.forcebytestr(e))
1963 # '\0' is more common than 'u' so it goes first.
1968 # '\0' is more common than 'u' so it goes first.
1964 elif t == '\0':
1969 elif t == '\0':
1965 return data
1970 return data
1966 elif t == 'u':
1971 elif t == 'u':
1967 return util.buffer(data, 1)
1972 return util.buffer(data, 1)
1968
1973
1969 try:
1974 try:
1970 compressor = self._decompressors[t]
1975 compressor = self._decompressors[t]
1971 except KeyError:
1976 except KeyError:
1972 try:
1977 try:
1973 engine = util.compengines.forrevlogheader(t)
1978 engine = util.compengines.forrevlogheader(t)
1974 compressor = engine.revlogcompressor(self._compengineopts)
1979 compressor = engine.revlogcompressor(self._compengineopts)
1975 self._decompressors[t] = compressor
1980 self._decompressors[t] = compressor
1976 except KeyError:
1981 except KeyError:
1977 raise error.RevlogError(_('unknown compression type %r') % t)
1982 raise error.RevlogError(_('unknown compression type %r') % t)
1978
1983
1979 return compressor.decompress(data)
1984 return compressor.decompress(data)
1980
1985
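A sketch of the one-byte header convention shared by compress() and
decompress(); `rl` is an assumed revlog instance, and headers other than
'u', '\0' and 'x' are resolved through util.compengines as shown above:

    hdr, packed = rl.compress(b'some revision data')
    stored = hdr + packed   # what gets written to the data file
    # stored[0:1] is 'u' (stored uncompressed), '\0' (raw data that
    # already begins with NUL), 'x' (zlib), or another engine header
    assert bytes(rl.decompress(stored)) == b'some revision data'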
1981 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1986 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1982 cachedelta, ifh, dfh, alwayscache=False,
1987 cachedelta, ifh, dfh, alwayscache=False,
1983 deltacomputer=None):
1988 deltacomputer=None):
1984 """internal function to add revisions to the log
1989 """internal function to add revisions to the log
1985
1990
1986 see addrevision for argument descriptions.
1991 see addrevision for argument descriptions.
1987
1992
1988 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1993 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1989
1994
1990 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
1995 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
1991 be used.
1996 be used.
1992
1997
1993 invariants:
1998 invariants:
1994 - rawtext is optional (can be None); if not set, cachedelta must be set.
1999 - rawtext is optional (can be None); if not set, cachedelta must be set.
1995 if both are set, they must correspond to each other.
2000 if both are set, they must correspond to each other.
1996 """
2001 """
1997 if node == nullid:
2002 if node == nullid:
1998 raise error.RevlogError(_("%s: attempt to add null revision") %
2003 raise error.RevlogError(_("%s: attempt to add null revision") %
1999 self.indexfile)
2004 self.indexfile)
2000 if node == wdirid or node in wdirfilenodeids:
2005 if node == wdirid or node in wdirfilenodeids:
2001 raise error.RevlogError(_("%s: attempt to add wdir revision") %
2006 raise error.RevlogError(_("%s: attempt to add wdir revision") %
2002 self.indexfile)
2007 self.indexfile)
2003
2008
2004 if self._inline:
2009 if self._inline:
2005 fh = ifh
2010 fh = ifh
2006 else:
2011 else:
2007 fh = dfh
2012 fh = dfh
2008
2013
2009 btext = [rawtext]
2014 btext = [rawtext]
2010
2015
2011 curr = len(self)
2016 curr = len(self)
2012 prev = curr - 1
2017 prev = curr - 1
2013 offset = self.end(prev)
2018 offset = self.end(prev)
2014 p1r, p2r = self.rev(p1), self.rev(p2)
2019 p1r, p2r = self.rev(p1), self.rev(p2)
2015
2020
2016 # full versions are inserted when the needed deltas
2021 # full versions are inserted when the needed deltas
2017 # become comparable to the uncompressed text
2022 # become comparable to the uncompressed text
2018 if rawtext is None:
2023 if rawtext is None:
2019 # need the rawtext size, before it was changed by flag processors, which is
2024 # need the rawtext size, before it was changed by flag processors, which is
2020 # the non-raw size. use revlog explicitly to avoid filelog's extra
2025 # the non-raw size. use revlog explicitly to avoid filelog's extra
2021 # logic that might remove metadata size.
2026 # logic that might remove metadata size.
2022 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2027 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2023 cachedelta[1])
2028 cachedelta[1])
2024 else:
2029 else:
2025 textlen = len(rawtext)
2030 textlen = len(rawtext)
2026
2031
2027 if deltacomputer is None:
2032 if deltacomputer is None:
2028 deltacomputer = deltautil.deltacomputer(self)
2033 deltacomputer = deltautil.deltacomputer(self)
2029
2034
2030 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2035 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2031
2036
2032 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2037 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2033
2038
2034 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
2039 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
2035 deltainfo.base, link, p1r, p2r, node)
2040 deltainfo.base, link, p1r, p2r, node)
2036 self.index.append(e)
2041 self.index.append(e)
2037 self.nodemap[node] = curr
2042 self.nodemap[node] = curr
2038
2043
2039 # Reset the pure node cache start lookup offset to account for new
2044 # Reset the pure node cache start lookup offset to account for new
2040 # revision.
2045 # revision.
2041 if self._nodepos is not None:
2046 if self._nodepos is not None:
2042 self._nodepos = curr
2047 self._nodepos = curr
2043
2048
2044 entry = self._io.packentry(e, self.node, self.version, curr)
2049 entry = self._io.packentry(e, self.node, self.version, curr)
2045 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
2050 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
2046 link, offset)
2051 link, offset)
2047
2052
2048 rawtext = btext[0]
2053 rawtext = btext[0]
2049
2054
2050 if alwayscache and rawtext is None:
2055 if alwayscache and rawtext is None:
2051 rawtext = deltacomputer.buildtext(revinfo, fh)
2056 rawtext = deltacomputer.buildtext(revinfo, fh)
2052
2057
2053 if type(rawtext) == bytes: # only accept immutable objects
2058 if type(rawtext) == bytes: # only accept immutable objects
2054 self._revisioncache = (node, curr, rawtext)
2059 self._revisioncache = (node, curr, rawtext)
2055 self._chainbasecache[curr] = deltainfo.chainbase
2060 self._chainbasecache[curr] = deltainfo.chainbase
2056 return node
2061 return node
2057
2062
2058 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2063 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2059 # Files opened in a+ mode have inconsistent behavior on various
2064 # Files opened in a+ mode have inconsistent behavior on various
2060 # platforms. Windows requires that a file positioning call be made
2065 # platforms. Windows requires that a file positioning call be made
2061 # when the file handle transitions between reads and writes. See
2066 # when the file handle transitions between reads and writes. See
2062 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2067 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2063 # platforms, Python or the platform itself can be buggy. Some versions
2068 # platforms, Python or the platform itself can be buggy. Some versions
2064 # of Solaris have been observed to not append at the end of the file
2069 # of Solaris have been observed to not append at the end of the file
2065 # if the file was seeked to before the end. See issue4943 for more.
2070 # if the file was seeked to before the end. See issue4943 for more.
2066 #
2071 #
2067 # We work around this issue by inserting a seek() before writing.
2072 # We work around this issue by inserting a seek() before writing.
2068 # Note: This is likely not necessary on Python 3. However, because
2073 # Note: This is likely not necessary on Python 3. However, because
2069 # the file handle is reused for reads and may be seeked there, we need
2074 # the file handle is reused for reads and may be seeked there, we need
2070 # to be careful before changing this.
2075 # to be careful before changing this.
2071 ifh.seek(0, os.SEEK_END)
2076 ifh.seek(0, os.SEEK_END)
2072 if dfh:
2077 if dfh:
2073 dfh.seek(0, os.SEEK_END)
2078 dfh.seek(0, os.SEEK_END)
2074
2079
2075 curr = len(self) - 1
2080 curr = len(self) - 1
2076 if not self._inline:
2081 if not self._inline:
2077 transaction.add(self.datafile, offset)
2082 transaction.add(self.datafile, offset)
2078 transaction.add(self.indexfile, curr * len(entry))
2083 transaction.add(self.indexfile, curr * len(entry))
2079 if data[0]:
2084 if data[0]:
2080 dfh.write(data[0])
2085 dfh.write(data[0])
2081 dfh.write(data[1])
2086 dfh.write(data[1])
2082 ifh.write(entry)
2087 ifh.write(entry)
2083 else:
2088 else:
2084 offset += curr * self._io.size
2089 offset += curr * self._io.size
2085 transaction.add(self.indexfile, offset, curr)
2090 transaction.add(self.indexfile, offset, curr)
2086 ifh.write(entry)
2091 ifh.write(entry)
2087 ifh.write(data[0])
2092 ifh.write(data[0])
2088 ifh.write(data[1])
2093 ifh.write(data[1])
2089 self._enforceinlinesize(transaction, ifh)
2094 self._enforceinlinesize(transaction, ifh)
2090
2095
2091 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2096 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2092 """
2097 """
2093 add a delta group
2098 add a delta group
2094
2099
2095 given a set of deltas, add them to the revision log. the
2100 given a set of deltas, add them to the revision log. the
2096 first delta is against its parent, which should be in our
2101 first delta is against its parent, which should be in our
2097 log; the rest are against the previous delta.
2102 log; the rest are against the previous delta.
2098
2103
2099 If ``addrevisioncb`` is defined, it will be called with arguments of
2104 If ``addrevisioncb`` is defined, it will be called with arguments of
2100 this revlog and the node that was added.
2105 this revlog and the node that was added.
2101 """
2106 """
2102
2107
2103 if self._writinghandles:
2108 if self._writinghandles:
2104 raise error.ProgrammingError('cannot nest addgroup() calls')
2109 raise error.ProgrammingError('cannot nest addgroup() calls')
2105
2110
2106 nodes = []
2111 nodes = []
2107
2112
2108 r = len(self)
2113 r = len(self)
2109 end = 0
2114 end = 0
2110 if r:
2115 if r:
2111 end = self.end(r - 1)
2116 end = self.end(r - 1)
2112 ifh = self._indexfp("a+")
2117 ifh = self._indexfp("a+")
2113 isize = r * self._io.size
2118 isize = r * self._io.size
2114 if self._inline:
2119 if self._inline:
2115 transaction.add(self.indexfile, end + isize, r)
2120 transaction.add(self.indexfile, end + isize, r)
2116 dfh = None
2121 dfh = None
2117 else:
2122 else:
2118 transaction.add(self.indexfile, isize, r)
2123 transaction.add(self.indexfile, isize, r)
2119 transaction.add(self.datafile, end)
2124 transaction.add(self.datafile, end)
2120 dfh = self._datafp("a+")
2125 dfh = self._datafp("a+")
2121 def flush():
2126 def flush():
2122 if dfh:
2127 if dfh:
2123 dfh.flush()
2128 dfh.flush()
2124 ifh.flush()
2129 ifh.flush()
2125
2130
2126 self._writinghandles = (ifh, dfh)
2131 self._writinghandles = (ifh, dfh)
2127
2132
2128 try:
2133 try:
2129 deltacomputer = deltautil.deltacomputer(self)
2134 deltacomputer = deltautil.deltacomputer(self)
2130 # loop through our set of deltas
2135 # loop through our set of deltas
2131 for data in deltas:
2136 for data in deltas:
2132 node, p1, p2, linknode, deltabase, delta, flags = data
2137 node, p1, p2, linknode, deltabase, delta, flags = data
2133 link = linkmapper(linknode)
2138 link = linkmapper(linknode)
2134 flags = flags or REVIDX_DEFAULT_FLAGS
2139 flags = flags or REVIDX_DEFAULT_FLAGS
2135
2140
2136 nodes.append(node)
2141 nodes.append(node)
2137
2142
2138 if node in self.nodemap:
2143 if node in self.nodemap:
2139 self._nodeduplicatecallback(transaction, node)
2144 self._nodeduplicatecallback(transaction, node)
2140 # this can happen if two branches make the same change
2145 # this can happen if two branches make the same change
2141 continue
2146 continue
2142
2147
2143 for p in (p1, p2):
2148 for p in (p1, p2):
2144 if p not in self.nodemap:
2149 if p not in self.nodemap:
2145 raise error.LookupError(p, self.indexfile,
2150 raise error.LookupError(p, self.indexfile,
2146 _('unknown parent'))
2151 _('unknown parent'))
2147
2152
2148 if deltabase not in self.nodemap:
2153 if deltabase not in self.nodemap:
2149 raise error.LookupError(deltabase, self.indexfile,
2154 raise error.LookupError(deltabase, self.indexfile,
2150 _('unknown delta base'))
2155 _('unknown delta base'))
2151
2156
2152 baserev = self.rev(deltabase)
2157 baserev = self.rev(deltabase)
2153
2158
2154 if baserev != nullrev and self.iscensored(baserev):
2159 if baserev != nullrev and self.iscensored(baserev):
2155 # if base is censored, delta must be full replacement in a
2160 # if base is censored, delta must be full replacement in a
2156 # single patch operation
2161 # single patch operation
2157 hlen = struct.calcsize(">lll")
2162 hlen = struct.calcsize(">lll")
2158 oldlen = self.rawsize(baserev)
2163 oldlen = self.rawsize(baserev)
2159 newlen = len(delta) - hlen
2164 newlen = len(delta) - hlen
2160 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2165 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2161 raise error.CensoredBaseError(self.indexfile,
2166 raise error.CensoredBaseError(self.indexfile,
2162 self.node(baserev))
2167 self.node(baserev))
2163
2168
2164 if not flags and self._peek_iscensored(baserev, delta, flush):
2169 if not flags and self._peek_iscensored(baserev, delta, flush):
2165 flags |= REVIDX_ISCENSORED
2170 flags |= REVIDX_ISCENSORED
2166
2171
2167 # We assume consumers of addrevisioncb will want to retrieve
2172 # We assume consumers of addrevisioncb will want to retrieve
2168 # the added revision, which will require a call to
2173 # the added revision, which will require a call to
2169 # revision(). revision() will fast path if there is a cache
2174 # revision(). revision() will fast path if there is a cache
2170 # hit. So, we tell _addrevision() to always cache in this case.
2175 # hit. So, we tell _addrevision() to always cache in this case.
2171 # We're only using addgroup() in the context of changegroup
2176 # We're only using addgroup() in the context of changegroup
2172 # generation so the revision data can always be handled as raw
2177 # generation so the revision data can always be handled as raw
2173 # by the flagprocessor.
2178 # by the flagprocessor.
2174 self._addrevision(node, None, transaction, link,
2179 self._addrevision(node, None, transaction, link,
2175 p1, p2, flags, (baserev, delta),
2180 p1, p2, flags, (baserev, delta),
2176 ifh, dfh,
2181 ifh, dfh,
2177 alwayscache=bool(addrevisioncb),
2182 alwayscache=bool(addrevisioncb),
2178 deltacomputer=deltacomputer)
2183 deltacomputer=deltacomputer)
2179
2184
2180 if addrevisioncb:
2185 if addrevisioncb:
2181 addrevisioncb(self, node)
2186 addrevisioncb(self, node)
2182
2187
2183 if not dfh and not self._inline:
2188 if not dfh and not self._inline:
2184 # addrevision switched from inline to conventional
2189 # addrevision switched from inline to conventional
2185 # reopen the index
2190 # reopen the index
2186 ifh.close()
2191 ifh.close()
2187 dfh = self._datafp("a+")
2192 dfh = self._datafp("a+")
2188 ifh = self._indexfp("a+")
2193 ifh = self._indexfp("a+")
2189 self._writinghandles = (ifh, dfh)
2194 self._writinghandles = (ifh, dfh)
2190 finally:
2195 finally:
2191 self._writinghandles = None
2196 self._writinghandles = None
2192
2197
2193 if dfh:
2198 if dfh:
2194 dfh.close()
2199 dfh.close()
2195 ifh.close()
2200 ifh.close()
2196
2201
2197 return nodes
2202 return nodes
2198
2203
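A sketch of the input shape addgroup() consumes; every name here is an
assumption, and each delta is the 7-tuple unpacked in the loop above:

    deltas = [
        # (node, p1, p2, linknode, deltabase, delta, flags)
        (node, p1, p2, linknode, deltabase, deltabytes, 0),
    ]
    # linkmapper translates changelog nodes into local link revisions
    added = rl.addgroup(deltas, lambda linknode: cl.rev(linknode), tr)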
2199 def iscensored(self, rev):
2204 def iscensored(self, rev):
2200 """Check if a file revision is censored."""
2205 """Check if a file revision is censored."""
2201 if not self._censorable:
2206 if not self._censorable:
2202 return False
2207 return False
2203
2208
2204 return self.flags(rev) & REVIDX_ISCENSORED
2209 return self.flags(rev) & REVIDX_ISCENSORED
2205
2210
2206 def _peek_iscensored(self, baserev, delta, flush):
2211 def _peek_iscensored(self, baserev, delta, flush):
2207 """Quickly check if a delta produces a censored revision."""
2212 """Quickly check if a delta produces a censored revision."""
2208 if not self._censorable:
2213 if not self._censorable:
2209 return False
2214 return False
2210
2215
2211 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2216 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2212
2217
2213 def getstrippoint(self, minlink):
2218 def getstrippoint(self, minlink):
2214 """find the minimum rev that must be stripped to strip the linkrev
2219 """find the minimum rev that must be stripped to strip the linkrev
2215
2220
2216 Returns a tuple containing the minimum rev and a set of all revs that
2221 Returns a tuple containing the minimum rev and a set of all revs that
2217 have linkrevs that will be broken by this strip.
2222 have linkrevs that will be broken by this strip.
2218 """
2223 """
2219 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2224 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2220 self.headrevs(),
2225 self.headrevs(),
2221 self.linkrev, self.parentrevs)
2226 self.linkrev, self.parentrevs)
2222
2227
2223 def strip(self, minlink, transaction):
2228 def strip(self, minlink, transaction):
2224 """truncate the revlog on the first revision with a linkrev >= minlink
2229 """truncate the revlog on the first revision with a linkrev >= minlink
2225
2230
2226 This function is called when we're stripping revision minlink and
2231 This function is called when we're stripping revision minlink and
2227 its descendants from the repository.
2232 its descendants from the repository.
2228
2233
2229 We have to remove all revisions with linkrev >= minlink, because
2234 We have to remove all revisions with linkrev >= minlink, because
2230 the equivalent changelog revisions will be renumbered after the
2235 the equivalent changelog revisions will be renumbered after the
2231 strip.
2236 strip.
2232
2237
2233 So we truncate the revlog on the first of these revisions, and
2238 So we truncate the revlog on the first of these revisions, and
2234 trust that the caller has saved the revisions that shouldn't be
2239 trust that the caller has saved the revisions that shouldn't be
2235 removed and that it'll re-add them after this truncation.
2240 removed and that it'll re-add them after this truncation.
2236 """
2241 """
2237 if len(self) == 0:
2242 if len(self) == 0:
2238 return
2243 return
2239
2244
2240 rev, _ = self.getstrippoint(minlink)
2245 rev, _ = self.getstrippoint(minlink)
2241 if rev == len(self):
2246 if rev == len(self):
2242 return
2247 return
2243
2248
2244 # first truncate the files on disk
2249 # first truncate the files on disk
2245 end = self.start(rev)
2250 end = self.start(rev)
2246 if not self._inline:
2251 if not self._inline:
2247 transaction.add(self.datafile, end)
2252 transaction.add(self.datafile, end)
2248 end = rev * self._io.size
2253 end = rev * self._io.size
2249 else:
2254 else:
2250 end += rev * self._io.size
2255 end += rev * self._io.size
2251
2256
2252 transaction.add(self.indexfile, end)
2257 transaction.add(self.indexfile, end)
2253
2258
2254 # then reset internal state in memory to forget those revisions
2259 # then reset internal state in memory to forget those revisions
2255 self._revisioncache = None
2260 self._revisioncache = None
2256 self._chaininfocache = {}
2261 self._chaininfocache = {}
2257 self._chunkclear()
2262 self._chunkclear()
2258 for x in pycompat.xrange(rev, len(self)):
2263 for x in pycompat.xrange(rev, len(self)):
2259 del self.nodemap[self.node(x)]
2264 del self.nodemap[self.node(x)]
2260
2265
2261 del self.index[rev:-1]
2266 del self.index[rev:-1]
2262 self._nodepos = None
2267 self._nodepos = None
2263
2268
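A sketch of the strip workflow, under the assumption that the caller has
already saved the revisions it intends to re-add (see the docstring
above); `rl`, `tr` and the cutoff value are illustrative:

    minlink = 5
    rev, broken = rl.getstrippoint(minlink)
    # `broken` holds revs whose linkrevs this strip would invalidate
    rl.strip(minlink, tr)   # truncates revision `rev` and later on disk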
2264 def checksize(self):
2269 def checksize(self):
2265 """Check size of index and data files
2270 """Check size of index and data files
2266
2271
2267 return a (dd, di) tuple.
2272 return a (dd, di) tuple.
2268 - dd: extra bytes for the "data" file
2273 - dd: extra bytes for the "data" file
2269 - di: extra bytes for the "index" file
2274 - di: extra bytes for the "index" file
2270
2275
2271 A healthy revlog will return (0, 0).
2276 A healthy revlog will return (0, 0).
2272 """
2277 """
2273 expected = 0
2278 expected = 0
2274 if len(self):
2279 if len(self):
2275 expected = max(0, self.end(len(self) - 1))
2280 expected = max(0, self.end(len(self) - 1))
2276
2281
2277 try:
2282 try:
2278 with self._datafp() as f:
2283 with self._datafp() as f:
2279 f.seek(0, io.SEEK_END)
2284 f.seek(0, io.SEEK_END)
2280 actual = f.tell()
2285 actual = f.tell()
2281 dd = actual - expected
2286 dd = actual - expected
2282 except IOError as inst:
2287 except IOError as inst:
2283 if inst.errno != errno.ENOENT:
2288 if inst.errno != errno.ENOENT:
2284 raise
2289 raise
2285 dd = 0
2290 dd = 0
2286
2291
2287 try:
2292 try:
2288 f = self.opener(self.indexfile)
2293 f = self.opener(self.indexfile)
2289 f.seek(0, io.SEEK_END)
2294 f.seek(0, io.SEEK_END)
2290 actual = f.tell()
2295 actual = f.tell()
2291 f.close()
2296 f.close()
2292 s = self._io.size
2297 s = self._io.size
2293 i = max(0, actual // s)
2298 i = max(0, actual // s)
2294 di = actual - (i * s)
2299 di = actual - (i * s)
2295 if self._inline:
2300 if self._inline:
2296 databytes = 0
2301 databytes = 0
2297 for r in self:
2302 for r in self:
2298 databytes += max(0, self.length(r))
2303 databytes += max(0, self.length(r))
2299 dd = 0
2304 dd = 0
2300 di = actual - len(self) * s - databytes
2305 di = actual - len(self) * s - databytes
2301 except IOError as inst:
2306 except IOError as inst:
2302 if inst.errno != errno.ENOENT:
2307 if inst.errno != errno.ENOENT:
2303 raise
2308 raise
2304 di = 0
2309 di = 0
2305
2310
2306 return (dd, di)
2311 return (dd, di)
2307
2312
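Interpreting checksize(), as a sketch; `rl` is an assumed revlog:

    dd, di = rl.checksize()
    # dd != 0: the data file holds bytes beyond the last indexed chunk
    # di != 0: the index does not divide into whole entries (or, for
    #          inline revlogs, does not match the interleaved data)
    healthy = (dd, di) == (0, 0)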
2308 def files(self):
2313 def files(self):
2309 res = [self.indexfile]
2314 res = [self.indexfile]
2310 if not self._inline:
2315 if not self._inline:
2311 res.append(self.datafile)
2316 res.append(self.datafile)
2312 return res
2317 return res
2313
2318
2314 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2319 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2315 assumehaveparentrevisions=False,
2320 assumehaveparentrevisions=False,
2316 deltamode=repository.CG_DELTAMODE_STD):
2321 deltamode=repository.CG_DELTAMODE_STD):
2317 if nodesorder not in ('nodes', 'storage', 'linear', None):
2322 if nodesorder not in ('nodes', 'storage', 'linear', None):
2318 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2323 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2319 nodesorder)
2324 nodesorder)
2320
2325
2321 if nodesorder is None and not self._generaldelta:
2326 if nodesorder is None and not self._generaldelta:
2322 nodesorder = 'storage'
2327 nodesorder = 'storage'
2323
2328
2324 if (not self._storedeltachains and
2329 if (not self._storedeltachains and
2325 deltamode != repository.CG_DELTAMODE_PREV):
2330 deltamode != repository.CG_DELTAMODE_PREV):
2326 deltamode = repository.CG_DELTAMODE_FULL
2331 deltamode = repository.CG_DELTAMODE_FULL
2327
2332
2328 return storageutil.emitrevisions(
2333 return storageutil.emitrevisions(
2329 self, nodes, nodesorder, revlogrevisiondelta,
2334 self, nodes, nodesorder, revlogrevisiondelta,
2330 deltaparentfn=self.deltaparent,
2335 deltaparentfn=self.deltaparent,
2331 candeltafn=self.candelta,
2336 candeltafn=self.candelta,
2332 rawsizefn=self.rawsize,
2337 rawsizefn=self.rawsize,
2333 revdifffn=self.revdiff,
2338 revdifffn=self.revdiff,
2334 flagsfn=self.flags,
2339 flagsfn=self.flags,
2335 deltamode=deltamode,
2340 deltamode=deltamode,
2336 revisiondata=revisiondata,
2341 revisiondata=revisiondata,
2337 assumehaveparentrevisions=assumehaveparentrevisions)
2342 assumehaveparentrevisions=assumehaveparentrevisions)
2338
2343
2339 DELTAREUSEALWAYS = 'always'
2344 DELTAREUSEALWAYS = 'always'
2340 DELTAREUSESAMEREVS = 'samerevs'
2345 DELTAREUSESAMEREVS = 'samerevs'
2341 DELTAREUSENEVER = 'never'
2346 DELTAREUSENEVER = 'never'
2342
2347
2343 DELTAREUSEFULLADD = 'fulladd'
2348 DELTAREUSEFULLADD = 'fulladd'
2344
2349
2345 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2350 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2346
2351
2347 def clone(self, tr, destrevlog, addrevisioncb=None,
2352 def clone(self, tr, destrevlog, addrevisioncb=None,
2348 deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
2353 deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
2349 """Copy this revlog to another, possibly with format changes.
2354 """Copy this revlog to another, possibly with format changes.
2350
2355
2351 The destination revlog will contain the same revisions and nodes.
2356 The destination revlog will contain the same revisions and nodes.
2352 However, it may not be bit-for-bit identical due to e.g. delta encoding
2357 However, it may not be bit-for-bit identical due to e.g. delta encoding
2353 differences.
2358 differences.
2354
2359
2355 The ``deltareuse`` argument controls how deltas from the existing revlog
2360 The ``deltareuse`` argument controls how deltas from the existing revlog
2356 are preserved in the destination revlog. The argument can have the
2361 are preserved in the destination revlog. The argument can have the
2357 following values:
2362 following values:
2358
2363
2359 DELTAREUSEALWAYS
2364 DELTAREUSEALWAYS
2360 Deltas will always be reused (if possible), even if the destination
2365 Deltas will always be reused (if possible), even if the destination
2361 revlog would not select the same revisions for the delta. This is the
2366 revlog would not select the same revisions for the delta. This is the
2362 fastest mode of operation.
2367 fastest mode of operation.
2363 DELTAREUSESAMEREVS
2368 DELTAREUSESAMEREVS
2364 Deltas will be reused if the destination revlog would pick the same
2369 Deltas will be reused if the destination revlog would pick the same
2365 revisions for the delta. This mode strikes a balance between speed
2370 revisions for the delta. This mode strikes a balance between speed
2366 and optimization.
2371 and optimization.
2367 DELTAREUSENEVER
2372 DELTAREUSENEVER
2368 Deltas will never be reused. This is the slowest mode of execution.
2373 Deltas will never be reused. This is the slowest mode of execution.
2369 This mode can be used to recompute deltas (e.g. if the diff/delta
2374 This mode can be used to recompute deltas (e.g. if the diff/delta
2370 algorithm changes).
2375 algorithm changes).
2371 DELTAREUSEFULLADD
2376 DELTAREUSEFULLADD
2372 Revisions will be re-added as if they were new content. This is
2377 Revisions will be re-added as if they were new content. This is
2373 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2378 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2374 e.g. large file detection and handling.
2379 e.g. large file detection and handling.
2375
2380
2376 Delta computation can be slow, so the choice of delta reuse policy can
2381 Delta computation can be slow, so the choice of delta reuse policy can
2377 significantly affect run time.
2382 significantly affect run time.
2378
2383
2379 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2384 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2380 two extremes. Deltas will be reused if they are appropriate. But if the
2385 two extremes. Deltas will be reused if they are appropriate. But if the
2381 delta could choose a better revision, it will do so. This means if you
2386 delta could choose a better revision, it will do so. This means if you
2382 are converting a non-generaldelta revlog to a generaldelta revlog,
2387 are converting a non-generaldelta revlog to a generaldelta revlog,
2383 deltas will be recomputed if the delta's parent isn't a parent of the
2388 deltas will be recomputed if the delta's parent isn't a parent of the
2384 revision.
2389 revision.
2385
2390
2386 In addition to the delta policy, the ``forcedeltabothparents``
2391 In addition to the delta policy, the ``forcedeltabothparents``
2387 argument controls whether to force compute deltas against both parents
2392 argument controls whether to force compute deltas against both parents
2388 for merges. If unset, the destination revlog's existing setting is kept.
2393 for merges. If unset, the destination revlog's existing setting is kept.
2389 """
2394 """
2390 if deltareuse not in self.DELTAREUSEALL:
2395 if deltareuse not in self.DELTAREUSEALL:
2391 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2396 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2392
2397
2393 if len(destrevlog):
2398 if len(destrevlog):
2394 raise ValueError(_('destination revlog is not empty'))
2399 raise ValueError(_('destination revlog is not empty'))
2395
2400
2396 if getattr(self, 'filteredrevs', None):
2401 if getattr(self, 'filteredrevs', None):
2397 raise ValueError(_('source revlog has filtered revisions'))
2402 raise ValueError(_('source revlog has filtered revisions'))
2398 if getattr(destrevlog, 'filteredrevs', None):
2403 if getattr(destrevlog, 'filteredrevs', None):
2399 raise ValueError(_('destination revlog has filtered revisions'))
2404 raise ValueError(_('destination revlog has filtered revisions'))
2400
2405
2401 # lazydelta and lazydeltabase control whether to reuse a cached delta,
2406 # lazydelta and lazydeltabase control whether to reuse a cached delta,
2402 # if possible.
2407 # if possible.
2403 oldlazydelta = destrevlog._lazydelta
2408 oldlazydelta = destrevlog._lazydelta
2404 oldlazydeltabase = destrevlog._lazydeltabase
2409 oldlazydeltabase = destrevlog._lazydeltabase
2405 oldamd = destrevlog._deltabothparents
2410 oldamd = destrevlog._deltabothparents
2406
2411
2407 try:
2412 try:
2408 if deltareuse == self.DELTAREUSEALWAYS:
2413 if deltareuse == self.DELTAREUSEALWAYS:
2409 destrevlog._lazydeltabase = True
2414 destrevlog._lazydeltabase = True
2410 destrevlog._lazydelta = True
2415 destrevlog._lazydelta = True
2411 elif deltareuse == self.DELTAREUSESAMEREVS:
2416 elif deltareuse == self.DELTAREUSESAMEREVS:
2412 destrevlog._lazydeltabase = False
2417 destrevlog._lazydeltabase = False
2413 destrevlog._lazydelta = True
2418 destrevlog._lazydelta = True
2414 elif deltareuse == self.DELTAREUSENEVER:
2419 elif deltareuse == self.DELTAREUSENEVER:
2415 destrevlog._lazydeltabase = False
2420 destrevlog._lazydeltabase = False
2416 destrevlog._lazydelta = False
2421 destrevlog._lazydelta = False
2417
2422
2418 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2423 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2419
2424
2420 self._clone(tr, destrevlog, addrevisioncb, deltareuse,
2425 self._clone(tr, destrevlog, addrevisioncb, deltareuse,
2421 forcedeltabothparents)
2426 forcedeltabothparents)
2422
2427
2423 finally:
2428 finally:
2424 destrevlog._lazydelta = oldlazydelta
2429 destrevlog._lazydelta = oldlazydelta
2425 destrevlog._lazydeltabase = oldlazydeltabase
2430 destrevlog._lazydeltabase = oldlazydeltabase
2426 destrevlog._deltabothparents = oldamd
2431 destrevlog._deltabothparents = oldamd
2427
2432
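A usage sketch of clone() with one of the policies documented above,
e.g. to recompute every delta during a format change; `src`, `dest`
(an empty revlog opened with the target format) and `tr` are assumed:

    src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER)
    # DELTAREUSESAMEREVS, the default, would instead reuse a delta only
    # when `dest` would pick the same base revision anyway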
2428 def _clone(self, tr, destrevlog, addrevisioncb, deltareuse,
2433 def _clone(self, tr, destrevlog, addrevisioncb, deltareuse,
2429 forcedeltabothparents):
2434 forcedeltabothparents):
2430 """perform the core duty of `revlog.clone` after parameter processing"""
2435 """perform the core duty of `revlog.clone` after parameter processing"""
2431 deltacomputer = deltautil.deltacomputer(destrevlog)
2436 deltacomputer = deltautil.deltacomputer(destrevlog)
2432 index = self.index
2437 index = self.index
2433 for rev in self:
2438 for rev in self:
2434 entry = index[rev]
2439 entry = index[rev]
2435
2440
2436 # Some classes override linkrev to take filtered revs into
2441 # Some classes override linkrev to take filtered revs into
2437 # account. Use raw entry from index.
2442 # account. Use raw entry from index.
2438 flags = entry[0] & 0xffff
2443 flags = entry[0] & 0xffff
2439 linkrev = entry[4]
2444 linkrev = entry[4]
2440 p1 = index[entry[5]][7]
2445 p1 = index[entry[5]][7]
2441 p2 = index[entry[6]][7]
2446 p2 = index[entry[6]][7]
2442 node = entry[7]
2447 node = entry[7]
2443
2448
2444 # (Possibly) reuse the delta from the revlog if allowed and
2449 # (Possibly) reuse the delta from the revlog if allowed and
2445 # the revlog chunk is a delta.
2450 # the revlog chunk is a delta.
2446 cachedelta = None
2451 cachedelta = None
2447 rawtext = None
2452 rawtext = None
2448 if deltareuse == self.DELTAREUSEFULLADD:
2453 if deltareuse == self.DELTAREUSEFULLADD:
2449 text = self.revision(rev)
2454 text = self.revision(rev)
2450 destrevlog.addrevision(text, tr, linkrev, p1, p2,
2455 destrevlog.addrevision(text, tr, linkrev, p1, p2,
2451 cachedelta=cachedelta,
2456 cachedelta=cachedelta,
2452 node=node, flags=flags,
2457 node=node, flags=flags,
2453 deltacomputer=deltacomputer)
2458 deltacomputer=deltacomputer)
2454 else:
2459 else:
2455 if destrevlog._lazydelta:
2460 if destrevlog._lazydelta:
2456 dp = self.deltaparent(rev)
2461 dp = self.deltaparent(rev)
2457 if dp != nullrev:
2462 if dp != nullrev:
2458 cachedelta = (dp, bytes(self._chunk(rev)))
2463 cachedelta = (dp, bytes(self._chunk(rev)))
2459
2464
2460 if not cachedelta:
2465 if not cachedelta:
2461 rawtext = self.rawdata(rev)
2466 rawtext = self.rawdata(rev)
2462
2467
2463 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2468 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2464 checkambig=False)
2469 checkambig=False)
2465 dfh = None
2470 dfh = None
2466 if not destrevlog._inline:
2471 if not destrevlog._inline:
2467 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2472 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2468 try:
2473 try:
2469 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2474 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2470 p2, flags, cachedelta, ifh, dfh,
2475 p2, flags, cachedelta, ifh, dfh,
2471 deltacomputer=deltacomputer)
2476 deltacomputer=deltacomputer)
2472 finally:
2477 finally:
2473 if dfh:
2478 if dfh:
2474 dfh.close()
2479 dfh.close()
2475 ifh.close()
2480 ifh.close()
2476
2481
2477 if addrevisioncb:
2482 if addrevisioncb:
2478 addrevisioncb(self, rev, node)
2483 addrevisioncb(self, rev, node)
2479
2484
2480 def censorrevision(self, tr, censornode, tombstone=b''):
2485 def censorrevision(self, tr, censornode, tombstone=b''):
2481 if (self.version & 0xFFFF) == REVLOGV0:
2486 if (self.version & 0xFFFF) == REVLOGV0:
2482 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2487 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2483 self.version)
2488 self.version)
2484
2489
2485 censorrev = self.rev(censornode)
2490 censorrev = self.rev(censornode)
2486 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2491 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2487
2492
2488 if len(tombstone) > self.rawsize(censorrev):
2493 if len(tombstone) > self.rawsize(censorrev):
2489 raise error.Abort(_('censor tombstone must be no longer than '
2494 raise error.Abort(_('censor tombstone must be no longer than '
2490 'censored data'))
2495 'censored data'))
2491
2496
2492 # Rewriting the revlog in place is hard. Our strategy for censoring is
2497 # Rewriting the revlog in place is hard. Our strategy for censoring is
2493 # to create a new revlog, copy all revisions to it, then replace the
2498 # to create a new revlog, copy all revisions to it, then replace the
2494 # revlogs on transaction close.
2499 # revlogs on transaction close.
2495
2500
2496 newindexfile = self.indexfile + b'.tmpcensored'
2501 newindexfile = self.indexfile + b'.tmpcensored'
2497 newdatafile = self.datafile + b'.tmpcensored'
2502 newdatafile = self.datafile + b'.tmpcensored'
2498
2503
2499 # This is a bit dangerous. We could easily have a mismatch of state.
2504 # This is a bit dangerous. We could easily have a mismatch of state.
2500 newrl = revlog(self.opener, newindexfile, newdatafile,
2505 newrl = revlog(self.opener, newindexfile, newdatafile,
2501 censorable=True)
2506 censorable=True)
2502 newrl.version = self.version
2507 newrl.version = self.version
2503 newrl._generaldelta = self._generaldelta
2508 newrl._generaldelta = self._generaldelta
2504 newrl._io = self._io
2509 newrl._io = self._io
2505
2510
2506 for rev in self.revs():
2511 for rev in self.revs():
2507 node = self.node(rev)
2512 node = self.node(rev)
2508 p1, p2 = self.parents(node)
2513 p1, p2 = self.parents(node)
2509
2514
2510 if rev == censorrev:
2515 if rev == censorrev:
2511 newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
2516 newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
2512 p1, p2, censornode, REVIDX_ISCENSORED)
2517 p1, p2, censornode, REVIDX_ISCENSORED)
2513
2518
2514 if newrl.deltaparent(rev) != nullrev:
2519 if newrl.deltaparent(rev) != nullrev:
2515 raise error.Abort(_('censored revision stored as delta; '
2520 raise error.Abort(_('censored revision stored as delta; '
2516 'cannot censor'),
2521 'cannot censor'),
2517 hint=_('censoring of revlogs is not '
2522 hint=_('censoring of revlogs is not '
2518 'fully implemented; please report '
2523 'fully implemented; please report '
2519 'this bug'))
2524 'this bug'))
2520 continue
2525 continue
2521
2526
2522 if self.iscensored(rev):
2527 if self.iscensored(rev):
2523 if self.deltaparent(rev) != nullrev:
2528 if self.deltaparent(rev) != nullrev:
2524 raise error.Abort(_('cannot censor due to censored '
2529 raise error.Abort(_('cannot censor due to censored '
2525 'revision having delta stored'))
2530 'revision having delta stored'))
2526 rawtext = self._chunk(rev)
2531 rawtext = self._chunk(rev)
2527 else:
2532 else:
2528 rawtext = self.rawdata(rev)
2533 rawtext = self.rawdata(rev)
2529
2534
2530 newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
2535 newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
2531 self.flags(rev))
2536 self.flags(rev))
2532
2537
2533 tr.addbackup(self.indexfile, location='store')
2538 tr.addbackup(self.indexfile, location='store')
2534 if not self._inline:
2539 if not self._inline:
2535 tr.addbackup(self.datafile, location='store')
2540 tr.addbackup(self.datafile, location='store')
2536
2541
2537 self.opener.rename(newrl.indexfile, self.indexfile)
2542 self.opener.rename(newrl.indexfile, self.indexfile)
2538 if not self._inline:
2543 if not self._inline:
2539 self.opener.rename(newrl.datafile, self.datafile)
2544 self.opener.rename(newrl.datafile, self.datafile)
2540
2545
2541 self.clearcaches()
2546 self.clearcaches()
2542 self._loadindex()
2547 self._loadindex()
2543
2548
2544 def verifyintegrity(self, state):
2549 def verifyintegrity(self, state):
2545 """Verifies the integrity of the revlog.
2550 """Verifies the integrity of the revlog.
2546
2551
2547 Yields ``revlogproblem`` instances describing problems that are
2552 Yields ``revlogproblem`` instances describing problems that are
2548 found.
2553 found.
2549 """
2554 """
2550 dd, di = self.checksize()
2555 dd, di = self.checksize()
2551 if dd:
2556 if dd:
2552 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2557 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2553 if di:
2558 if di:
2554 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2559 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2555
2560
2556 version = self.version & 0xFFFF
2561 version = self.version & 0xFFFF
2557
2562
2558 # The verifier tells us what version revlog we should be.
2563 # The verifier tells us what version revlog we should be.
2559 if version != state['expectedversion']:
2564 if version != state['expectedversion']:
2560 yield revlogproblem(
2565 yield revlogproblem(
2561 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2566 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2562 (self.indexfile, version, state['expectedversion']))
2567 (self.indexfile, version, state['expectedversion']))
2563
2568
2564 state['skipread'] = set()
2569 state['skipread'] = set()
2565
2570
2566 for rev in self:
2571 for rev in self:
2567 node = self.node(rev)
2572 node = self.node(rev)
2568
2573
2569 # Verify contents. 4 cases to care about:
2574 # Verify contents. 4 cases to care about:
2570 #
2575 #
2571 # common: the most common case
2576 # common: the most common case
2572 # rename: with a rename
2577 # rename: with a rename
2573 # meta: file content starts with b'\1\n', the metadata
2578 # meta: file content starts with b'\1\n', the metadata
2574 # header defined in filelog.py, but without a rename
2579 # header defined in filelog.py, but without a rename
2575 # ext: content stored externally
2580 # ext: content stored externally
2576 #
2581 #
2577 # More formally, their differences are shown below:
2582 # More formally, their differences are shown below:
2578 #
2583 #
2579 # | common | rename | meta | ext
2584 # | common | rename | meta | ext
2580 # -------------------------------------------------------
2585 # -------------------------------------------------------
2581 # flags() | 0 | 0 | 0 | not 0
2586 # flags() | 0 | 0 | 0 | not 0
2582 # renamed() | False | True | False | ?
2587 # renamed() | False | True | False | ?
2583 # rawtext[0:2]=='\1\n'| False | True | True | ?
2588 # rawtext[0:2]=='\1\n'| False | True | True | ?
2584 #
2589 #
2585 # "rawtext" means the raw text stored in revlog data, which
2590 # "rawtext" means the raw text stored in revlog data, which
2586 # could be retrieved by "rawdata(rev)". "text"
2591 # could be retrieved by "rawdata(rev)". "text"
2587 # mentioned below is "revision(rev)".
2592 # mentioned below is "revision(rev)".
2588 #
2593 #
2589 # There are 3 different lengths stored physically:
2594 # There are 3 different lengths stored physically:
2590 # 1. L1: rawsize, stored in revlog index
2595 # 1. L1: rawsize, stored in revlog index
2591 # 2. L2: len(rawtext), stored in revlog data
2596 # 2. L2: len(rawtext), stored in revlog data
2592 # 3. L3: len(text), stored in revlog data if flags==0, or
2597 # 3. L3: len(text), stored in revlog data if flags==0, or
2593 # possibly somewhere else if flags!=0
2598 # possibly somewhere else if flags!=0
2594 #
2599 #
2595 # L1 should be equal to L2. L3 could be different from them.
2600 # L1 should be equal to L2. L3 could be different from them.
2596 # "text" may or may not affect commit hash depending on flag
2601 # "text" may or may not affect commit hash depending on flag
2597 # processors (see flagutil.addflagprocessor).
2602 # processors (see flagutil.addflagprocessor).
2598 #
2603 #
2599 # | common | rename | meta | ext
2604 # | common | rename | meta | ext
2600 # -------------------------------------------------
2605 # -------------------------------------------------
2601 # rawsize() | L1 | L1 | L1 | L1
2606 # rawsize() | L1 | L1 | L1 | L1
2602 # size() | L1 | L2-LM | L1(*) | L1 (?)
2607 # size() | L1 | L2-LM | L1(*) | L1 (?)
2603 # len(rawtext) | L2 | L2 | L2 | L2
2608 # len(rawtext) | L2 | L2 | L2 | L2
2604 # len(text) | L2 | L2 | L2 | L3
2609 # len(text) | L2 | L2 | L2 | L3
2605 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2610 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2606 #
2611 #
2607 # LM: length of metadata, depending on rawtext
2612 # LM: length of metadata, depending on rawtext
2608 # (*): not ideal, see comment in filelog.size
2613 # (*): not ideal, see comment in filelog.size
2609 # (?): could be "- len(meta)" if the resolved content has
2614 # (?): could be "- len(meta)" if the resolved content has
2610 # rename metadata
2615 # rename metadata
2611 #
2616 #
2612 # Checks needed to be done:
2617 # Checks needed to be done:
2613 # 1. length check: L1 == L2, in all cases.
2618 # 1. length check: L1 == L2, in all cases.
2614 # 2. hash check: depending on flag processor, we may need to
2619 # 2. hash check: depending on flag processor, we may need to
2615 # use either "text" (external), or "rawtext" (in revlog).
2620 # use either "text" (external), or "rawtext" (in revlog).

        try:
            skipflags = state.get('skipflags', 0)
            if skipflags:
                skipflags &= self.flags(rev)

            if skipflags:
                state['skipread'].add(node)
            else:
                # Side-effect: read content and verify hash.
                self.revision(node)

            l1 = self.rawsize(rev)
            l2 = len(self.rawdata(node))

            if l1 != l2:
                yield revlogproblem(
                    error=_('unpacked size is %d, %d expected') % (l2, l1),
                    node=node)

        except error.CensoredNodeError:
            if state['erroroncensored']:
                yield revlogproblem(error=_('censored file data'),
                                    node=node)
            state['skipread'].add(node)
        except Exception as e:
            yield revlogproblem(
                error=_('unpacking %s: %s') % (short(node),
                                               stringutil.forcebytestr(e)),
                node=node)
            state['skipread'].add(node)

    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                    revisionscount=False, trackedsize=False,
                    storedsize=False):
        d = {}

        if exclusivefiles:
            d['exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d['exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d['sharedfiles'] = []

        if revisionscount:
            d['revisionscount'] = len(self)

        if trackedsize:
            d['trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d['storedsize'] = sum(self.opener.stat(path).st_size
                                  for path in self.files())

        return d
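
For readers tracing the verification logic above, the following is a hedged sketch (not part of this patch; `rl` stands for an already-open revlog instance, and the helper names are invented) of how the documented L1 == L2 length check and the storageinfo() query could be driven by a consumer:

    def checklengths(rl):
        # Replay the "length check" from the table above: L1 (rawsize,
        # stored in the index) must equal L2 (len(rawtext), stored in
        # the data) for every revision.
        problems = []
        for rev in rl:
            node = rl.node(rev)
            l1 = rl.rawsize(rev)
            l2 = len(rl.rawdata(node))
            if l1 != l2:
                problems.append((rev, l1, l2))
        return problems

    def quickstats(rl):
        # storageinfo() only computes the fields the caller asks for;
        # trackedsize sums rawsize over every revision.
        return rl.storageinfo(revisionscount=True, trackedsize=True)
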
@@ -1,185 +1,187 @@ mercurial/revlogutils/flagutil.py
# flagutil.py - code to deal with revlog flags and their processors
#
# Copyright 2016 Remi Chaintron <remi@fb.com>
# Copyright 2016-2019 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

from ..i18n import _

from .constants import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
+    REVIDX_SIDEDATA,
)

from .. import (
    error,
    util
)

# blanket usage of all the names to prevent pyflakes warnings
# We need these names available in the module for extensions.
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_EXTSTORED
+REVIDX_SIDEDATA
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)

# Store flag processors (cf. 'addflagprocessor()' to register)
flagprocessors = {
    REVIDX_ISCENSORED: None,
}

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool, sidedata
          - (write) f(self, text, sidedata) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
    "text" is presented to the user. "rawtext" is stored in revlog data, not
    directly visible to the user.
    The boolean returned by these transforms is used to determine whether
    the returned text can be used for hash integrity checking. For example,
    if "write" returns False, then "text" is used to generate hash. If
    "write" returns True, that basically means "rawtext" returned by "write"
    should be used to generate hash. Usually, "write" and "read" return
    different booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    insertflagprocessor(flag, processor, flagprocessors)

def insertflagprocessor(flag, processor, flagprocessors):
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise error.ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise error.ProgrammingError(msg)
    if flag in flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    flagprocessors[flag] = processor

def processflagswrite(revlog, text, flags, sidedata):
    """Inspect revision data flags and apply write transformations defined
    by registered flag processors.

    ``text`` - the revision data to process
    ``flags`` - the revision flags
    ``sidedata`` - the sidedata mapping to store alongside the revision

    This function processes the flags in reverse REVIDX_FLAGS_ORDER order,
    applying the flag processors registered for present flags. The order of
    flags defined in REVIDX_FLAGS_ORDER needs to be stable to allow
    non-commutativity.

    Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
    processed text and ``validatehash`` is a bool indicating whether the
    returned text should be checked for hash integrity.
    """
    return _processflagsfunc(revlog, text, flags, 'write',
                             sidedata=sidedata)[:2]

def processflagsread(revlog, text, flags):
    """Inspect revision data flags and apply read transformations defined
    by registered flag processors.

    ``text`` - the revision data to process
    ``flags`` - the revision flags

    This function processes the flags in REVIDX_FLAGS_ORDER, applying the
    flag processors registered for present flags. The order of flags defined
    in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

    Returns a 3-tuple of ``(text, validatehash, sidedata)`` where ``text`` is
    the processed text, ``validatehash`` is a bool indicating whether the
    returned text should be checked for hash integrity, and ``sidedata`` is a
    mapping of extra data extracted from the rawtext.
    """
    return _processflagsfunc(revlog, text, flags, 'read')

def processflagsraw(revlog, text, flags):
    """Inspect revision data flags to check whether the content hash should
    be validated.

    ``text`` - the revision data to process
    ``flags`` - the revision flags

    This function processes the flags in REVIDX_FLAGS_ORDER, applying the
    flag processors registered for present flags.

    Returns a bool indicating whether the rawtext should be checked for hash
    integrity.
    """
    return _processflagsfunc(revlog, text, flags, 'raw')[1]

def _processflagsfunc(revlog, text, flags, operation, sidedata=None):
    """internal function to process flags on a revlog

    This function is private to this module; code should never need to call
    it directly."""
    # fast path: no flag processors will run
    if flags == 0:
        return text, True, {}
    if operation not in ('read', 'write', 'raw'):
        raise error.ProgrammingError(_("invalid '%s' operation") %
                                     operation)
    # Check all flags are known.
    if flags & ~REVIDX_KNOWN_FLAGS:
        raise revlog._flagserrorclass(_("incompatible revision flag '%#x'") %
                                      (flags & ~REVIDX_KNOWN_FLAGS))
    validatehash = True
    # Depending on the operation (read or write), the order might be
    # reversed due to non-commutative transforms.
    orderedflags = REVIDX_FLAGS_ORDER
    if operation == 'write':
        orderedflags = reversed(orderedflags)

    outsidedata = {}
    for flag in orderedflags:
        # If a flagprocessor has been registered for a known flag, apply the
        # related operation transform and update result tuple.
        if flag & flags:
            vhash = True

            if flag not in revlog._flagprocessors:
                message = _("missing processor for flag '%#x'") % (flag)
                raise revlog._flagserrorclass(message)

            processor = revlog._flagprocessors[flag]
            if processor is not None:
                readtransform, writetransform, rawtransform = processor

                if operation == 'raw':
                    vhash = rawtransform(revlog, text)
                elif operation == 'read':
                    text, vhash, s = readtransform(revlog, text)
                    outsidedata.update(s)
                else: # write operation
                    text, vhash = writetransform(revlog, text, sidedata)
            validatehash = validatehash and vhash

    return text, validatehash, outsidedata
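
To make the registration contract above concrete, here is a hedged sketch of a no-op processor (illustrative only: the transforms do nothing, the choice of REVIDX_EXTSTORED is arbitrary, and an extension such as lfs may already claim that flag, in which case insertflagprocessor() raises Abort):

    from mercurial.revlogutils import flagutil
    from mercurial.revlogutils.constants import REVIDX_EXTSTORED

    def _read(rl, rawtext):
        # rawtext -> text; the trailing dict is the per-revision sidedata
        return rawtext, True, {}

    def _write(rl, text, sidedata):
        # text -> rawtext; True means the returned rawtext covers the hash
        return text, True

    def _raw(rl, rawtext):
        # rawtext can be used for hash checks as-is
        return True

    flagutil.addflagprocessor(REVIDX_EXTSTORED, (_read, _write, _raw))
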
@@ -1,79 +1,85 @@ mercurial/revlogutils/sidedata.py
# sidedata.py - Logic around storing extra data alongside revlog revisions
#
# Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
"""core code for "sidedata" support

Sidedata are stored alongside a revision without actually being part of its
content and without affecting its hash. Their main use case is to cache
important information related to a changeset.

The current implementation is experimental and subject to changes. Do not rely
on it in production.

Sidedata are stored in the revlog itself, within the revision rawtext. They
are inserted into and removed from it using the flag processors mechanism. The
following format is currently used::

    initial header:
      <number of sidedata; 2 bytes>
    sidedata entries (repeated N times):
      <sidedata-key; 2 bytes>
      <sidedata-entry-length: 4 bytes>
      <sidedata-content-sha1-digest: 20 bytes>
    sidedata contents (repeated N times, in the same order as the entries):
      <sidedata-content; X bytes>
    normal raw text:
      <all bytes remaining in the rawtext>

This is a simple and effective format. It should be enough to experiment with
the concept.
"""

from __future__ import absolute_import

import hashlib
import struct

from .. import error

SIDEDATA_HEADER = struct.Struct('>H')
SIDEDATA_ENTRY = struct.Struct('>HL20s')

def sidedatawriteprocessor(rl, text, sidedata):
    sidedata = list(sidedata.items())
    sidedata.sort()
    rawtext = [SIDEDATA_HEADER.pack(len(sidedata))]
    for key, value in sidedata:
        digest = hashlib.sha1(value).digest()
        rawtext.append(SIDEDATA_ENTRY.pack(key, len(value), digest))
    for key, value in sidedata:
        rawtext.append(value)
    rawtext.append(bytes(text))
    return ''.join(rawtext), False

def sidedatareadprocessor(rl, text):
    sidedata = {}
    offset = 0
    nbentry, = SIDEDATA_HEADER.unpack(text[:SIDEDATA_HEADER.size])
    offset += SIDEDATA_HEADER.size
    dataoffset = SIDEDATA_HEADER.size + (SIDEDATA_ENTRY.size * nbentry)
    for i in range(nbentry):
        nextoffset = offset + SIDEDATA_ENTRY.size
        key, size, storeddigest = SIDEDATA_ENTRY.unpack(text[offset:nextoffset])
        offset = nextoffset
        # read the data associated with that entry
        nextdataoffset = dataoffset + size
        entrytext = text[dataoffset:nextdataoffset]
        readdigest = hashlib.sha1(entrytext).digest()
        if storeddigest != readdigest:
            raise error.SidedataHashError(key, storeddigest, readdigest)
        sidedata[key] = entrytext
        dataoffset = nextdataoffset
    text = text[dataoffset:]
    return text, True, sidedata

def sidedatarawprocessor(rl, text):
    # side data modifies rawtext and prevents rawtext hash validation
    return False
+
+processors = (
+    sidedatareadprocessor,
+    sidedatawriteprocessor,
+    sidedatarawprocessor,
+)
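
To make the byte layout concrete, here is a self-contained sketch (stdlib only, independent of Mercurial; the key 1 and the payloads are invented) that packs and unpacks a rawtext following the format documented in the module docstring:

    import hashlib
    import struct

    SIDEDATA_HEADER = struct.Struct('>H')     # number of entries
    SIDEDATA_ENTRY = struct.Struct('>HL20s')  # key, length, sha1 digest

    def encode(sidedata, text):
        entries = sorted(sidedata.items())
        chunks = [SIDEDATA_HEADER.pack(len(entries))]
        for key, value in entries:
            digest = hashlib.sha1(value).digest()
            chunks.append(SIDEDATA_ENTRY.pack(key, len(value), digest))
        chunks.extend(value for _, value in entries)
        chunks.append(text)
        return b''.join(chunks)

    def decode(rawtext):
        (nbentry,) = SIDEDATA_HEADER.unpack_from(rawtext, 0)
        offset = SIDEDATA_HEADER.size
        dataoffset = offset + SIDEDATA_ENTRY.size * nbentry
        sidedata = {}
        for _ in range(nbentry):
            key, size, digest = SIDEDATA_ENTRY.unpack_from(rawtext, offset)
            offset += SIDEDATA_ENTRY.size
            value = rawtext[dataoffset:dataoffset + size]
            assert hashlib.sha1(value).digest() == digest
            sidedata[key] = value
            dataoffset += size
        return rawtext[dataoffset:], sidedata

    rawtext = encode({1: b'cached data'}, b'revision body')
    assert decode(rawtext) == (b'revision body', {1: b'cached data'})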