flagprocessors: make `processflagsraw` a module level function...
marmoute - r43262:dff95420 default
@@ -1,2639 +1,2639 b''
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    flagutil,
)
from .utils import (
    storageutil,
    stringutil,
)

# Blanket usage of all the names below to silence pyflakes warnings.
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod(r'parsers')
rustancestor = policy.importrust(r'ancestor')
rustdagop = policy.importrust(r'dagop')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

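# Illustrative note (not in the upstream file): 131072 is 128 KiB and
# 1048576 is 1 MiB; once an inline revlog grows past _maxinline, its
# revision data is split out of the index file into a separate data file.
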
# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False, {}

def ellipsiswriteprocessor(rl, text, sidedata):
    return text, False

def ellipsisrawprocessor(rl, text):
    return False

ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)

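# Illustrative note (not in the upstream file): as the tuple above shows,
# a flag processor is registered as a (read, write, raw) triple. "read"
# turns stored rawtext into usable text and returns (text, validatehash,
# sidedata), "write" is the inverse and returns (text, validatehash), and
# "raw" only reports whether the rawtext hash may be validated as-is.
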
def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)

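# Illustrative note (not in the upstream file): the helpers above pack a
# data offset and 16 flag bits into one integer, offset in the high bits:
#
#   v = offset_type(4096, 0)    # == 4096 << 16
#   getoffset(v), gettype(v)    # == (4096, 0)
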
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)

@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack

class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(_('index entry flags need revlog '
                                      'version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

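# Illustrative note (not in the upstream file): an indexformatv0 entry is
# 76 bytes, while an indexformatng entry is 64 bytes; the trailing "12x"
# pads the 20-byte nodeid out to the 32 bytes documented above. As
# revlogio.packentry() below shows, the 4-byte version header overwrites
# the first 4 bytes of the very first index entry.
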
# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

class revlog(flagutil.flagprocessorsmixin):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.
    """
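    # Illustrative note (not in the upstream file, inferred from __init__
    # below): the index is read from ``indexfile`` (a ``.i`` file by
    # convention), and unless the revlog is inline the revision data lives
    # in the matching ``.d`` file.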
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False,
                 upperboundcomp=None):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        """
        self.upperboundcomp = upperboundcomp
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._loadindex()

    def _loadindex(self):
        mmapindexthreshold = None
        opts = getattr(self.opener, 'options', {}) or {}

        if 'revlogv2' in opts:
            newversionflags = REVLOGV2 | FLAG_INLINE_DATA
        elif 'revlogv1' in opts:
            newversionflags = REVLOGV1 | FLAG_INLINE_DATA
            if 'generaldelta' in opts:
                newversionflags |= FLAG_GENERALDELTA
        elif getattr(self.opener, 'options', None) is not None:
            # If options provided but no 'revlog*' found, the repository
            # would have no 'requires' file in it, which means we have to
            # stick to the old format.
            newversionflags = REVLOGV0
        else:
            newversionflags = REVLOG_DEFAULT_VERSION

        if 'chunkcachesize' in opts:
            self._chunkcachesize = opts['chunkcachesize']
        if 'maxchainlen' in opts:
            self._maxchainlen = opts['maxchainlen']
        if 'deltabothparents' in opts:
            self._deltabothparents = opts['deltabothparents']
        self._lazydelta = bool(opts.get('lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
        if 'compengine' in opts:
            self._compengine = opts['compengine']
        if 'zlib.level' in opts:
            self._compengineopts['zlib.level'] = opts['zlib.level']
        if 'zstd.level' in opts:
            self._compengineopts['zstd.level'] = opts['zstd.level']
        if 'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts['maxdeltachainspan']
        if self._mmaplargeindex and 'mmapindexthreshold' in opts:
            mmapindexthreshold = opts['mmapindexthreshold']
        self._sparserevlog = bool(opts.get('sparse-revlog', False))
        withsparseread = bool(opts.get('with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if 'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts['sparse-read-density-threshold']
        if 'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts['sparse-read-min-gap-size']
        if opts.get('enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(_('revlog chunk cache size %r is not '
                                      'greater than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(_('revlog chunk cache size %r is not a '
                                      'power of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                        self.opener.fstat(f).st_size >= mmapindexthreshold):
                    # TODO: should call .close() to release resources without
                    # relying on Python GC
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                versionflags = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
            else:
                versionflags = newversionflags
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

            versionflags = newversionflags

        self.version = versionflags

        flags = versionflags & ~0xFFFF
        fmt = versionflags & 0xFFFF

        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = False
            self._generaldelta = False

        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = versionflags & FLAG_INLINE_DATA
            self._generaldelta = versionflags & FLAG_GENERALDELTA

        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = versionflags & FLAG_INLINE_DATA
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            raise error.RevlogError(_('unknown version (%d) in revlog %s') %
                                    (fmt, self.indexfile))
        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(_("index %s is corrupted") %
                                    self.indexfile)
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all rev in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7]  # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            # If we are using the native C version, we are in the fun case
            # where self.index, self.nodemap and self._nodecache are all the
            # same object.
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))

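    # Illustrative note (not in the upstream file): on a cache miss the
    # pure-Python fallback in rev() above scans the index backwards from
    # _nodepos (or from the tip), memoizing every node it passes, so later
    # lookups hit the cache.
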
    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

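    # Illustrative note (not in the upstream file): with generaldelta each
    # step of the walk above follows e[3], the per-revision delta base, so
    # the chain may jump around; without it each revision deltas against
    # rev - 1 and the chain is simply the contiguous range base..rev.
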
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        elif util.safehasattr(parsers, 'rustlazyancestors'):
            lazyancestors = ancestor.rustlazyancestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

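    # Illustrative note (not in the upstream file): the lazyset wrapper in
    # findcommonmissing() above exists so the full ancestor set of 'common'
    # is only computed as far as the membership tests in the BFS loop
    # actually force it.
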
869 def incrementalmissingrevs(self, common=None):
869 def incrementalmissingrevs(self, common=None):
870 """Return an object that can be used to incrementally compute the
870 """Return an object that can be used to incrementally compute the
871 revision numbers of the ancestors of arbitrary sets that are not
871 revision numbers of the ancestors of arbitrary sets that are not
872 ancestors of common. This is an ancestor.incrementalmissingancestors
872 ancestors of common. This is an ancestor.incrementalmissingancestors
873 object.
873 object.
874
874
875 'common' is a list of revision numbers. If common is not supplied, uses
875 'common' is a list of revision numbers. If common is not supplied, uses
876 nullrev.
876 nullrev.
877 """
877 """
878 if common is None:
878 if common is None:
879 common = [nullrev]
879 common = [nullrev]
880
880
881 if rustancestor is not None:
881 if rustancestor is not None:
882 return rustancestor.MissingAncestors(self.index, common)
882 return rustancestor.MissingAncestors(self.index, common)
883 return ancestor.incrementalmissingancestors(self.parentrevs, common)
883 return ancestor.incrementalmissingancestors(self.parentrevs, common)
884
884
885 def findmissingrevs(self, common=None, heads=None):
885 def findmissingrevs(self, common=None, heads=None):
886 """Return the revision numbers of the ancestors of heads that
886 """Return the revision numbers of the ancestors of heads that
887 are not ancestors of common.
887 are not ancestors of common.
888
888
889 More specifically, return a list of revision numbers corresponding to
889 More specifically, return a list of revision numbers corresponding to
890 nodes N such that every N satisfies the following constraints:
890 nodes N such that every N satisfies the following constraints:
891
891
892 1. N is an ancestor of some node in 'heads'
892 1. N is an ancestor of some node in 'heads'
893 2. N is not an ancestor of any node in 'common'
893 2. N is not an ancestor of any node in 'common'
894
894
895 The list is sorted by revision number, meaning it is
895 The list is sorted by revision number, meaning it is
896 topologically sorted.
896 topologically sorted.
897
897
898 'heads' and 'common' are both lists of revision numbers. If heads is
898 'heads' and 'common' are both lists of revision numbers. If heads is
899 not supplied, uses all of the revlog's heads. If common is not
899 not supplied, uses all of the revlog's heads. If common is not
900 supplied, uses nullid."""
900 supplied, uses nullid."""
901 if common is None:
901 if common is None:
902 common = [nullrev]
902 common = [nullrev]
903 if heads is None:
903 if heads is None:
904 heads = self.headrevs()
904 heads = self.headrevs()
905
905
906 inc = self.incrementalmissingrevs(common=common)
906 inc = self.incrementalmissingrevs(common=common)
907 return inc.missingancestors(heads)
907 return inc.missingancestors(heads)
908
908
909 def findmissing(self, common=None, heads=None):
909 def findmissing(self, common=None, heads=None):
910 """Return the ancestors of heads that are not ancestors of common.
910 """Return the ancestors of heads that are not ancestors of common.
911
911
912 More specifically, return a list of nodes N such that every N
912 More specifically, return a list of nodes N such that every N
913 satisfies the following constraints:
913 satisfies the following constraints:
914
914
915 1. N is an ancestor of some node in 'heads'
915 1. N is an ancestor of some node in 'heads'
916 2. N is not an ancestor of any node in 'common'
916 2. N is not an ancestor of any node in 'common'
917
917
918 The list is sorted by revision number, meaning it is
918 The list is sorted by revision number, meaning it is
919 topologically sorted.
919 topologically sorted.
920
920
921 'heads' and 'common' are both lists of node IDs. If heads is
921 'heads' and 'common' are both lists of node IDs. If heads is
922 not supplied, uses all of the revlog's heads. If common is not
922 not supplied, uses all of the revlog's heads. If common is not
923 supplied, uses nullid."""
923 supplied, uses nullid."""
924 if common is None:
924 if common is None:
925 common = [nullid]
925 common = [nullid]
926 if heads is None:
926 if heads is None:
927 heads = self.heads()
927 heads = self.heads()
928
928
929 common = [self.rev(n) for n in common]
929 common = [self.rev(n) for n in common]
930 heads = [self.rev(n) for n in heads]
930 heads = [self.rev(n) for n in heads]
931
931
932 inc = self.incrementalmissingrevs(common=common)
932 inc = self.incrementalmissingrevs(common=common)
933 return [self.node(r) for r in inc.missingancestors(heads)]
933 return [self.node(r) for r in inc.missingancestors(heads)]
934
934
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

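    # Illustrative sketch (not in the original file): with hypothetical nodes
    # A <- B <- C, nodesbetween([A], [C]) returns ([A, B, C], [A], [C]):
    # every node on the path, the subset of roots actually reachable, and the
    # subset of heads actually reached. Unreachable roots and fake heads are
    # silently dropped from 'outroots'/'outheads' rather than raising.
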
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered revs so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

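    # Illustrative walk-through (not in the original file): for a revlog with
    # revs 0 <- 1 and 0 <- 2 (two branches off rev 0), the loop above first
    # marks every rev as a potential head, then clears the bit for each rev's
    # parents (index entry slots e[5]/e[6]); only revs 1 and 2 survive, so
    # _headrevs() returns [1, 2]. The extra slot in 'ishead' absorbs the
    # writes for nullrev (-1) parents.
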
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
                                    stoprevs=stoprevs)

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

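    # Why the early exits above are safe (sketch, not in the original file):
    # revision numbers form a topological order, so an ancestor always has a
    # smaller rev number than its descendants and 'a > b' can never be an
    # ancestor relationship. The remaining case asks reachableroots() whether
    # any path walking parents from head 'b' reaches root 'a', e.g.:
    #
    #   rl.isancestorrev(2, 5)  # True iff rev 2 is reachable from rev 5
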
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::<roots> and <roots>::<heads>))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(minroot, heads, roots,
                                              includepath)
        except AttributeError:
            return dagop._reachablerootspure(self.parentrevs,
                                             minroot, roots, heads, includepath)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

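    # Tie-breaking note (illustrative, not in the original file): in a
    # criss-cross merge the common-ancestor set can have several equally good
    # heads; min() over the binary node ids then picks one deterministically,
    # so repeated merges of the same pair always use the same base.
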
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except error.LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _('no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

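    # Illustrative usage (not in the original file, hypothetical hashes):
    #
    #   rl.shortest(rl.node(0))           # -> b'd4f0', say
    #   rl.lookup(b'd4f0') == rl.node(0)  # the prefix round-trips
    #
    # shortest() grows the prefix one hex digit at a time until
    # _partialmatch() stops reporting ambiguity, and avoids all-'f' prefixes,
    # which would collide with the virtual working-directory id.
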
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _('partial read of revlog %s; expected %d bytes from '
                      'offset %d, got %d') %
                    (self.indexfile if self._inline else self.datafile,
                     length, realoffset, len(d) - startoffset))

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _('partial read of revlog %s; expected %d bytes from offset '
                  '%d, got %d') %
                (self.indexfile if self._inline else self.datafile,
                 length, offset, len(d)))

        return d

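    # Worked example of the alignment arithmetic above (illustrative, not in
    # the original file), assuming the default 64 KiB chunk cache:
    #
    #   cachesize  = 65536
    #   offset, length = 70000, 1000
    #   realoffset = 70000 & ~65535                        # -> 65536
    #   reallength = ((71000 + 65536) & ~65535) - 65536    # -> 65536
    #
    # One cache-aligned 64 KiB window is read, and the caller gets a zero-copy
    # util.buffer() slice starting at 70000 - 65536 within it.
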
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

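    # Sketch of the inline-offset correction above (illustrative, not in the
    # original file): in an inline revlog, index records and data chunks are
    # interleaved in the .i file, so rev r's data is preceded by r + 1 index
    # records of self._io.size bytes each. Assuming a 64-byte record:
    #
    #   on-disk offset of rev 2's data = start(2) + (2 + 1) * 64
    #
    # which is exactly the shift applied to 'start' and 'end' above.
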
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(self, revs,
                                                targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

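    # Illustrative contrast (not in the original file): the index stores the
    # delta base in entry slot 3, but its meaning depends on the format. For
    # a hypothetical rev 5 whose base field holds 3:
    #
    #   generaldelta revlog: deltaparent(5) == 3        # delta against any rev
    #   classic revlog:      deltaparent(5) == 4        # always the previous rev
    #   base field == 5:     deltaparent(5) == nullrev  # full snapshot
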
    def issnapshot(self, rev):
        """tells whether rev is a snapshot
        """
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

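    # Recursion note (illustrative, not in the original file): in a sparse
    # revlog an intermediate snapshot may itself be stored as a delta against
    # another snapshot rather than against one of its parents. A rev whose
    # delta base is neither p1 nor p2 is therefore a snapshot exactly when its
    # base is one, hence the tail call on 'base' above.
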
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1),
                              self.rawdata(rev2))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = ('revlog.revision(..., raw=True) is deprecated, '
                   'use revlog.rawdata(...)')
            util.nouideprecwarn(msg, '5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == nullid:
            return "", {}

        # The text as stored inside the revlog. Might be the revision or might
        # need to be processed to retrieve the revision.
        rawtext = None

        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and the raw text is
            # already cached, we can exit early.
            return rawtext, {}
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, {}

        sidedata = {}
        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash, sidedata = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

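    # Note on the change above (not part of the file itself): this changeset
    # replaces the mixin method call
    #
    #   validatehash = self._processflagsraw(rawtext, flags)
    #
    # with the module-level helper
    #
    #   validatehash = flagutil.processflagsraw(self, rawtext, flags)
    #
    # The revlog is now passed explicitly, so raw-flag validation no longer
    # depends on inheriting the flag-processor mixin.
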
    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext # let us have a chance to free memory early
        return (rev, rawtext, False)

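    # Reconstruction sketch (illustrative, not in the original file): for a
    # hypothetical delta chain [2, 5, 7] where rev 2 is a full snapshot,
    #
    #   bins    = self._chunks([2, 5, 7])
    #   rawtext = mdiff.patches(bytes(bins[0]), bins[1:])
    #
    # If the revision cache already holds rev 5, _deltachain() stops there,
    # and only the delta for rev 7 is read and applied on top of the cached
    # text.
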
    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(_("integrity check failed on %s:%s")
                                        % (self.indexfile,
                                           pycompat.bytestr(revornode)))
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(_("%s not found in the transaction")
                                    % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover
            # data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp('r') as ifh, self._datafp('w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

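# Illustrative sketch, not part of revlog.py: the conversion trigger above
# reduces to comparing the end offset of the tip revision's data against a
# module-level threshold. `_maxinline` is the real constant the method uses;
# its value is not shown in this hunk, so treat the shape of the check, not
# any particular number, as authoritative.
def _wouldsplit(rl, maxinline):
    """Return True when an inline revlog has outgrown `maxinline` bytes."""
    tiprev = len(rl) - 1
    return rl._inline and (rl.start(tiprev) + rl.length(tiprev)) >= maxinline
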
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored.
        """

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None,
                    sidedata=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses
            might use a different hashing method (and override checkhash() in
            that case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                    % self.indexfile)

        if sidedata is None:
            sidedata = {}

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags,
                                                           sidedata=sidedata)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

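# Hedged usage sketch, not from the patch itself: a minimal caller of
# addrevision() for a plain (flag-less) revision. `rl` is an open revlog and
# `tr` an open transaction; both are assumptions supplied by the caller.
def _addplain(rl, tr, data, linkrev, p1, p2):
    # node is left as None, so it is computed as hash(data, p1, p2);
    # write-side flag processors run before the data hits storage.
    return rl.addrevision(data, tr, linkrev, p1, p2)
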
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (e.g.
        received over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

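# Illustrative sketch of the chunk header convention shared by compress() and
# decompress() above: 'u' prefixes data stored uncompressed, a chunk whose
# first byte is '\0' is stored literally, and other leading bytes (e.g. 'x'
# for zlib) name a compression engine. A standalone round-trip under those
# assumptions, not part of revlog.py:
import zlib  # already imported at the top of this module

def _toycompress(data):
    compressed = zlib.compress(data)
    if len(compressed) < len(data):
        return compressed        # zlib output begins with 'x' ('\x78')
    if data[0:1] == b'\0':
        return data              # '\0' chunks can be stored as-is
    return b'u' + data           # otherwise, mark as uncompressed

def _toydecompress(chunk):
    t = chunk[0:1]
    if t == b'x':
        return zlib.decompress(chunk)
    if t == b'\0':
        return chunk
    if t == b'u':
        return chunk[1:]
    raise ValueError('unknown compression type %r' % t)

assert _toydecompress(_toycompress(b'revision text ' * 16)) == \
    b'revision text ' * 16
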
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be
          set. if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        # Reset the pure node cache start lookup offset to account for new
        # revision.
        if self._nodepos is not None:
            self._nodepos = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

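# Sketch of the index entry tuple built above, for illustration only. The
# first field packs the byte offset and the 16-bit flags into one integer
# (offset << 16 | flags), which is why clone() below recovers the flags with
# `entry[0] & 0xffff`.
def _describeentry(e):
    offset_flags, complen, rawlen, base, link, p1r, p2r, node = e
    return {
        'offset': offset_flags >> 16,    # chunk start in the data file
        'flags': offset_flags & 0xffff,  # per-revision storage flags
        'compressedlength': complen,     # deltainfo.deltalen
        'rawtextlength': rawlen,         # textlen
        'deltabase': base,               # deltainfo.base
        'linkrev': link,
        'parentrevs': (p1r, p2r),
        'node': node,
    }
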
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError('cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()

        return nodes

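# Illustrative sketch of the shape of each item consumed by addgroup(); the
# tuple unpacking above fixes the layout. The nodeids here are fabricated
# 20-byte placeholders, not real hashes; nullid comes from the imports at the
# top of this module.
def _dummynode(b):
    return bytes(bytearray([b])) * 20

exampledeltas = [
    (_dummynode(1),              # node being added
     _dummynode(2), nullid,      # p1, p2
     _dummynode(3),              # linknode in the changelog
     _dummynode(2),              # deltabase the delta applies against
     b'',                        # binary delta data
     0),                         # flags (0 -> REVIDX_DEFAULT_FLAGS applies)
]
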
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(minlink, len(self) - 1,
                                            self.headrevs(),
                                            self.linkrev, self.parentrevs)

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

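# Hedged usage sketch, not part of revlog.py: combining getstrippoint() and
# strip(). `tr` is an open transaction; per the docstring above, the caller
# is responsible for saving and re-adding the revisions reported as broken.
def _stripfromlink(rl, tr, minlink):
    striprev, broken = rl.getstrippoint(minlink)
    # revisions in `broken` sit below striprev but link at or above minlink;
    # they must be bundled up before the truncation below.
    rl.strip(minlink, tr)
    return striprev, broken
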
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

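# Minimal sketch of how a caller might interpret checksize(): any nonzero
# component means trailing bytes beyond what the index accounts for, the
# same condition verifyintegrity() below reports as a problem.
def _ishealthy(rl):
    dd, di = rl.checksize()
    return dd == 0 and di == 0
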
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False,
                      deltamode=repository.CG_DELTAMODE_STD):
        if nodesorder not in ('nodes', 'storage', 'linear', None):
            raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                         nodesorder)

        if nodesorder is None and not self._generaldelta:
            nodesorder = 'storage'

        if (not self._storedeltachains and
            deltamode != repository.CG_DELTAMODE_PREV):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self, nodes, nodesorder, revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions)

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is
           the fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force computing deltas against both
        parents for merges. When left unset, the destination revlog's
        existing setting is kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            deltacomputer = deltautil.deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if (deltareuse != self.DELTAREUSEFULLADD
                    and destrevlog._lazydelta):
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.rawdata(rev)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

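# Hypothetical usage sketch of clone(): rebuilding every delta into a fresh
# destination revlog, e.g. after a delta algorithm change. The '.new' suffix
# is an arbitrary choice for illustration; `opener` and `tr` come from the
# caller's repository and transaction.
def _recomputedeltas(src, opener, tr):
    dest = revlog(opener, src.indexfile + b'.new', src.datafile + b'.new')
    src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER)
    return dest
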
    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                    self.version)

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(_('censor tombstone must be no longer than '
                                'censored data'))

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile,
                       censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
                                     p1, p2, censornode, REVIDX_ISCENSORED)

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(_('censored revision stored as delta; '
                                        'cannot censor'),
                                      hint=_('censoring of revlogs is not '
                                             'fully implemented; please report '
                                             'this bug'))
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(_('cannot censor due to censored '
                                        'revision having delta stored'))
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
                                 self.flags(rev))

        tr.addbackup(self.indexfile, location='store')
        if not self._inline:
            tr.addbackup(self.datafile, location='store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()

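# Illustrative sketch of what storageutil.packmeta() produces for the
# tombstone above: filelog-style metadata framed by the b'\1\n' marker (the
# same header verifyintegrity() discusses below), followed by the empty
# replacement text. A simplified stand-in, not the real implementation:
def _toypackmeta(meta, text):
    metalines = [b'%s: %s\n' % (k, v) for k, v in sorted(meta.items())]
    return b'\x01\n' + b''.join(metalines) + b'\x01\n' + text

assert _toypackmeta({b'censored': b'spam'}, b'') == \
    b'\x01\ncensored: spam\n\x01\n'
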
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_('data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_('index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state['expectedversion']:
            yield revlogproblem(
                warning=_("warning: '%s' uses revlog format %d; expected %d") %
                        (self.indexfile, version, state['expectedversion']))

        state['skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get('skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    state['skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_('unpacked size is %d, %d expected') % (l2, l1),
                        node=node)

            except error.CensoredNodeError:
                if state['erroroncensored']:
                    yield revlogproblem(error=_('censored file data'),
                                        node=node)
                state['skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_('unpacking %s: %s') % (short(node),
                                                   stringutil.forcebytestr(e)),
                    node=node)
                state['skipread'].add(node)

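# Hedged usage sketch: draining the verifyintegrity() generator. The state
# keys shown ('expectedversion', 'erroroncensored', plus the optional
# 'skipflags') are the ones the method above reads; REVLOGV1 is imported at
# the top of this module.
def _checkrevlog(rl):
    state = {'expectedversion': REVLOGV1, 'erroroncensored': True}
    return list(rl.verifyintegrity(state))
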
    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                    revisionscount=False, trackedsize=False,
                    storedsize=False):
        d = {}

        if exclusivefiles:
            d['exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d['exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d['sharedfiles'] = []

        if revisionscount:
            d['revisionscount'] = len(self)

        if trackedsize:
            d['trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d['storedsize'] = sum(self.opener.stat(path).st_size
                                  for path in self.files())

        return d
@@ -1,1047 +1,1051 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import collections
12 import collections
13 import struct
13 import struct
14
14
15 # import stuff from node for others to import from revlog
15 # import stuff from node for others to import from revlog
16 from ..node import (
16 from ..node import (
17 nullrev,
17 nullrev,
18 )
18 )
19 from ..i18n import _
19 from ..i18n import _
20
20
21 from .constants import (
21 from .constants import (
22 REVIDX_ISCENSORED,
22 REVIDX_ISCENSORED,
23 REVIDX_RAWTEXT_CHANGING_FLAGS,
23 REVIDX_RAWTEXT_CHANGING_FLAGS,
24 )
24 )
25
25
26 from ..thirdparty import (
26 from ..thirdparty import (
27 attr,
27 attr,
28 )
28 )
29
29
30 from .. import (
30 from .. import (
31 error,
31 error,
32 mdiff,
32 mdiff,
33 util,
33 util,
34 )
34 )
35
35
36 from . import (
37 flagutil,
38 )
39
36 # maximum <delta-chain-data>/<revision-text-length> ratio
40 # maximum <delta-chain-data>/<revision-text-length> ratio
37 LIMIT_DELTA2TEXT = 2
41 LIMIT_DELTA2TEXT = 2
38
42
39 class _testrevlog(object):
43 class _testrevlog(object):
40 """minimalist fake revlog to use in doctests"""
44 """minimalist fake revlog to use in doctests"""
41
45
42 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
46 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
43 """data is an list of revision payload boundaries"""
47 """data is an list of revision payload boundaries"""
44 self._data = data
48 self._data = data
45 self._srdensitythreshold = density
49 self._srdensitythreshold = density
46 self._srmingapsize = mingap
50 self._srmingapsize = mingap
47 self._snapshot = set(snapshot)
51 self._snapshot = set(snapshot)
48 self.index = None
52 self.index = None
49
53
50 def start(self, rev):
54 def start(self, rev):
51 if rev == nullrev:
55 if rev == nullrev:
52 return 0
56 return 0
53 if rev == 0:
57 if rev == 0:
54 return 0
58 return 0
55 return self._data[rev - 1]
59 return self._data[rev - 1]
56
60
57 def end(self, rev):
61 def end(self, rev):
58 if rev == nullrev:
62 if rev == nullrev:
59 return 0
63 return 0
60 return self._data[rev]
64 return self._data[rev]
61
65
62 def length(self, rev):
66 def length(self, rev):
63 return self.end(rev) - self.start(rev)
67 return self.end(rev) - self.start(rev)
64
68
65 def __len__(self):
69 def __len__(self):
66 return len(self._data)
70 return len(self._data)
67
71
68 def issnapshot(self, rev):
72 def issnapshot(self, rev):
69 if rev == nullrev:
73 if rev == nullrev:
70 return True
74 return True
71 return rev in self._snapshot
75 return rev in self._snapshot
72
76
73 def slicechunk(revlog, revs, targetsize=None):
77 def slicechunk(revlog, revs, targetsize=None):
74 """slice revs to reduce the amount of unrelated data to be read from disk.
78 """slice revs to reduce the amount of unrelated data to be read from disk.
75
79
76 ``revs`` is sliced into groups that should be read in one time.
80 ``revs`` is sliced into groups that should be read in one time.
77 Assume that revs are sorted.
81 Assume that revs are sorted.
78
82
79 The initial chunk is sliced until the overall density (payload/chunks-span
83 The initial chunk is sliced until the overall density (payload/chunks-span
80 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
84 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
81 `revlog._srmingapsize` is skipped.
85 `revlog._srmingapsize` is skipped.
82
86
83 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
87 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
84 For consistency with other slicing choices, this limit won't go lower than
88 For consistency with other slicing choices, this limit won't go lower than
85 `revlog._srmingapsize`.
89 `revlog._srmingapsize`.
86
90
87 If individual revision chunks are larger than this limit, they will still
91 If individual revision chunks are larger than this limit, they will still
88 be yielded individually.
92 be yielded individually.
89
93
90 >>> data = [
94 >>> data = [
91 ... 5, #00 (5)
95 ... 5, #00 (5)
92 ... 10, #01 (5)
96 ... 10, #01 (5)
93 ... 12, #02 (2)
97 ... 12, #02 (2)
94 ... 12, #03 (empty)
98 ... 12, #03 (empty)
95 ... 27, #04 (15)
99 ... 27, #04 (15)
96 ... 31, #05 (4)
100 ... 31, #05 (4)
97 ... 31, #06 (empty)
101 ... 31, #06 (empty)
98 ... 42, #07 (11)
102 ... 42, #07 (11)
99 ... 47, #08 (5)
103 ... 47, #08 (5)
100 ... 47, #09 (empty)
104 ... 47, #09 (empty)
101 ... 48, #10 (1)
105 ... 48, #10 (1)
102 ... 51, #11 (3)
106 ... 51, #11 (3)
103 ... 74, #12 (23)
107 ... 74, #12 (23)
104 ... 85, #13 (11)
108 ... 85, #13 (11)
105 ... 86, #14 (1)
109 ... 86, #14 (1)
106 ... 91, #15 (5)
110 ... 91, #15 (5)
107 ... ]
111 ... ]
108 >>> revlog = _testrevlog(data, snapshot=range(16))
112 >>> revlog = _testrevlog(data, snapshot=range(16))
109
113
110 >>> list(slicechunk(revlog, list(range(16))))
114 >>> list(slicechunk(revlog, list(range(16))))
111 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
115 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
112 >>> list(slicechunk(revlog, [0, 15]))
116 >>> list(slicechunk(revlog, [0, 15]))
113 [[0], [15]]
117 [[0], [15]]
114 >>> list(slicechunk(revlog, [0, 11, 15]))
118 >>> list(slicechunk(revlog, [0, 11, 15]))
115 [[0], [11], [15]]
119 [[0], [11], [15]]
116 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
120 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
117 [[0], [11, 13, 15]]
121 [[0], [11, 13, 15]]
118 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
122 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
119 [[1, 2], [5, 8, 10, 11], [14]]
123 [[1, 2], [5, 8, 10, 11], [14]]
120
124
121 Slicing with a maximum chunk size
125 Slicing with a maximum chunk size
122 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
126 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
123 [[0], [11], [13], [15]]
127 [[0], [11], [13], [15]]
124 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
128 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
125 [[0], [11], [13, 15]]
129 [[0], [11], [13, 15]]
126
130
127 Slicing involving nullrev
131 Slicing involving nullrev
128 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
132 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
129 [[-1, 0], [11], [13, 15]]
133 [[-1, 0], [11], [13, 15]]
130 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
134 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
131 [[-1], [13], [15]]
135 [[-1], [13], [15]]
132 """
136 """
133 if targetsize is not None:
137 if targetsize is not None:
134 targetsize = max(targetsize, revlog._srmingapsize)
138 targetsize = max(targetsize, revlog._srmingapsize)
135 # targetsize should not be specified when evaluating delta candidates:
139 # targetsize should not be specified when evaluating delta candidates:
136 # * targetsize is used to ensure we stay within specification when reading,
140 # * targetsize is used to ensure we stay within specification when reading,
137 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
141 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
138 if densityslicing is None:
142 if densityslicing is None:
139 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
143 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
140 for chunk in densityslicing(revs,
144 for chunk in densityslicing(revs,
141 revlog._srdensitythreshold,
145 revlog._srdensitythreshold,
142 revlog._srmingapsize):
146 revlog._srmingapsize):
143 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
147 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
144 yield subchunk
148 yield subchunk
145
149
146 def _slicechunktosize(revlog, revs, targetsize=None):
150 def _slicechunktosize(revlog, revs, targetsize=None):
147 """slice revs to match the target size
151 """slice revs to match the target size
148
152
149 This is intended to be used on chunks that density slicing selected but that
153 This is intended to be used on chunks that density slicing selected but that
150 are still too large compared to the read guarantee of the revlog. This might
154 are still too large compared to the read guarantee of the revlog. This might
151 happen when the "minimal gap size" interrupted the slicing, or when chains are
155 happen when the "minimal gap size" interrupted the slicing, or when chains are
152 built in a way that creates large blocks next to each other.
156 built in a way that creates large blocks next to each other.
153
157
154 >>> data = [
158 >>> data = [
155 ... 3, #0 (3)
159 ... 3, #0 (3)
156 ... 5, #1 (2)
160 ... 5, #1 (2)
157 ... 6, #2 (1)
161 ... 6, #2 (1)
158 ... 8, #3 (2)
162 ... 8, #3 (2)
159 ... 8, #4 (empty)
163 ... 8, #4 (empty)
160 ... 11, #5 (3)
164 ... 11, #5 (3)
161 ... 12, #6 (1)
165 ... 12, #6 (1)
162 ... 13, #7 (1)
166 ... 13, #7 (1)
163 ... 14, #8 (1)
167 ... 14, #8 (1)
164 ... ]
168 ... ]
165
169
166 == All snapshots cases ==
170 == All snapshots cases ==
167 >>> revlog = _testrevlog(data, snapshot=range(9))
171 >>> revlog = _testrevlog(data, snapshot=range(9))
168
172
169 Cases where chunk is already small enough
173 Cases where chunk is already small enough
170 >>> list(_slicechunktosize(revlog, [0], 3))
174 >>> list(_slicechunktosize(revlog, [0], 3))
171 [[0]]
175 [[0]]
172 >>> list(_slicechunktosize(revlog, [6, 7], 3))
176 >>> list(_slicechunktosize(revlog, [6, 7], 3))
173 [[6, 7]]
177 [[6, 7]]
174 >>> list(_slicechunktosize(revlog, [0], None))
178 >>> list(_slicechunktosize(revlog, [0], None))
175 [[0]]
179 [[0]]
176 >>> list(_slicechunktosize(revlog, [6, 7], None))
180 >>> list(_slicechunktosize(revlog, [6, 7], None))
177 [[6, 7]]
181 [[6, 7]]
178
182
179 cases where we need actual slicing
183 cases where we need actual slicing
180 >>> list(_slicechunktosize(revlog, [0, 1], 3))
184 >>> list(_slicechunktosize(revlog, [0, 1], 3))
181 [[0], [1]]
185 [[0], [1]]
182 >>> list(_slicechunktosize(revlog, [1, 3], 3))
186 >>> list(_slicechunktosize(revlog, [1, 3], 3))
183 [[1], [3]]
187 [[1], [3]]
184 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
188 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
185 [[1, 2], [3]]
189 [[1, 2], [3]]
186 >>> list(_slicechunktosize(revlog, [3, 5], 3))
190 >>> list(_slicechunktosize(revlog, [3, 5], 3))
187 [[3], [5]]
191 [[3], [5]]
188 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
192 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
189 [[3], [5]]
193 [[3], [5]]
190 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
194 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
191 [[5], [6, 7, 8]]
195 [[5], [6, 7, 8]]
192 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
196 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
193 [[0], [1, 2], [3], [5], [6, 7, 8]]
197 [[0], [1, 2], [3], [5], [6, 7, 8]]
194
198
195 Case with too large individual chunk (must return valid chunk)
199 Case with too large individual chunk (must return valid chunk)
196 >>> list(_slicechunktosize(revlog, [0, 1], 2))
200 >>> list(_slicechunktosize(revlog, [0, 1], 2))
197 [[0], [1]]
201 [[0], [1]]
198 >>> list(_slicechunktosize(revlog, [1, 3], 1))
202 >>> list(_slicechunktosize(revlog, [1, 3], 1))
199 [[1], [3]]
203 [[1], [3]]
200 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
204 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
201 [[3], [5]]
205 [[3], [5]]
202
206
203 == No Snapshot cases ==
207 == No Snapshot cases ==
204 >>> revlog = _testrevlog(data)
208 >>> revlog = _testrevlog(data)
205
209
206 Cases where chunk is already small enough
210 Cases where chunk is already small enough
207 >>> list(_slicechunktosize(revlog, [0], 3))
211 >>> list(_slicechunktosize(revlog, [0], 3))
208 [[0]]
212 [[0]]
209 >>> list(_slicechunktosize(revlog, [6, 7], 3))
213 >>> list(_slicechunktosize(revlog, [6, 7], 3))
210 [[6, 7]]
214 [[6, 7]]
211 >>> list(_slicechunktosize(revlog, [0], None))
215 >>> list(_slicechunktosize(revlog, [0], None))
212 [[0]]
216 [[0]]
213 >>> list(_slicechunktosize(revlog, [6, 7], None))
217 >>> list(_slicechunktosize(revlog, [6, 7], None))
214 [[6, 7]]
218 [[6, 7]]
215
219
216 cases where we need actual slicing
220 cases where we need actual slicing
217 >>> list(_slicechunktosize(revlog, [0, 1], 3))
221 >>> list(_slicechunktosize(revlog, [0, 1], 3))
218 [[0], [1]]
222 [[0], [1]]
219 >>> list(_slicechunktosize(revlog, [1, 3], 3))
223 >>> list(_slicechunktosize(revlog, [1, 3], 3))
220 [[1], [3]]
224 [[1], [3]]
221 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
225 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
222 [[1], [2, 3]]
226 [[1], [2, 3]]
223 >>> list(_slicechunktosize(revlog, [3, 5], 3))
227 >>> list(_slicechunktosize(revlog, [3, 5], 3))
224 [[3], [5]]
228 [[3], [5]]
225 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
229 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
226 [[3], [4, 5]]
230 [[3], [4, 5]]
227 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
231 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
228 [[5], [6, 7, 8]]
232 [[5], [6, 7, 8]]
229 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
233 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
230 [[0], [1, 2], [3], [5], [6, 7, 8]]
234 [[0], [1, 2], [3], [5], [6, 7, 8]]
231
235
232 Case with too large individual chunk (must return valid chunk)
236 Case with too large individual chunk (must return valid chunk)
233 >>> list(_slicechunktosize(revlog, [0, 1], 2))
237 >>> list(_slicechunktosize(revlog, [0, 1], 2))
234 [[0], [1]]
238 [[0], [1]]
235 >>> list(_slicechunktosize(revlog, [1, 3], 1))
239 >>> list(_slicechunktosize(revlog, [1, 3], 1))
236 [[1], [3]]
240 [[1], [3]]
237 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
241 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
238 [[3], [5]]
242 [[3], [5]]
239
243
240 == mixed case ==
244 == mixed case ==
241 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
245 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
242 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
246 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
243 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
247 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
244 """
248 """
245 assert targetsize is None or 0 <= targetsize
249 assert targetsize is None or 0 <= targetsize
246 startdata = revlog.start(revs[0])
250 startdata = revlog.start(revs[0])
247 enddata = revlog.end(revs[-1])
251 enddata = revlog.end(revs[-1])
248 fullspan = enddata - startdata
252 fullspan = enddata - startdata
249 if targetsize is None or fullspan <= targetsize:
253 if targetsize is None or fullspan <= targetsize:
250 yield revs
254 yield revs
251 return
255 return
252
256
253 startrevidx = 0
257 startrevidx = 0
254 endrevidx = 1
258 endrevidx = 1
255 iterrevs = enumerate(revs)
259 iterrevs = enumerate(revs)
256 next(iterrevs) # skip first rev.
260 next(iterrevs) # skip first rev.
257 # first step: get snapshots out of the way
261 # first step: get snapshots out of the way
258 for idx, r in iterrevs:
262 for idx, r in iterrevs:
259 span = revlog.end(r) - startdata
263 span = revlog.end(r) - startdata
260 snapshot = revlog.issnapshot(r)
264 snapshot = revlog.issnapshot(r)
261 if span <= targetsize and snapshot:
265 if span <= targetsize and snapshot:
262 endrevidx = idx + 1
266 endrevidx = idx + 1
263 else:
267 else:
264 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
268 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
265 if chunk:
269 if chunk:
266 yield chunk
270 yield chunk
267 startrevidx = idx
271 startrevidx = idx
268 startdata = revlog.start(r)
272 startdata = revlog.start(r)
269 endrevidx = idx + 1
273 endrevidx = idx + 1
270 if not snapshot:
274 if not snapshot:
271 break
275 break
272
276
273 # for the others, we use binary slicing to quickly converge toward valid
277 # for the others, we use binary slicing to quickly converge toward valid
274 # chunks (otherwise, we might end up looking for start/end of many
278 # chunks (otherwise, we might end up looking for start/end of many
275 # revisions). This logic is not looking for the perfect slicing point, it
279 # revisions). This logic is not looking for the perfect slicing point, it
276 # focuses on quickly converging toward valid chunks.
280 # focuses on quickly converging toward valid chunks.
277 nbitem = len(revs)
281 nbitem = len(revs)
278 while (enddata - startdata) > targetsize:
282 while (enddata - startdata) > targetsize:
279 endrevidx = nbitem
283 endrevidx = nbitem
280 if nbitem - startrevidx <= 1:
284 if nbitem - startrevidx <= 1:
281 break # protect against individual chunk larger than limit
285 break # protect against individual chunk larger than limit
282 localenddata = revlog.end(revs[endrevidx - 1])
286 localenddata = revlog.end(revs[endrevidx - 1])
283 span = localenddata - startdata
287 span = localenddata - startdata
284 while span > targetsize:
288 while span > targetsize:
285 if endrevidx - startrevidx <= 1:
289 if endrevidx - startrevidx <= 1:
286 break # protect against individual chunk larger than limit
290 break # protect against individual chunk larger than limit
287 endrevidx -= (endrevidx - startrevidx) // 2
291 endrevidx -= (endrevidx - startrevidx) // 2
288 localenddata = revlog.end(revs[endrevidx - 1])
292 localenddata = revlog.end(revs[endrevidx - 1])
289 span = localenddata - startdata
293 span = localenddata - startdata
290 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
294 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
291 if chunk:
295 if chunk:
292 yield chunk
296 yield chunk
293 startrevidx = endrevidx
297 startrevidx = endrevidx
294 startdata = revlog.start(revs[startrevidx])
298 startdata = revlog.start(revs[startrevidx])
295
299
296 chunk = _trimchunk(revlog, revs, startrevidx)
300 chunk = _trimchunk(revlog, revs, startrevidx)
297 if chunk:
301 if chunk:
298 yield chunk
302 yield chunk
299
303
300 def _slicechunktodensity(revlog, revs, targetdensity=0.5,
304 def _slicechunktodensity(revlog, revs, targetdensity=0.5,
301 mingapsize=0):
305 mingapsize=0):
302 """slice revs to reduce the amount of unrelated data to be read from disk.
306 """slice revs to reduce the amount of unrelated data to be read from disk.
303
307
304 ``revs`` is sliced into groups that should be read in one go.
308 ``revs`` is sliced into groups that should be read in one go.
305 Assume that revs are sorted.
309 Assume that revs are sorted.
306
310
307 The initial chunk is sliced until the overall density (payload/chunks-span
311 The initial chunk is sliced until the overall density (payload/chunks-span
308 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
312 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
309 skipped.
313 skipped.
310
314
311 >>> revlog = _testrevlog([
315 >>> revlog = _testrevlog([
312 ... 5, #00 (5)
316 ... 5, #00 (5)
313 ... 10, #01 (5)
317 ... 10, #01 (5)
314 ... 12, #02 (2)
318 ... 12, #02 (2)
315 ... 12, #03 (empty)
319 ... 12, #03 (empty)
316 ... 27, #04 (15)
320 ... 27, #04 (15)
317 ... 31, #05 (4)
321 ... 31, #05 (4)
318 ... 31, #06 (empty)
322 ... 31, #06 (empty)
319 ... 42, #07 (11)
323 ... 42, #07 (11)
320 ... 47, #08 (5)
324 ... 47, #08 (5)
321 ... 47, #09 (empty)
325 ... 47, #09 (empty)
322 ... 48, #10 (1)
326 ... 48, #10 (1)
323 ... 51, #11 (3)
327 ... 51, #11 (3)
324 ... 74, #12 (23)
328 ... 74, #12 (23)
325 ... 85, #13 (11)
329 ... 85, #13 (11)
326 ... 86, #14 (1)
330 ... 86, #14 (1)
327 ... 91, #15 (5)
331 ... 91, #15 (5)
328 ... ])
332 ... ])
329
333
330 >>> list(_slicechunktodensity(revlog, list(range(16))))
334 >>> list(_slicechunktodensity(revlog, list(range(16))))
331 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
335 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
332 >>> list(_slicechunktodensity(revlog, [0, 15]))
336 >>> list(_slicechunktodensity(revlog, [0, 15]))
333 [[0], [15]]
337 [[0], [15]]
334 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
338 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
335 [[0], [11], [15]]
339 [[0], [11], [15]]
336 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
340 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
337 [[0], [11, 13, 15]]
341 [[0], [11, 13, 15]]
338 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
342 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
339 [[1, 2], [5, 8, 10, 11], [14]]
343 [[1, 2], [5, 8, 10, 11], [14]]
340 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
344 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
341 ... mingapsize=20))
345 ... mingapsize=20))
342 [[1, 2, 3, 5, 8, 10, 11], [14]]
346 [[1, 2, 3, 5, 8, 10, 11], [14]]
343 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
347 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
344 ... targetdensity=0.95))
348 ... targetdensity=0.95))
345 [[1, 2], [5], [8, 10, 11], [14]]
349 [[1, 2], [5], [8, 10, 11], [14]]
346 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
350 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
347 ... targetdensity=0.95, mingapsize=12))
351 ... targetdensity=0.95, mingapsize=12))
348 [[1, 2], [5, 8, 10, 11], [14]]
352 [[1, 2], [5, 8, 10, 11], [14]]
349 """
353 """
350 start = revlog.start
354 start = revlog.start
351 length = revlog.length
355 length = revlog.length
352
356
353 if len(revs) <= 1:
357 if len(revs) <= 1:
354 yield revs
358 yield revs
355 return
359 return
356
360
357 deltachainspan = segmentspan(revlog, revs)
361 deltachainspan = segmentspan(revlog, revs)
358
362
359 if deltachainspan < mingapsize:
363 if deltachainspan < mingapsize:
360 yield revs
364 yield revs
361 return
365 return
362
366
363 readdata = deltachainspan
367 readdata = deltachainspan
364 chainpayload = sum(length(r) for r in revs)
368 chainpayload = sum(length(r) for r in revs)
365
369
366 if deltachainspan:
370 if deltachainspan:
367 density = chainpayload / float(deltachainspan)
371 density = chainpayload / float(deltachainspan)
368 else:
372 else:
369 density = 1.0
373 density = 1.0
370
374
371 if density >= targetdensity:
375 if density >= targetdensity:
372 yield revs
376 yield revs
373 return
377 return
374
378
375 # Store the gaps in a list, to be sorted and consumed by decreasing size
379 # Store the gaps in a list, to be sorted and consumed by decreasing size
376 gaps = []
380 gaps = []
377 prevend = None
381 prevend = None
378 for i, rev in enumerate(revs):
382 for i, rev in enumerate(revs):
379 revstart = start(rev)
383 revstart = start(rev)
380 revlen = length(rev)
384 revlen = length(rev)
381
385
382 # Skip empty revisions to form larger holes
386 # Skip empty revisions to form larger holes
383 if revlen == 0:
387 if revlen == 0:
384 continue
388 continue
385
389
386 if prevend is not None:
390 if prevend is not None:
387 gapsize = revstart - prevend
391 gapsize = revstart - prevend
388 # only consider holes that are large enough
392 # only consider holes that are large enough
389 if gapsize > mingapsize:
393 if gapsize > mingapsize:
390 gaps.append((gapsize, i))
394 gaps.append((gapsize, i))
391
395
392 prevend = revstart + revlen
396 prevend = revstart + revlen
393 # sort the gaps so they can be popped from largest to smallest
397 # sort the gaps so they can be popped from largest to smallest
394 gaps.sort()
398 gaps.sort()
395
399
396 # Collect the indices of the largest holes until the density is acceptable
400 # Collect the indices of the largest holes until the density is acceptable
397 selected = []
401 selected = []
398 while gaps and density < targetdensity:
402 while gaps and density < targetdensity:
399 gapsize, gapidx = gaps.pop()
403 gapsize, gapidx = gaps.pop()
400
404
401 selected.append(gapidx)
405 selected.append(gapidx)
402
406
403 # the gaps list is sorted in increasing order, so pop() always
407 # the gaps list is sorted in increasing order, so pop() always
404 # returns the largest remaining gap
408 # returns the largest remaining gap
405 readdata -= gapsize
409 readdata -= gapsize
406 if readdata > 0:
410 if readdata > 0:
407 density = chainpayload / float(readdata)
411 density = chainpayload / float(readdata)
408 else:
412 else:
409 density = 1.0
413 density = 1.0
410 selected.sort()
414 selected.sort()
411
415
412 # Cut the revs at collected indices
416 # Cut the revs at collected indices
413 previdx = 0
417 previdx = 0
414 for idx in selected:
418 for idx in selected:
415
419
416 chunk = _trimchunk(revlog, revs, previdx, idx)
420 chunk = _trimchunk(revlog, revs, previdx, idx)
417 if chunk:
421 if chunk:
418 yield chunk
422 yield chunk
419
423
420 previdx = idx
424 previdx = idx
421
425
422 chunk = _trimchunk(revlog, revs, previdx)
426 chunk = _trimchunk(revlog, revs, previdx)
423 if chunk:
427 if chunk:
424 yield chunk
428 yield chunk
425
429
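
To make the gap-removal arithmetic of _slicechunktodensity concrete, here is a sketch with hypothetical numbers (not taken from any real revlog); removing the single largest gap is often enough to push the density over the target:

    chainpayload = 30            # bytes of actual revision data in the chain
    readdata = 100               # total byte span that would be read
    gaps = [(5, 2), (60, 4)]     # (gapsize, index) pairs, sorted increasingly
    targetdensity = 0.5

    density = chainpayload / float(readdata)       # 0.3, too sparse
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()               # largest remaining gap
        selected.append(gapidx)
        readdata -= gapsize
        density = chainpayload / float(readdata)   # 30 / 40 = 0.75 after one pop
    # the revs would then be cut at the selected indices (here: index 4)
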
426 def _trimchunk(revlog, revs, startidx, endidx=None):
430 def _trimchunk(revlog, revs, startidx, endidx=None):
427 """returns revs[startidx:endidx] without empty trailing revs
431 """returns revs[startidx:endidx] without empty trailing revs
428
432
429 Doctest Setup
433 Doctest Setup
430 >>> revlog = _testrevlog([
434 >>> revlog = _testrevlog([
431 ... 5, #0
435 ... 5, #0
432 ... 10, #1
436 ... 10, #1
433 ... 12, #2
437 ... 12, #2
434 ... 12, #3 (empty)
438 ... 12, #3 (empty)
435 ... 17, #4
439 ... 17, #4
436 ... 21, #5
440 ... 21, #5
437 ... 21, #6 (empty)
441 ... 21, #6 (empty)
438 ... ])
442 ... ])
439
443
440 Contiguous cases:
444 Contiguous cases:
441 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
445 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
442 [0, 1, 2, 3, 4, 5]
446 [0, 1, 2, 3, 4, 5]
443 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
447 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
444 [0, 1, 2, 3, 4]
448 [0, 1, 2, 3, 4]
445 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
449 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
446 [0, 1, 2]
450 [0, 1, 2]
447 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
448 [2]
452 [2]
449 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
453 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
450 [3, 4, 5]
454 [3, 4, 5]
451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
455 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
452 [3, 4]
456 [3, 4]
453
457
454 Discontiguous cases:
458 Discontiguous cases:
455 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
459 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
456 [1, 3, 5]
460 [1, 3, 5]
457 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
461 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
458 [1]
462 [1]
459 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
463 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
460 [3, 5]
464 [3, 5]
461 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
465 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
462 [3, 5]
466 [3, 5]
463 """
467 """
464 length = revlog.length
468 length = revlog.length
465
469
466 if endidx is None:
470 if endidx is None:
467 endidx = len(revs)
471 endidx = len(revs)
468
472
469 # If we have a non-empty delta candidate, there is nothing to trim
473 # If we have a non-empty delta candidate, there is nothing to trim
470 if revs[endidx - 1] < len(revlog):
474 if revs[endidx - 1] < len(revlog):
471 # Trim empty revs at the end, except the very first revision of a chain
475 # Trim empty revs at the end, except the very first revision of a chain
472 while (endidx > 1
476 while (endidx > 1
473 and endidx > startidx
477 and endidx > startidx
474 and length(revs[endidx - 1]) == 0):
478 and length(revs[endidx - 1]) == 0):
475 endidx -= 1
479 endidx -= 1
476
480
477 return revs[startidx:endidx]
481 return revs[startidx:endidx]
478
482
479 def segmentspan(revlog, revs):
483 def segmentspan(revlog, revs):
480 """Get the byte span of a segment of revisions
484 """Get the byte span of a segment of revisions
481
485
482 revs is a sorted array of revision numbers
486 revs is a sorted array of revision numbers
483
487
484 >>> revlog = _testrevlog([
488 >>> revlog = _testrevlog([
485 ... 5, #0
489 ... 5, #0
486 ... 10, #1
490 ... 10, #1
487 ... 12, #2
491 ... 12, #2
488 ... 12, #3 (empty)
492 ... 12, #3 (empty)
489 ... 17, #4
493 ... 17, #4
490 ... ])
494 ... ])
491
495
492 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
496 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
493 17
497 17
494 >>> segmentspan(revlog, [0, 4])
498 >>> segmentspan(revlog, [0, 4])
495 17
499 17
496 >>> segmentspan(revlog, [3, 4])
500 >>> segmentspan(revlog, [3, 4])
497 5
501 5
498 >>> segmentspan(revlog, [1, 2, 3,])
502 >>> segmentspan(revlog, [1, 2, 3,])
499 7
503 7
500 >>> segmentspan(revlog, [1, 3])
504 >>> segmentspan(revlog, [1, 3])
501 7
505 7
502 """
506 """
503 if not revs:
507 if not revs:
504 return 0
508 return 0
505 end = revlog.end(revs[-1])
509 end = revlog.end(revs[-1])
506 return end - revlog.start(revs[0])
510 return end - revlog.start(revs[0])
507
511
508 def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
512 def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
509 """build full text from a (base, delta) pair and other metadata"""
513 """build full text from a (base, delta) pair and other metadata"""
510 # special case deltas which replace entire base; no need to decode
514 # special case deltas which replace entire base; no need to decode
511 # base revision. this neatly avoids censored bases, which throw when
515 # base revision. this neatly avoids censored bases, which throw when
512 # they're decoded.
516 # they're decoded.
513 hlen = struct.calcsize(">lll")
517 hlen = struct.calcsize(">lll")
514 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
518 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
515 len(delta) - hlen):
519 len(delta) - hlen):
516 fulltext = delta[hlen:]
520 fulltext = delta[hlen:]
517 else:
521 else:
518 # the delta base is the rawtext before it was changed by flag
522 # the delta base is the rawtext before it was changed by flag
519 # processors, which is equivalent to the non-raw text
523 # processors, which is equivalent to the non-raw text
520 basetext = revlog.revision(baserev, _df=fh, raw=False)
524 basetext = revlog.revision(baserev, _df=fh, raw=False)
521 fulltext = mdiff.patch(basetext, delta)
525 fulltext = mdiff.patch(basetext, delta)
522
526
523 try:
527 try:
524 validatehash = revlog._processflagsraw(fulltext, flags)
528 validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
525 if validatehash:
529 if validatehash:
526 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
530 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
527 if flags & REVIDX_ISCENSORED:
531 if flags & REVIDX_ISCENSORED:
528 raise error.StorageError(_('node %s is not censored') %
532 raise error.StorageError(_('node %s is not censored') %
529 expectednode)
533 expectednode)
530 except error.CensoredNodeError:
534 except error.CensoredNodeError:
531 # must pass the censored index flag to add censored revisions
535 # must pass the censored index flag to add censored revisions
532 if not flags & REVIDX_ISCENSORED:
536 if not flags & REVIDX_ISCENSORED:
533 raise
537 raise
534 return fulltext
538 return fulltext
535
539
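
The header comparison in _textfromdelta relies on the shape of the binary delta format: a delta is a sequence of ">lll" (start, end, new-length) hunk headers, each followed by replacement data, so a delta replacing the entire base starts with (0, baselen, newlen). A self-contained sketch of that check, under the assumption that mdiff.replacediffheader packs exactly this header (it does not import mercurial):

    import struct

    def replace_header(baselen, newlen):
        # assumed equivalent of mdiff.replacediffheader(baselen, newlen)
        return struct.pack(">lll", 0, baselen, newlen)

    base = b'old content'
    new = b'new full text'
    delta = replace_header(len(base), len(new)) + new

    hlen = struct.calcsize(">lll")
    assert delta[:hlen] == replace_header(len(base), len(delta) - hlen)
    assert delta[hlen:] == new  # full text recovered without decoding the base
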
536 @attr.s(slots=True, frozen=True)
540 @attr.s(slots=True, frozen=True)
537 class _deltainfo(object):
541 class _deltainfo(object):
538 distance = attr.ib()
542 distance = attr.ib()
539 deltalen = attr.ib()
543 deltalen = attr.ib()
540 data = attr.ib()
544 data = attr.ib()
541 base = attr.ib()
545 base = attr.ib()
542 chainbase = attr.ib()
546 chainbase = attr.ib()
543 chainlen = attr.ib()
547 chainlen = attr.ib()
544 compresseddeltalen = attr.ib()
548 compresseddeltalen = attr.ib()
545 snapshotdepth = attr.ib()
549 snapshotdepth = attr.ib()
546
550
547 def isgooddeltainfo(revlog, deltainfo, revinfo):
551 def isgooddeltainfo(revlog, deltainfo, revinfo):
548 """Returns True if the given delta is good. Good means that it is within
552 """Returns True if the given delta is good. Good means that it is within
549 the disk span, disk size, and chain length bounds that we know to be
553 the disk span, disk size, and chain length bounds that we know to be
550 performant."""
554 performant."""
551 if deltainfo is None:
555 if deltainfo is None:
552 return False
556 return False
553
557
554 # - 'deltainfo.distance' is the distance from the base revision --
558 # - 'deltainfo.distance' is the distance from the base revision --
555 # bounding it limits the amount of I/O we need to do.
559 # bounding it limits the amount of I/O we need to do.
556 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
560 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
557 # deltas we need to apply -- bounding it limits the amount of CPU
561 # deltas we need to apply -- bounding it limits the amount of CPU
558 # we consume.
562 # we consume.
559
563
560 textlen = revinfo.textlen
564 textlen = revinfo.textlen
561 defaultmax = textlen * 4
565 defaultmax = textlen * 4
562 maxdist = revlog._maxdeltachainspan
566 maxdist = revlog._maxdeltachainspan
563 if not maxdist:
567 if not maxdist:
564 maxdist = deltainfo.distance # ensure the conditional passes
568 maxdist = deltainfo.distance # ensure the conditional passes
565 maxdist = max(maxdist, defaultmax)
569 maxdist = max(maxdist, defaultmax)
566
570
567 # Bad delta from read span:
571 # Bad delta from read span:
568 #
572 #
569 # If the span of data read is larger than the maximum allowed.
573 # If the span of data read is larger than the maximum allowed.
570 #
574 #
571 # In the sparse-revlog case, we rely on the associated "sparse reading"
575 # In the sparse-revlog case, we rely on the associated "sparse reading"
572 # to avoid issues related to the span of data. In theory, it would be
576 # to avoid issues related to the span of data. In theory, it would be
573 # possible to build a pathological revlog where the delta pattern would lead
577 # possible to build a pathological revlog where the delta pattern would lead
574 # to too many reads. However, this does not happen in practice at all. So
578 # to too many reads. However, this does not happen in practice at all. So
575 # we skip the span check entirely.
579 # we skip the span check entirely.
576 if not revlog._sparserevlog and maxdist < deltainfo.distance:
580 if not revlog._sparserevlog and maxdist < deltainfo.distance:
577 return False
581 return False
578
582
579 # Bad delta from new delta size:
583 # Bad delta from new delta size:
580 #
584 #
581 # If the delta size is larger than the target text, storing the
585 # If the delta size is larger than the target text, storing the
582 # delta will be inefficient.
586 # delta will be inefficient.
583 if textlen < deltainfo.deltalen:
587 if textlen < deltainfo.deltalen:
584 return False
588 return False
585
589
586 # Bad delta from cumulated payload size:
590 # Bad delta from cumulated payload size:
587 #
591 #
588 # If the sum of deltas gets larger than K * the target text length.
592 # If the sum of deltas gets larger than K * the target text length.
589 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
593 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
590 return False
594 return False
591
595
592 # Bad delta from chain length:
596 # Bad delta from chain length:
593 #
597 #
594 # If the number of deltas in the chain gets too high.
598 # If the number of deltas in the chain gets too high.
595 if (revlog._maxchainlen
599 if (revlog._maxchainlen
596 and revlog._maxchainlen < deltainfo.chainlen):
600 and revlog._maxchainlen < deltainfo.chainlen):
597 return False
601 return False
598
602
599 # bad delta from intermediate snapshot size limit
603 # bad delta from intermediate snapshot size limit
600 #
604 #
601 # If an intermediate snapshot size is higher than the limit. The
605 # If an intermediate snapshot size is higher than the limit. The
602 # limit exists to prevent endless chains of intermediate deltas from
606 # limit exists to prevent endless chains of intermediate deltas from
603 # being created.
607 # being created.
604 if (deltainfo.snapshotdepth is not None and
608 if (deltainfo.snapshotdepth is not None and
605 (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
609 (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
606 return False
610 return False
607
611
608 # bad delta if new intermediate snapshot is larger than the previous
612 # bad delta if new intermediate snapshot is larger than the previous
609 # snapshot
613 # snapshot
610 if (deltainfo.snapshotdepth
614 if (deltainfo.snapshotdepth
611 and revlog.length(deltainfo.base) < deltainfo.deltalen):
615 and revlog.length(deltainfo.base) < deltainfo.deltalen):
612 return False
616 return False
613
617
614 return True
618 return True
615
619
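
A sketch of the two size bounds enforced by isgooddeltainfo, with invented numbers for a revision whose full text is 1000 bytes:

    textlen = 1000                   # size of the revision's full text
    LIMIT_DELTA2TEXT = 2

    deltalen = 1200                  # candidate delta, bigger than the text itself
    compresseddeltalen = 2500        # total payload of the resulting delta chain

    too_big = textlen < deltalen                                       # True -> reject
    chain_too_heavy = textlen * LIMIT_DELTA2TEXT < compresseddeltalen  # True -> reject
    # either condition alone is enough for isgooddeltainfo() to return False
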
616 # If a revision's full text is that much bigger than a base candidate full
620 # If a revision's full text is that much bigger than a base candidate full
617 # text's, it is very unlikely that it will produce a valid delta. We no longer
621 # text's, it is very unlikely that it will produce a valid delta. We no longer
618 # consider these candidates.
622 # consider these candidates.
619 LIMIT_BASE2TEXT = 500
623 LIMIT_BASE2TEXT = 500
620
624
621 def _candidategroups(revlog, textlen, p1, p2, cachedelta):
625 def _candidategroups(revlog, textlen, p1, p2, cachedelta):
622 """Provides group of revision to be tested as delta base
626 """Provides group of revision to be tested as delta base
623
627
624 This top level function focus on emitting groups with unique and worthwhile
628 This top level function focus on emitting groups with unique and worthwhile
625 content. See _raw_candidate_groups for details about the group order.
629 content. See _raw_candidate_groups for details about the group order.
626 """
630 """
627 # should we try to build a delta?
631 # should we try to build a delta?
628 if not (len(revlog) and revlog._storedeltachains):
632 if not (len(revlog) and revlog._storedeltachains):
629 yield None
633 yield None
630 return
634 return
631
635
632 deltalength = revlog.length
636 deltalength = revlog.length
633 deltaparent = revlog.deltaparent
637 deltaparent = revlog.deltaparent
634 sparse = revlog._sparserevlog
638 sparse = revlog._sparserevlog
635 good = None
639 good = None
636
640
637 deltas_limit = textlen * LIMIT_DELTA2TEXT
641 deltas_limit = textlen * LIMIT_DELTA2TEXT
638
642
639 tested = {nullrev}
643 tested = {nullrev}
640 candidates = _refinedgroups(revlog, p1, p2, cachedelta)
644 candidates = _refinedgroups(revlog, p1, p2, cachedelta)
641 while True:
645 while True:
642 temptative = candidates.send(good)
646 temptative = candidates.send(good)
643 if temptative is None:
647 if temptative is None:
644 break
648 break
645 group = []
649 group = []
646 for rev in temptative:
650 for rev in temptative:
647 # skip over empty deltas (no need to include them in a chain)
651 # skip over empty deltas (no need to include them in a chain)
648 while (revlog._generaldelta
652 while (revlog._generaldelta
649 and not (rev == nullrev
653 and not (rev == nullrev
650 or rev in tested
654 or rev in tested
651 or deltalength(rev))):
655 or deltalength(rev))):
652 tested.add(rev)
656 tested.add(rev)
653 rev = deltaparent(rev)
657 rev = deltaparent(rev)
654 # no need to try a delta against nullrev, this will be done as a
658 # no need to try a delta against nullrev, this will be done as a
655 # last resort.
659 # last resort.
656 if rev == nullrev:
660 if rev == nullrev:
657 continue
661 continue
658 # filter out revisions we already tested
662 # filter out revisions we already tested
659 if rev in tested:
663 if rev in tested:
660 continue
664 continue
661 tested.add(rev)
665 tested.add(rev)
662 # filter out delta bases that will never produce a good delta
666 # filter out delta bases that will never produce a good delta
663 if deltas_limit < revlog.length(rev):
667 if deltas_limit < revlog.length(rev):
664 continue
668 continue
665 if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
669 if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
666 continue
670 continue
667 # no delta for rawtext-changing revs (see "candelta" for why)
671 # no delta for rawtext-changing revs (see "candelta" for why)
668 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
672 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
669 continue
673 continue
670 # If we reach here, we are about to build and test a delta.
674 # If we reach here, we are about to build and test a delta.
671 # The delta building process will compute the chaininfo in all
675 # The delta building process will compute the chaininfo in all
672 # cases; since that computation is cached, it is fine to access it
676 # cases; since that computation is cached, it is fine to access it
673 # here too.
677 # here too.
674 chainlen, chainsize = revlog._chaininfo(rev)
678 chainlen, chainsize = revlog._chaininfo(rev)
675 # if chain will be too long, skip base
679 # if chain will be too long, skip base
676 if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
680 if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
677 continue
681 continue
678 # if chain already have too much data, skip base
682 # if chain already have too much data, skip base
679 if deltas_limit < chainsize:
683 if deltas_limit < chainsize:
680 continue
684 continue
681 if sparse and revlog.upperboundcomp is not None:
685 if sparse and revlog.upperboundcomp is not None:
682 maxcomp = revlog.upperboundcomp
686 maxcomp = revlog.upperboundcomp
683 basenotsnap = (p1, p2, nullrev)
687 basenotsnap = (p1, p2, nullrev)
684 if rev not in basenotsnap and revlog.issnapshot(rev):
688 if rev not in basenotsnap and revlog.issnapshot(rev):
685 snapshotdepth = revlog.snapshotdepth(rev)
689 snapshotdepth = revlog.snapshotdepth(rev)
686 # If text is significantly larger than the base, we can
690 # If text is significantly larger than the base, we can
687 # expect the resulting delta to be proportional to the size
691 # expect the resulting delta to be proportional to the size
688 # difference
692 # difference
689 revsize = revlog.rawsize(rev)
693 revsize = revlog.rawsize(rev)
690 rawsizedistance = max(textlen - revsize, 0)
694 rawsizedistance = max(textlen - revsize, 0)
691 # use an estimate of the compression upper bound.
695 # use an estimate of the compression upper bound.
692 lowestrealisticdeltalen = rawsizedistance // maxcomp
696 lowestrealisticdeltalen = rawsizedistance // maxcomp
693
697
694 # check the absolute constraint on the delta size
698 # check the absolute constraint on the delta size
695 snapshotlimit = textlen >> snapshotdepth
699 snapshotlimit = textlen >> snapshotdepth
696 if snapshotlimit < lowestrealisticdeltalen:
700 if snapshotlimit < lowestrealisticdeltalen:
697 # delta lower bound is larger than accepted upper bound
701 # delta lower bound is larger than accepted upper bound
698 continue
702 continue
699
703
700 # check the relative constraint on the delta size
704 # check the relative constraint on the delta size
701 revlength = revlog.length(rev)
705 revlength = revlog.length(rev)
702 if revlength < lowestrealisticdeltalen:
706 if revlength < lowestrealisticdeltalen:
703 # delta probable lower bound is larger than target base
707 # delta probable lower bound is larger than target base
704 continue
708 continue
705
709
706 group.append(rev)
710 group.append(rev)
707 if group:
711 if group:
708 # XXX: in the sparse revlog case, group can become large,
712 # XXX: in the sparse revlog case, group can become large,
709 # impacting performance. Some bounding or slicing mechanism
713 # impacting performance. Some bounding or slicing mechanism
710 # would help to reduce this impact.
714 # would help to reduce this impact.
711 good = yield tuple(group)
715 good = yield tuple(group)
712 yield None
716 yield None
713
717
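
_candidategroups drives the candidate generator through generator.send(): each group of candidate bases is yielded out, and the revision the caller judged "good" (or None) is sent back in, letting the producer refine around it. A reduced sketch of that protocol, with made-up group names (the real producer is _refinedgroups, defined below):

    def producer():
        good = yield ('coarse-group',)           # cheap candidates first
        if good is None:
            good = yield ('fallback-group',)     # nothing good yet, try more
        if good is not None:
            yield ('refined-around-%s' % good,)  # refine around the chosen base
        yield None                               # end-of-protocol marker

    candidates = producer()
    group = candidates.send(None)        # prime the generator -> ('coarse-group',)
    group = candidates.send('base-rev')  # report a good base -> refinement group
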
714 def _findsnapshots(revlog, cache, start_rev):
718 def _findsnapshots(revlog, cache, start_rev):
715 """find snapshot from start_rev to tip"""
719 """find snapshot from start_rev to tip"""
716 if util.safehasattr(revlog.index, 'findsnapshots'):
720 if util.safehasattr(revlog.index, 'findsnapshots'):
717 revlog.index.findsnapshots(cache, start_rev)
721 revlog.index.findsnapshots(cache, start_rev)
718 else:
722 else:
719 deltaparent = revlog.deltaparent
723 deltaparent = revlog.deltaparent
720 issnapshot = revlog.issnapshot
724 issnapshot = revlog.issnapshot
721 for rev in revlog.revs(start_rev):
725 for rev in revlog.revs(start_rev):
722 if issnapshot(rev):
726 if issnapshot(rev):
723 cache[deltaparent(rev)].append(rev)
727 cache[deltaparent(rev)].append(rev)
724
728
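
The cache filled by _findsnapshots maps a delta-parent revision to the snapshots stored as deltas against it. A sketch of the fallback loop above, on a hypothetical four-revision history:

    import collections

    deltaparent = {0: -1, 1: 0, 2: 0, 3: 2}            # rev -> delta parent (made up)
    issnapshot = {0: True, 1: False, 2: True, 3: True}

    cache = collections.defaultdict(list)
    for rev in sorted(deltaparent):
        if issnapshot[rev]:
            cache[deltaparent[rev]].append(rev)
    # cache -> {-1: [0], 0: [2], 2: [3]}
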
725 def _refinedgroups(revlog, p1, p2, cachedelta):
729 def _refinedgroups(revlog, p1, p2, cachedelta):
726 good = None
730 good = None
727 # First we try to reuse the delta contained in the bundle.
731 # First we try to reuse the delta contained in the bundle.
728 # (or from the source revlog)
732 # (or from the source revlog)
729 #
733 #
730 # This logic only applies to general delta repositories and can be disabled
734 # This logic only applies to general delta repositories and can be disabled
731 # through configuration. Disabling reuse of the source delta is useful when
735 # through configuration. Disabling reuse of the source delta is useful when
732 # we want to make sure we recompute "optimal" deltas.
736 # we want to make sure we recompute "optimal" deltas.
733 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
737 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
734 # Assume what we received from the server is a good choice
738 # Assume what we received from the server is a good choice
735 # build delta will reuse the cache
739 # build delta will reuse the cache
736 good = yield (cachedelta[0],)
740 good = yield (cachedelta[0],)
737 if good is not None:
741 if good is not None:
738 yield None
742 yield None
739 return
743 return
740 snapshots = collections.defaultdict(list)
744 snapshots = collections.defaultdict(list)
741 for candidates in _rawgroups(revlog, p1, p2, cachedelta, snapshots):
745 for candidates in _rawgroups(revlog, p1, p2, cachedelta, snapshots):
742 good = yield candidates
746 good = yield candidates
743 if good is not None:
747 if good is not None:
744 break
748 break
745
749
746 # If sparse revlog is enabled, we can try to refine the available deltas
750 # If sparse revlog is enabled, we can try to refine the available deltas
747 if not revlog._sparserevlog:
751 if not revlog._sparserevlog:
748 yield None
752 yield None
749 return
753 return
750
754
751 # if we have a refinable value, try to refine it
755 # if we have a refinable value, try to refine it
752 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
756 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
753 # refine snapshot down
757 # refine snapshot down
754 previous = None
758 previous = None
755 while previous != good:
759 while previous != good:
756 previous = good
760 previous = good
757 base = revlog.deltaparent(good)
761 base = revlog.deltaparent(good)
758 if base == nullrev:
762 if base == nullrev:
759 break
763 break
760 good = yield (base,)
764 good = yield (base,)
761 # refine snapshot up
765 # refine snapshot up
762 if not snapshots:
766 if not snapshots:
763 _findsnapshots(revlog, snapshots, good + 1)
767 _findsnapshots(revlog, snapshots, good + 1)
764 previous = None
768 previous = None
765 while good != previous:
769 while good != previous:
766 previous = good
770 previous = good
767 children = tuple(sorted(c for c in snapshots[good]))
771 children = tuple(sorted(c for c in snapshots[good]))
768 good = yield children
772 good = yield children
769
773
770 # we have found nothing
774 # we have found nothing
771 yield None
775 yield None
772
776
773 def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):
777 def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):
774 """Provides group of revision to be tested as delta base
778 """Provides group of revision to be tested as delta base
775
779
776 This lower level function focus on emitting delta theorically interresting
780 This lower level function focus on emitting delta theorically interresting
777 without looking it any practical details.
781 without looking it any practical details.
778
782
779 The group order aims at providing fast or small candidates first.
783 The group order aims at providing fast or small candidates first.
780 """
784 """
781 gdelta = revlog._generaldelta
785 gdelta = revlog._generaldelta
782 # gate sparse behind general-delta because of issue6056
786 # gate sparse behind general-delta because of issue6056
783 sparse = gdelta and revlog._sparserevlog
787 sparse = gdelta and revlog._sparserevlog
784 curr = len(revlog)
788 curr = len(revlog)
785 prev = curr - 1
789 prev = curr - 1
786 deltachain = lambda rev: revlog._deltachain(rev)[0]
790 deltachain = lambda rev: revlog._deltachain(rev)[0]
787
791
788 if gdelta:
792 if gdelta:
789 # exclude already lazy tested base if any
793 # exclude already lazy tested base if any
790 parents = [p for p in (p1, p2) if p != nullrev]
794 parents = [p for p in (p1, p2) if p != nullrev]
791
795
792 if not revlog._deltabothparents and len(parents) == 2:
796 if not revlog._deltabothparents and len(parents) == 2:
793 parents.sort()
797 parents.sort()
794 # To minimize the chance of having to build a fulltext,
798 # To minimize the chance of having to build a fulltext,
795 # pick first whichever parent is closest to us (max rev)
799 # pick first whichever parent is closest to us (max rev)
796 yield (parents[1],)
800 yield (parents[1],)
797 # then the other one (min rev) if the first did not fit
801 # then the other one (min rev) if the first did not fit
798 yield (parents[0],)
802 yield (parents[0],)
799 elif len(parents) > 0:
803 elif len(parents) > 0:
800 # Test all parents (1 or 2), and keep the best candidate
804 # Test all parents (1 or 2), and keep the best candidate
801 yield parents
805 yield parents
802
806
803 if sparse and parents:
807 if sparse and parents:
804 if snapshots is None:
808 if snapshots is None:
805 # map: base-rev: snapshot-rev
809 # map: base-rev: snapshot-rev
806 snapshots = collections.defaultdict(list)
810 snapshots = collections.defaultdict(list)
807 # See if we can use an existing snapshot in the parent chains as
811 # See if we can use an existing snapshot in the parent chains as
808 # a base for a new intermediate snapshot
812 # a base for a new intermediate snapshot
809 #
813 #
810 # search for snapshots in the parents' delta chains
814 # search for snapshots in the parents' delta chains
811 # map: snapshot-level: snapshot-rev
815 # map: snapshot-level: snapshot-rev
812 parents_snaps = collections.defaultdict(set)
816 parents_snaps = collections.defaultdict(set)
813 candidate_chains = [deltachain(p) for p in parents]
817 candidate_chains = [deltachain(p) for p in parents]
814 for chain in candidate_chains:
818 for chain in candidate_chains:
815 for idx, s in enumerate(chain):
819 for idx, s in enumerate(chain):
816 if not revlog.issnapshot(s):
820 if not revlog.issnapshot(s):
817 break
821 break
818 parents_snaps[idx].add(s)
822 parents_snaps[idx].add(s)
819 snapfloor = min(parents_snaps[0]) + 1
823 snapfloor = min(parents_snaps[0]) + 1
820 _findsnapshots(revlog, snapshots, snapfloor)
824 _findsnapshots(revlog, snapshots, snapfloor)
821 # search for the highest "unrelated" revision
825 # search for the highest "unrelated" revision
822 #
826 #
823 # Adding snapshots used by "unrelated" revisions increases the odds that we
827 # Adding snapshots used by "unrelated" revisions increases the odds that we
824 # reuse an independent, yet better, snapshot chain.
828 # reuse an independent, yet better, snapshot chain.
825 #
829 #
826 # XXX instead of building a set of revisions, we could lazily enumerate
830 # XXX instead of building a set of revisions, we could lazily enumerate
827 # over the chains. That would be more efficient, however we stick to
831 # over the chains. That would be more efficient, however we stick to
828 # simple code for now.
832 # simple code for now.
829 all_revs = set()
833 all_revs = set()
830 for chain in candidate_chains:
834 for chain in candidate_chains:
831 all_revs.update(chain)
835 all_revs.update(chain)
832 other = None
836 other = None
833 for r in revlog.revs(prev, snapfloor):
837 for r in revlog.revs(prev, snapfloor):
834 if r not in all_revs:
838 if r not in all_revs:
835 other = r
839 other = r
836 break
840 break
837 if other is not None:
841 if other is not None:
838 # To avoid unfair competition, we won't use unrelated intermediate
842 # To avoid unfair competition, we won't use unrelated intermediate
839 # snapshots that are deeper than the ones from the parent delta
843 # snapshots that are deeper than the ones from the parent delta
840 # chain.
844 # chain.
841 max_depth = max(parents_snaps.keys())
845 max_depth = max(parents_snaps.keys())
842 chain = deltachain(other)
846 chain = deltachain(other)
843 for idx, s in enumerate(chain):
847 for idx, s in enumerate(chain):
844 if s < snapfloor:
848 if s < snapfloor:
845 continue
849 continue
846 if max_depth < idx:
850 if max_depth < idx:
847 break
851 break
848 if not revlog.issnapshot(s):
852 if not revlog.issnapshot(s):
849 break
853 break
850 parents_snaps[idx].add(s)
854 parents_snaps[idx].add(s)
851 # Test them as possible intermediate snapshot bases
855 # Test them as possible intermediate snapshot bases
852 # We test them from highest to lowest level. High-level ones are more
856 # We test them from highest to lowest level. High-level ones are more
853 # likely to result in small deltas
857 # likely to result in small deltas
854 floor = None
858 floor = None
855 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
859 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
856 siblings = set()
860 siblings = set()
857 for s in snaps:
861 for s in snaps:
858 siblings.update(snapshots[s])
862 siblings.update(snapshots[s])
859 # Before considering making a new intermediate snapshot, we check
863 # Before considering making a new intermediate snapshot, we check
860 # if an existing snapshot, a child of the base we consider, would be
864 # if an existing snapshot, a child of the base we consider, would be
861 # suitable.
865 # suitable.
862 #
866 #
863 # It gives a chance to reuse a delta chain "unrelated" to the
867 # It gives a chance to reuse a delta chain "unrelated" to the
864 # current revision instead of starting our own. Without such
868 # current revision instead of starting our own. Without such
865 # re-use, topological branches would keep reopening new chains,
869 # re-use, topological branches would keep reopening new chains,
866 # creating more and more snapshots as the repository grows.
870 # creating more and more snapshots as the repository grows.
867
871
868 if floor is not None:
872 if floor is not None:
869 # We only do this for siblings created after the one in our
873 # We only do this for siblings created after the one in our
870 # parent's delta chain. Those created before have less chance
874 # parent's delta chain. Those created before have less chance
871 # of being a valid base since our ancestors had to create a new
875 # of being a valid base since our ancestors had to create a new
872 # snapshot.
876 # snapshot.
873 siblings = [r for r in siblings if floor < r]
877 siblings = [r for r in siblings if floor < r]
874 yield tuple(sorted(siblings))
878 yield tuple(sorted(siblings))
875 # then test the bases from our parents' delta chains.
879 # then test the bases from our parents' delta chains.
876 yield tuple(sorted(snaps))
880 yield tuple(sorted(snaps))
877 floor = min(snaps)
881 floor = min(snaps)
878 # No suitable base found in the parent chain; search whether any full
882 # No suitable base found in the parent chain; search whether any full
879 # snapshot emitted since the parent's base would be a suitable base for an
883 # snapshot emitted since the parent's base would be a suitable base for an
880 # intermediate snapshot.
884 # intermediate snapshot.
881 #
885 #
882 # It gives a chance to reuse a delta chain unrelated to the current
886 # It gives a chance to reuse a delta chain unrelated to the current
883 # revision instead of starting our own. Without such re-use,
887 # revision instead of starting our own. Without such re-use,
884 # topological branches would keep reopening new full chains, creating
888 # topological branches would keep reopening new full chains, creating
885 # more and more snapshots as the repository grows.
889 # more and more snapshots as the repository grows.
886 yield tuple(snapshots[nullrev])
890 yield tuple(snapshots[nullrev])
887
891
888 if not sparse:
892 if not sparse:
889 # other approaches failed; try against prev to hopefully save us a
893 # other approaches failed; try against prev to hopefully save us a
890 # fulltext.
894 # fulltext.
891 yield (prev,)
895 yield (prev,)
892
896
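A toy sketch of the ordering described in the comments above, with hypothetical revision numbers rather than real Mercurial data: parents' snapshots are grouped by chain depth, levels are tried from the highest down, and each level's floor filters the sibling snapshots considered next.

    # hypothetical depth -> snapshot revisions taken from the parents' chains
    parents_snaps = {0: {3}, 1: {7, 9}}
    # hypothetical snapshot -> child snapshots created later
    snapshots = {3: [5, 8], 7: [], 9: [11]}
    floor = None
    for idx, snaps in sorted(parents_snaps.items(), reverse=True):
        siblings = set()
        for s in snaps:
            siblings.update(snapshots[s])
        if floor is not None:
            siblings = {r for r in siblings if floor < r}
        print('level', idx, 'siblings', sorted(siblings), 'bases', sorted(snaps))
        floor = min(snaps)
    # level 1 tries siblings [11] then bases [7, 9]; level 0 then only sees
    # sibling 8 because the floor (7) filters out snapshot 5
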
893 class deltacomputer(object):
897 class deltacomputer(object):
894 def __init__(self, revlog):
898 def __init__(self, revlog):
895 self.revlog = revlog
899 self.revlog = revlog
896
900
897 def buildtext(self, revinfo, fh):
901 def buildtext(self, revinfo, fh):
898 """Builds a fulltext version of a revision
902 """Builds a fulltext version of a revision
899
903
900 revinfo: _revisioninfo instance that contains all needed info
904 revinfo: _revisioninfo instance that contains all needed info
901 fh: file handle to either the .i or the .d revlog file,
905 fh: file handle to either the .i or the .d revlog file,
902 depending on whether it is inlined or not
906 depending on whether it is inlined or not
903 """
907 """
904 btext = revinfo.btext
908 btext = revinfo.btext
905 if btext[0] is not None:
909 if btext[0] is not None:
906 return btext[0]
910 return btext[0]
907
911
908 revlog = self.revlog
912 revlog = self.revlog
909 cachedelta = revinfo.cachedelta
913 cachedelta = revinfo.cachedelta
910 baserev = cachedelta[0]
914 baserev = cachedelta[0]
911 delta = cachedelta[1]
915 delta = cachedelta[1]
912
916
913 fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
917 fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
914 revinfo.p1, revinfo.p2,
918 revinfo.p1, revinfo.p2,
915 revinfo.flags, revinfo.node)
919 revinfo.flags, revinfo.node)
916 return fulltext
920 return fulltext
917
921
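The one-element ``btext`` list used above is a small caching idiom worth noting: since the list object is shared through ``revinfo``, writing the computed fulltext into slot 0 memoizes it for every later call. A minimal standalone sketch:

    # a shared one-element list acts as an in-place memo cell
    cache = [None]

    def fulltext(cache, compute):
        if cache[0] is None:
            cache[0] = compute()
        return cache[0]

    assert fulltext(cache, lambda: b'text') == b'text'
    assert cache[0] == b'text'   # later calls reuse the cached value
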
918 def _builddeltadiff(self, base, revinfo, fh):
922 def _builddeltadiff(self, base, revinfo, fh):
919 revlog = self.revlog
923 revlog = self.revlog
920 t = self.buildtext(revinfo, fh)
924 t = self.buildtext(revinfo, fh)
921 if revlog.iscensored(base):
925 if revlog.iscensored(base):
922 # deltas based on a censored revision must replace the
926 # deltas based on a censored revision must replace the
923 # full content in one patch, so delta works everywhere
927 # full content in one patch, so delta works everywhere
924 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
928 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
925 delta = header + t
929 delta = header + t
926 else:
930 else:
927 ptext = revlog.rawdata(base, _df=fh)
931 ptext = revlog.rawdata(base, _df=fh)
928 delta = mdiff.textdiff(ptext, t)
932 delta = mdiff.textdiff(ptext, t)
929
933
930 return delta
934 return delta
931
935
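A self-contained sketch of why a censored base needs a full-replacement delta, assuming the bdiff block format (big-endian ">lll" start/end/length headers followed by literal data): a delta whose single block spans the whole base applies cleanly against any base text.

    import struct

    def replace_delta(baselen, newtext):
        # one block replacing bytes [0, baselen) of the base with newtext
        return struct.pack(">lll", 0, baselen, len(newtext)) + newtext

    def apply_one_block(base, delta):
        p1, p2, l = struct.unpack(">lll", delta[:12])
        return base[:p1] + delta[12:12 + l] + base[p2:]

    d = replace_delta(len(b'censored base'), b'real fulltext')
    assert apply_one_block(b'censored base', d) == b'real fulltext'
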
932 def _builddeltainfo(self, revinfo, base, fh):
936 def _builddeltainfo(self, revinfo, base, fh):
933 # can we use the cached delta?
937 # can we use the cached delta?
934 revlog = self.revlog
938 revlog = self.revlog
935 chainbase = revlog.chainbase(base)
939 chainbase = revlog.chainbase(base)
936 if revlog._generaldelta:
940 if revlog._generaldelta:
937 deltabase = base
941 deltabase = base
938 else:
942 else:
939 deltabase = chainbase
943 deltabase = chainbase
940 snapshotdepth = None
944 snapshotdepth = None
941 if revlog._sparserevlog and deltabase == nullrev:
945 if revlog._sparserevlog and deltabase == nullrev:
942 snapshotdepth = 0
946 snapshotdepth = 0
943 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
947 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
944 # A delta chain should always be one full snapshot,
948 # A delta chain should always be one full snapshot,
945 # zero or more semi-snapshots, and zero or more deltas
949 # zero or more semi-snapshots, and zero or more deltas
946 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
950 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
947 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
951 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
948 snapshotdepth = len(revlog._deltachain(deltabase)[0])
952 snapshotdepth = len(revlog._deltachain(deltabase)[0])
949 delta = None
953 delta = None
950 if revinfo.cachedelta:
954 if revinfo.cachedelta:
951 cachebase, cachediff = revinfo.cachedelta
955 cachebase, cachediff = revinfo.cachedelta
952 # check if the diff still applies
956 # check if the diff still applies
953 currentbase = cachebase
957 currentbase = cachebase
954 while (currentbase != nullrev
958 while (currentbase != nullrev
955 and currentbase != base
959 and currentbase != base
956 and self.revlog.length(currentbase) == 0):
960 and self.revlog.length(currentbase) == 0):
957 currentbase = self.revlog.deltaparent(currentbase)
961 currentbase = self.revlog.deltaparent(currentbase)
958 if self.revlog._lazydelta and currentbase == base:
962 if self.revlog._lazydelta and currentbase == base:
959 delta = revinfo.cachedelta[1]
963 delta = revinfo.cachedelta[1]
960 if delta is None:
964 if delta is None:
961 delta = self._builddeltadiff(base, revinfo, fh)
965 delta = self._builddeltadiff(base, revinfo, fh)
962 # snapshotdepth needs to be neither None nor 0 (a level-0 snapshot)
966 # snapshotdepth needs to be neither None nor 0 (a level-0 snapshot)
963 if revlog.upperboundcomp is not None and snapshotdepth:
967 if revlog.upperboundcomp is not None and snapshotdepth:
964 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
968 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
965 snapshotlimit = revinfo.textlen >> snapshotdepth
969 snapshotlimit = revinfo.textlen >> snapshotdepth
966 if snapshotlimit < lowestrealisticdeltalen:
970 if snapshotlimit < lowestrealisticdeltalen:
967 return None
971 return None
968 if revlog.length(base) < lowestrealisticdeltalen:
972 if revlog.length(base) < lowestrealisticdeltalen:
969 return None
973 return None
970 header, data = revlog.compress(delta)
974 header, data = revlog.compress(delta)
971 deltalen = len(header) + len(data)
975 deltalen = len(header) + len(data)
972 offset = revlog.end(len(revlog) - 1)
976 offset = revlog.end(len(revlog) - 1)
973 dist = deltalen + offset - revlog.start(chainbase)
977 dist = deltalen + offset - revlog.start(chainbase)
974 chainlen, compresseddeltalen = revlog._chaininfo(base)
978 chainlen, compresseddeltalen = revlog._chaininfo(base)
975 chainlen += 1
979 chainlen += 1
976 compresseddeltalen += deltalen
980 compresseddeltalen += deltalen
977
981
978 return _deltainfo(dist, deltalen, (header, data), deltabase,
982 return _deltainfo(dist, deltalen, (header, data), deltabase,
979 chainbase, chainlen, compresseddeltalen,
983 chainbase, chainlen, compresseddeltalen,
980 snapshotdepth)
984 snapshotdepth)
981
985
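A worked example of the pruning above, assuming ``upperboundcomp = 10`` (compression never better than 10x): even a best-case compressed delta cannot drop below ``len(delta) // 10`` bytes, and an intermediate snapshot at depth 2 must stay under ``textlen >> 2``, so an oversized delta can be rejected before paying for compression at all.

    textlen = 1 << 20                    # 1 MiB fulltext
    snapshotdepth = 2
    upperboundcomp = 10
    deltalen = 4 << 20                   # 4 MiB uncompressed delta

    lowestrealisticdeltalen = deltalen // upperboundcomp   # 419430 bytes
    snapshotlimit = textlen >> snapshotdepth               # 262144 bytes
    assert snapshotlimit < lowestrealisticdeltalen         # prune: return None
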
982 def _fullsnapshotinfo(self, fh, revinfo):
986 def _fullsnapshotinfo(self, fh, revinfo):
983 curr = len(self.revlog)
987 curr = len(self.revlog)
984 rawtext = self.buildtext(revinfo, fh)
988 rawtext = self.buildtext(revinfo, fh)
985 data = self.revlog.compress(rawtext)
989 data = self.revlog.compress(rawtext)
986 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
990 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
987 deltabase = chainbase = curr
991 deltabase = chainbase = curr
988 snapshotdepth = 0
992 snapshotdepth = 0
989 chainlen = 1
993 chainlen = 1
990
994
991 return _deltainfo(dist, deltalen, data, deltabase,
995 return _deltainfo(dist, deltalen, data, deltabase,
992 chainbase, chainlen, compresseddeltalen,
996 chainbase, chainlen, compresseddeltalen,
993 snapshotdepth)
997 snapshotdepth)
994
998
995 def finddeltainfo(self, revinfo, fh):
999 def finddeltainfo(self, revinfo, fh):
996 """Find an acceptable delta against a candidate revision
1000 """Find an acceptable delta against a candidate revision
997
1001
998 revinfo: information about the revision (instance of _revisioninfo)
1002 revinfo: information about the revision (instance of _revisioninfo)
999 fh: file handle to either the .i or the .d revlog file,
1003 fh: file handle to either the .i or the .d revlog file,
1000 depending on whether it is inlined or not
1004 depending on whether it is inlined or not
1001
1005
1002 Returns the first acceptable candidate revision, as ordered by
1006 Returns the first acceptable candidate revision, as ordered by
1003 _candidategroups
1007 _candidategroups
1004
1008
1005 If no suitable deltabase is found, we return delta info for a full
1009 If no suitable deltabase is found, we return delta info for a full
1006 snapshot.
1010 snapshot.
1007 """
1011 """
1008 if not revinfo.textlen:
1012 if not revinfo.textlen:
1009 return self._fullsnapshotinfo(fh, revinfo)
1013 return self._fullsnapshotinfo(fh, revinfo)
1010
1014
1011 # no delta for flag processor revision (see "candelta" for why)
1015 # no delta for flag processor revision (see "candelta" for why)
1012 # not calling candelta since only one revision needs testing, also to
1016 # not calling candelta since only one revision needs testing, also to
1013 # avoid the overhead of fetching flags again.
1017 # avoid the overhead of fetching flags again.
1014 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1018 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1015 return self._fullsnapshotinfo(fh, revinfo)
1019 return self._fullsnapshotinfo(fh, revinfo)
1016
1020
1017 cachedelta = revinfo.cachedelta
1021 cachedelta = revinfo.cachedelta
1018 p1 = revinfo.p1
1022 p1 = revinfo.p1
1019 p2 = revinfo.p2
1023 p2 = revinfo.p2
1020 revlog = self.revlog
1024 revlog = self.revlog
1021
1025
1022 deltainfo = None
1026 deltainfo = None
1023 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1027 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1024 groups = _candidategroups(self.revlog, revinfo.textlen,
1028 groups = _candidategroups(self.revlog, revinfo.textlen,
1025 p1r, p2r, cachedelta)
1029 p1r, p2r, cachedelta)
1026 candidaterevs = next(groups)
1030 candidaterevs = next(groups)
1027 while candidaterevs is not None:
1031 while candidaterevs is not None:
1028 nominateddeltas = []
1032 nominateddeltas = []
1029 if deltainfo is not None:
1033 if deltainfo is not None:
1030 # if we already found a good delta,
1034 # if we already found a good delta,
1031 # challenge it against refined candidates
1035 # challenge it against refined candidates
1032 nominateddeltas.append(deltainfo)
1036 nominateddeltas.append(deltainfo)
1033 for candidaterev in candidaterevs:
1037 for candidaterev in candidaterevs:
1034 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1038 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1035 if candidatedelta is not None:
1039 if candidatedelta is not None:
1036 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
1040 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
1037 nominateddeltas.append(candidatedelta)
1041 nominateddeltas.append(candidatedelta)
1038 if nominateddeltas:
1042 if nominateddeltas:
1039 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1043 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1040 if deltainfo is not None:
1044 if deltainfo is not None:
1041 candidaterevs = groups.send(deltainfo.base)
1045 candidaterevs = groups.send(deltainfo.base)
1042 else:
1046 else:
1043 candidaterevs = next(groups)
1047 candidaterevs = next(groups)
1044
1048
1045 if deltainfo is None:
1049 if deltainfo is None:
1046 deltainfo = self._fullsnapshotinfo(fh, revinfo)
1050 deltainfo = self._fullsnapshotinfo(fh, revinfo)
1047 return deltainfo
1051 return deltainfo
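A toy illustration, with made-up revision numbers, of the generator protocol between finddeltainfo and _candidategroups above: the producer yields groups of candidates and may receive the best base found so far through send(), refining the groups it yields next.

    def candidate_groups():
        chosen = yield (1, 2)      # cheap candidates first
        if chosen is not None:
            yield (chosen + 10,)   # refine around the chosen base
        yield None                 # search exhausted

    groups = candidate_groups()
    assert next(groups) == (1, 2)
    assert groups.send(2) == (12,) # feed back the base picked so far
    assert next(groups) is None    # stop: no candidate group left
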
@@ -1,206 +1,206 b''
1 # flagutil.py - code to deal with revlog flags and their processors
1 # flagutil.py - code to deal with revlog flags and their processors
2 #
2 #
3 # Copyright 2016 Remi Chaintron <remi@fb.com>
3 # Copyright 2016 Remi Chaintron <remi@fb.com>
4 # Copyright 2016-2019 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
4 # Copyright 2016-2019 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 from ..i18n import _
11 from ..i18n import _
12
12
13 from .constants import (
13 from .constants import (
14 REVIDX_DEFAULT_FLAGS,
14 REVIDX_DEFAULT_FLAGS,
15 REVIDX_ELLIPSIS,
15 REVIDX_ELLIPSIS,
16 REVIDX_EXTSTORED,
16 REVIDX_EXTSTORED,
17 REVIDX_FLAGS_ORDER,
17 REVIDX_FLAGS_ORDER,
18 REVIDX_ISCENSORED,
18 REVIDX_ISCENSORED,
19 REVIDX_RAWTEXT_CHANGING_FLAGS,
19 REVIDX_RAWTEXT_CHANGING_FLAGS,
20 )
20 )
21
21
22 from .. import (
22 from .. import (
23 error,
23 error,
24 util
24 util
25 )
25 )
26
26
27 # blanked usage of all the names to prevent pyflakes constraints
27 # blanked usage of all the names to prevent pyflakes constraints
28 # We need these names available in the module for extensions.
28 # We need these names available in the module for extensions.
29 REVIDX_ISCENSORED
29 REVIDX_ISCENSORED
30 REVIDX_ELLIPSIS
30 REVIDX_ELLIPSIS
31 REVIDX_EXTSTORED
31 REVIDX_EXTSTORED
32 REVIDX_DEFAULT_FLAGS
32 REVIDX_DEFAULT_FLAGS
33 REVIDX_FLAGS_ORDER
33 REVIDX_FLAGS_ORDER
34 REVIDX_RAWTEXT_CHANGING_FLAGS
34 REVIDX_RAWTEXT_CHANGING_FLAGS
35
35
36 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
36 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
37
37
38 # Store flag processors (cf. 'addflagprocessor()' to register)
38 # Store flag processors (cf. 'addflagprocessor()' to register)
39 flagprocessors = {
39 flagprocessors = {
40 REVIDX_ISCENSORED: None,
40 REVIDX_ISCENSORED: None,
41 }
41 }
42
42
43 def addflagprocessor(flag, processor):
43 def addflagprocessor(flag, processor):
44 """Register a flag processor on a revision data flag.
44 """Register a flag processor on a revision data flag.
45
45
46 Invariant:
46 Invariant:
47 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
47 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
48 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
48 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
49 - Only one flag processor can be registered on a specific flag.
49 - Only one flag processor can be registered on a specific flag.
50 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
50 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
51 following signatures:
51 following signatures:
52 - (read) f(self, rawtext) -> text, bool
52 - (read) f(self, rawtext) -> text, bool
53 - (write) f(self, text) -> rawtext, bool
53 - (write) f(self, text) -> rawtext, bool
54 - (raw) f(self, rawtext) -> bool
54 - (raw) f(self, rawtext) -> bool
55 "text" is presented to the user. "rawtext" is stored in revlog data, not
55 "text" is presented to the user. "rawtext" is stored in revlog data, not
56 directly visible to the user.
56 directly visible to the user.
57 The boolean returned by these transforms is used to determine whether
57 The boolean returned by these transforms is used to determine whether
58 the returned text can be used for hash integrity checking. For example,
58 the returned text can be used for hash integrity checking. For example,
59 if "write" returns False, then "text" is used to generate hash. If
59 if "write" returns False, then "text" is used to generate hash. If
60 "write" returns True, that basically means "rawtext" returned by "write"
60 "write" returns True, that basically means "rawtext" returned by "write"
61 should be used to generate hash. Usually, "write" and "read" return
61 should be used to generate hash. Usually, "write" and "read" return
62 different booleans. And "raw" returns a same boolean as "write".
62 different booleans. And "raw" returns a same boolean as "write".
63
63
64 Note: The 'raw' transform is used for changegroup generation and in some
64 Note: The 'raw' transform is used for changegroup generation and in some
65 debug commands. In this case the transform only indicates whether the
65 debug commands. In this case the transform only indicates whether the
66 contents can be used for hash integrity checks.
66 contents can be used for hash integrity checks.
67 """
67 """
68 insertflagprocessor(flag, processor, flagprocessors)
68 insertflagprocessor(flag, processor, flagprocessors)
69
69
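A hedged sketch of registering a processor, with hypothetical transforms; note that in this module's current code the read transform also returns a sidedata mapping and the write transform receives a ``sidedata`` argument (see ``_processflagsfunc`` below), even though the invariants above still describe the older two-value form.

    def _readext(rl, rawtext):
        return rawtext.replace(b'ext:', b'', 1), True, {}

    def _writeext(rl, text, sidedata):
        return b'ext:' + text, False

    def _rawext(rl, rawtext):
        return False   # rawtext alone cannot be used for hash checks

    addflagprocessor(REVIDX_EXTSTORED, (_readext, _writeext, _rawext))
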
70 def insertflagprocessor(flag, processor, flagprocessors):
70 def insertflagprocessor(flag, processor, flagprocessors):
71 if not flag & REVIDX_KNOWN_FLAGS:
71 if not flag & REVIDX_KNOWN_FLAGS:
72 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
72 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
73 raise error.ProgrammingError(msg)
73 raise error.ProgrammingError(msg)
74 if flag not in REVIDX_FLAGS_ORDER:
74 if flag not in REVIDX_FLAGS_ORDER:
75 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
75 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
76 raise error.ProgrammingError(msg)
76 raise error.ProgrammingError(msg)
77 if flag in flagprocessors:
77 if flag in flagprocessors:
78 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
78 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
79 raise error.Abort(msg)
79 raise error.Abort(msg)
80 flagprocessors[flag] = processor
80 flagprocessors[flag] = processor
81
81
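For reference, a toy version of the bit math these checks rely on: ``REVIDX_KNOWN_FLAGS`` is the OR of every flag in ``REVIDX_FLAGS_ORDER`` (cf. ``util.bitsfrom``), so ``not flag & REVIDX_KNOWN_FLAGS`` rejects any bit outside that set.

    FLAGS_ORDER = [1 << 15, 1 << 14, 1 << 13]   # toy flag values
    KNOWN_FLAGS = 0
    for f in FLAGS_ORDER:
        KNOWN_FLAGS |= f
    assert (1 << 14) & KNOWN_FLAGS              # known flag is accepted
    assert not ((1 << 2) & KNOWN_FLAGS)         # unknown flag is rejected
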
82 class flagprocessorsmixin(object):
82 class flagprocessorsmixin(object):
83 """basic mixin to support revlog flag processing
83 """basic mixin to support revlog flag processing
84
84
85 Make sure the `_flagprocessors` attribute is set at ``__init__`` time.
85 Make sure the `_flagprocessors` attribute is set at ``__init__`` time.
86
86
87 See the documentation of the ``_processflags`` method for details.
87 See the documentation of the ``_processflags`` method for details.
88 """
88 """
89
89
90 _flagserrorclass = error.RevlogError
90 _flagserrorclass = error.RevlogError
91
91
92 def _processflags(self, text, flags, operation, raw=False):
92 def _processflags(self, text, flags, operation, raw=False):
93 """deprecated entry point to access flag processors"""
93 """deprecated entry point to access flag processors"""
94 msg = ('_processflags(...) use the specialized variant')
94 msg = ('_processflags(...) use the specialized variant')
95 util.nouideprecwarn(msg, '5.2', stacklevel=2)
95 util.nouideprecwarn(msg, '5.2', stacklevel=2)
96 if raw:
96 if raw:
97 return text, self._processflagsraw(text, flags)
97 return text, processflagsraw(self, text, flags)
98 elif operation == 'read':
98 elif operation == 'read':
99 return processflagsread(self, text, flags)
99 return processflagsread(self, text, flags)
100 else: # write operation
100 else: # write operation
101 return processflagswrite(self, text, flags)
101 return processflagswrite(self, text, flags)
102
102
103 def _processflagsraw(self, text, flags):
104 """Inspect revision data flags to check is the content hash should be
105 validated.
106
107 ``text`` - the revision data to process
108 ``flags`` - the revision flags
109
110 This method processes the flags in the order (or reverse order if
111 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
112 flag processors registered for present flags. The order of flags defined
113 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
114
115 Returns a bool (the ``validatehash`` part of the processing result)
116 indicating whether the text should be checked for hash integrity; the
117 raw transforms never modify the text itself.
118 """
119 return _processflagsfunc(self, text, flags, 'raw')[1]
120
121 def processflagswrite(revlog, text, flags, sidedata):
103 def processflagswrite(revlog, text, flags, sidedata):
122 """Inspect revision data flags and applies write transformations defined
104 """Inspect revision data flags and applies write transformations defined
123 by registered flag processors.
105 by registered flag processors.
124
106
125 ``text`` - the revision data to process
107 ``text`` - the revision data to process
126 ``flags`` - the revision flags
108 ``flags`` - the revision flags
127
109
128 This method processes the flags in the order (or reverse order if
110 This method processes the flags in the order (or reverse order if
129 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
111 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
130 flag processors registered for present flags. The order of flags defined
112 flag processors registered for present flags. The order of flags defined
131 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
113 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
132
114
133 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
115 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
134 processed text and ``validatehash`` is a bool indicating whether the
116 processed text and ``validatehash`` is a bool indicating whether the
135 returned text should be checked for hash integrity.
117 returned text should be checked for hash integrity.
136 """
118 """
137 return _processflagsfunc(revlog, text, flags, 'write',
119 return _processflagsfunc(revlog, text, flags, 'write',
138 sidedata=sidedata)[:2]
120 sidedata=sidedata)[:2]
139
121
140 def processflagsread(revlog, text, flags):
122 def processflagsread(revlog, text, flags):
141 """Inspect revision data flags and applies read transformations defined
123 """Inspect revision data flags and applies read transformations defined
142 by registered flag processors.
124 by registered flag processors.
143
125
144 ``text`` - the revision data to process
126 ``text`` - the revision data to process
145 ``flags`` - the revision flags
127 ``flags`` - the revision flags
146 (Note: the raw transform is not applied here; use ``processflagsraw``
128 (Note: the raw transform is not applied here; use ``processflagsraw``
147 to check hash validity against raw text.)
129 to check hash validity against raw text.)
148
130
149 This method processes the flags in the order (or reverse order if
131 This method processes the flags in the order (or reverse order if
150 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
132 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
151 flag processors registered for present flags. The order of flags defined
133 flag processors registered for present flags. The order of flags defined
152 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
134 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
153
135
154 Returns a 3-tuple ``(text, validatehash, sidedata)``: the processed
136 Returns a 3-tuple ``(text, validatehash, sidedata)``: the processed
155 text, a bool indicating whether the returned text should be checked for
137 text, a bool indicating whether the returned text should be checked for
156 hash integrity, and the sidedata gathered by the read transforms.
138 hash integrity, and the sidedata gathered by the read transforms.
157 """
139 """
158 return _processflagsfunc(revlog, text, flags, 'read')
140 return _processflagsfunc(revlog, text, flags, 'read')
159
141
142 def processflagsraw(revlog, text, flags):
143 """Inspect revision data flags to check is the content hash should be
144 validated.
145
146 ``text`` - the revision data to process
147 ``flags`` - the revision flags
148
149 This method processes the flags in the order (or reverse order if
150 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
151 flag processors registered for present flags. The order of flags defined
152 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
153
154 Returns a bool (the ``validatehash`` part of the processing result)
155 indicating whether the text should be checked for hash integrity; the
156 raw transforms never modify the text itself.
157 """
158 return _processflagsfunc(revlog, text, flags, 'raw')[1]
159
160 def _processflagsfunc(revlog, text, flags, operation, sidedata=None):
160 def _processflagsfunc(revlog, text, flags, operation, sidedata=None):
161 """internal function to process flag on a revlog
161 """internal function to process flag on a revlog
162
162
163 This function is private to this module; code should never need to call
163 This function is private to this module; code should never need to call
164 it directly."""
164 it directly."""
165 # fast path: no flag processors will run
165 # fast path: no flag processors will run
166 if flags == 0:
166 if flags == 0:
167 return text, True, {}
167 return text, True, {}
168 if operation not in ('read', 'write', 'raw'):
168 if operation not in ('read', 'write', 'raw'):
169 raise error.ProgrammingError(_("invalid '%s' operation") %
169 raise error.ProgrammingError(_("invalid '%s' operation") %
170 operation)
170 operation)
171 # Check all flags are known.
171 # Check all flags are known.
172 if flags & ~REVIDX_KNOWN_FLAGS:
172 if flags & ~REVIDX_KNOWN_FLAGS:
173 raise revlog._flagserrorclass(_("incompatible revision flag '%#x'") %
173 raise revlog._flagserrorclass(_("incompatible revision flag '%#x'") %
174 (flags & ~REVIDX_KNOWN_FLAGS))
174 (flags & ~REVIDX_KNOWN_FLAGS))
175 validatehash = True
175 validatehash = True
176 # Depending on the operation (read or write), the order might be
176 # Depending on the operation (read or write), the order might be
177 # reversed due to non-commutative transforms.
177 # reversed due to non-commutative transforms.
178 orderedflags = REVIDX_FLAGS_ORDER
178 orderedflags = REVIDX_FLAGS_ORDER
179 if operation == 'write':
179 if operation == 'write':
180 orderedflags = reversed(orderedflags)
180 orderedflags = reversed(orderedflags)
181
181
182 outsidedata = {}
182 outsidedata = {}
183 for flag in orderedflags:
183 for flag in orderedflags:
184 # If a flagprocessor has been registered for a known flag, apply the
184 # If a flagprocessor has been registered for a known flag, apply the
185 # related operation transform and update result tuple.
185 # related operation transform and update result tuple.
186 if flag & flags:
186 if flag & flags:
187 vhash = True
187 vhash = True
188
188
189 if flag not in revlog._flagprocessors:
189 if flag not in revlog._flagprocessors:
190 message = _("missing processor for flag '%#x'") % (flag)
190 message = _("missing processor for flag '%#x'") % (flag)
191 raise revlog._flagserrorclass(message)
191 raise revlog._flagserrorclass(message)
192
192
193 processor = revlog._flagprocessors[flag]
193 processor = revlog._flagprocessors[flag]
194 if processor is not None:
194 if processor is not None:
195 readtransform, writetransform, rawtransform = processor
195 readtransform, writetransform, rawtransform = processor
196
196
197 if operation == 'raw':
197 if operation == 'raw':
198 vhash = rawtransform(revlog, text)
198 vhash = rawtransform(revlog, text)
199 elif operation == 'read':
199 elif operation == 'read':
200 text, vhash, s = readtransform(revlog, text)
200 text, vhash, s = readtransform(revlog, text)
201 outsidedata.update(s)
201 outsidedata.update(s)
202 else: # write operation
202 else: # write operation
203 text, vhash = writetransform(revlog, text, sidedata)
203 text, vhash = writetransform(revlog, text, sidedata)
204 validatehash = validatehash and vhash
204 validatehash = validatehash and vhash
205
205
206 return text, validatehash, outsidedata
206 return text, validatehash, outsidedata
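A small standalone demonstration of why the write order is reversed above: with two non-commutative transforms, reading applies processors in ``REVIDX_FLAGS_ORDER`` while writing must apply them in the opposite order for the round trip to compose. The transforms below are purely illustrative.

    encoders = {'A': lambda t: b'A(' + t + b')',
                'B': lambda t: b'B(' + t + b')'}
    decoders = {'A': lambda t: t[2:-1],
                'B': lambda t: t[2:-1]}

    raw = b'x'
    for flag in reversed(['A', 'B']):   # write: B first, then A
        raw = encoders[flag](raw)       # raw == b'A(B(x))'
    text = raw
    for flag in ['A', 'B']:             # read: A first, then B
        text = decoders[flag](text)
    assert text == b'x'                 # the round trip composes correctly
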
@@ -1,683 +1,683 b''
1 # simplestorerepo.py - Extension that swaps in alternate repository storage.
1 # simplestorerepo.py - Extension that swaps in alternate repository storage.
2 #
2 #
3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 # To use this with the test suite:
8 # To use this with the test suite:
9 #
9 #
10 # $ HGREPOFEATURES="simplestore" ./run-tests.py \
10 # $ HGREPOFEATURES="simplestore" ./run-tests.py \
11 # --extra-config-opt extensions.simplestore=`pwd`/simplestorerepo.py
11 # --extra-config-opt extensions.simplestore=`pwd`/simplestorerepo.py
12
12
13 from __future__ import absolute_import
13 from __future__ import absolute_import
14
14
15 import stat
15 import stat
16
16
17 from mercurial.i18n import _
17 from mercurial.i18n import _
18 from mercurial.node import (
18 from mercurial.node import (
19 bin,
19 bin,
20 hex,
20 hex,
21 nullid,
21 nullid,
22 nullrev,
22 nullrev,
23 )
23 )
24 from mercurial.thirdparty import (
24 from mercurial.thirdparty import (
25 attr,
25 attr,
26 )
26 )
27 from mercurial import (
27 from mercurial import (
28 ancestor,
28 ancestor,
29 bundlerepo,
29 bundlerepo,
30 error,
30 error,
31 extensions,
31 extensions,
32 localrepo,
32 localrepo,
33 mdiff,
33 mdiff,
34 pycompat,
34 pycompat,
35 revlog,
35 revlog,
36 store,
36 store,
37 verify,
37 verify,
38 )
38 )
39 from mercurial.interfaces import (
39 from mercurial.interfaces import (
40 repository,
40 repository,
41 util as interfaceutil,
41 util as interfaceutil,
42 )
42 )
43 from mercurial.utils import (
43 from mercurial.utils import (
44 cborutil,
44 cborutil,
45 storageutil,
45 storageutil,
46 )
46 )
47 from mercurial.revlogutils import (
47 from mercurial.revlogutils import (
48 flagutil,
48 flagutil,
49 )
49 )
50
50
51 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
51 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
52 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
52 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
53 # be specifying the version(s) of Mercurial they are tested with, or
53 # be specifying the version(s) of Mercurial they are tested with, or
54 # leave the attribute unspecified.
54 # leave the attribute unspecified.
55 testedwith = 'ships-with-hg-core'
55 testedwith = 'ships-with-hg-core'
56
56
57 REQUIREMENT = 'testonly-simplestore'
57 REQUIREMENT = 'testonly-simplestore'
58
58
59 def validatenode(node):
59 def validatenode(node):
60 if isinstance(node, int):
60 if isinstance(node, int):
61 raise ValueError('expected node; got int')
61 raise ValueError('expected node; got int')
62
62
63 if len(node) != 20:
63 if len(node) != 20:
64 raise ValueError('expected 20 byte node')
64 raise ValueError('expected 20 byte node')
65
65
66 def validaterev(rev):
66 def validaterev(rev):
67 if not isinstance(rev, int):
67 if not isinstance(rev, int):
68 raise ValueError('expected int')
68 raise ValueError('expected int')
69
69
70 class simplestoreerror(error.StorageError):
70 class simplestoreerror(error.StorageError):
71 pass
71 pass
72
72
73 @interfaceutil.implementer(repository.irevisiondelta)
73 @interfaceutil.implementer(repository.irevisiondelta)
74 @attr.s(slots=True)
74 @attr.s(slots=True)
75 class simplestorerevisiondelta(object):
75 class simplestorerevisiondelta(object):
76 node = attr.ib()
76 node = attr.ib()
77 p1node = attr.ib()
77 p1node = attr.ib()
78 p2node = attr.ib()
78 p2node = attr.ib()
79 basenode = attr.ib()
79 basenode = attr.ib()
80 flags = attr.ib()
80 flags = attr.ib()
81 baserevisionsize = attr.ib()
81 baserevisionsize = attr.ib()
82 revision = attr.ib()
82 revision = attr.ib()
83 delta = attr.ib()
83 delta = attr.ib()
84 linknode = attr.ib(default=None)
84 linknode = attr.ib(default=None)
85
85
86 @interfaceutil.implementer(repository.iverifyproblem)
86 @interfaceutil.implementer(repository.iverifyproblem)
87 @attr.s(frozen=True)
87 @attr.s(frozen=True)
88 class simplefilestoreproblem(object):
88 class simplefilestoreproblem(object):
89 warning = attr.ib(default=None)
89 warning = attr.ib(default=None)
90 error = attr.ib(default=None)
90 error = attr.ib(default=None)
91 node = attr.ib(default=None)
91 node = attr.ib(default=None)
92
92
93 @interfaceutil.implementer(repository.ifilestorage)
93 @interfaceutil.implementer(repository.ifilestorage)
94 class filestorage(flagutil.flagprocessorsmixin):
94 class filestorage(flagutil.flagprocessorsmixin):
95 """Implements storage for a tracked path.
95 """Implements storage for a tracked path.
96
96
97 Data is stored in the VFS in a directory corresponding to the tracked
97 Data is stored in the VFS in a directory corresponding to the tracked
98 path.
98 path.
99
99
100 Index data is stored in an ``index`` file using CBOR.
100 Index data is stored in an ``index`` file using CBOR.
101
101
102 Fulltext data is stored in files having names of the node.
102 Fulltext data is stored in files having names of the node.
103 """
103 """
104
104
105 _flagserrorclass = simplestoreerror
105 _flagserrorclass = simplestoreerror
106
106
107 def __init__(self, svfs, path):
107 def __init__(self, svfs, path):
108 self._svfs = svfs
108 self._svfs = svfs
109 self._path = path
109 self._path = path
110
110
111 self._storepath = b'/'.join([b'data', path])
111 self._storepath = b'/'.join([b'data', path])
112 self._indexpath = b'/'.join([self._storepath, b'index'])
112 self._indexpath = b'/'.join([self._storepath, b'index'])
113
113
114 indexdata = self._svfs.tryread(self._indexpath)
114 indexdata = self._svfs.tryread(self._indexpath)
115 if indexdata:
115 if indexdata:
116 indexdata = cborutil.decodeall(indexdata)
116 indexdata = cborutil.decodeall(indexdata)
117
117
118 self._indexdata = indexdata or []
118 self._indexdata = indexdata or []
119 self._indexbynode = {}
119 self._indexbynode = {}
120 self._indexbyrev = {}
120 self._indexbyrev = {}
121 self._index = []
121 self._index = []
122 self._refreshindex()
122 self._refreshindex()
123
123
124 self._flagprocessors = dict(flagutil.flagprocessors)
124 self._flagprocessors = dict(flagutil.flagprocessors)
125
125
126 def _refreshindex(self):
126 def _refreshindex(self):
127 self._indexbynode.clear()
127 self._indexbynode.clear()
128 self._indexbyrev.clear()
128 self._indexbyrev.clear()
129 self._index = []
129 self._index = []
130
130
131 for i, entry in enumerate(self._indexdata):
131 for i, entry in enumerate(self._indexdata):
132 self._indexbynode[entry[b'node']] = entry
132 self._indexbynode[entry[b'node']] = entry
133 self._indexbyrev[i] = entry
133 self._indexbyrev[i] = entry
134
134
135 self._indexbynode[nullid] = {
135 self._indexbynode[nullid] = {
136 b'node': nullid,
136 b'node': nullid,
137 b'p1': nullid,
137 b'p1': nullid,
138 b'p2': nullid,
138 b'p2': nullid,
139 b'linkrev': nullrev,
139 b'linkrev': nullrev,
140 b'flags': 0,
140 b'flags': 0,
141 }
141 }
142
142
143 self._indexbyrev[nullrev] = {
143 self._indexbyrev[nullrev] = {
144 b'node': nullid,
144 b'node': nullid,
145 b'p1': nullid,
145 b'p1': nullid,
146 b'p2': nullid,
146 b'p2': nullid,
147 b'linkrev': nullrev,
147 b'linkrev': nullrev,
148 b'flags': 0,
148 b'flags': 0,
149 }
149 }
150
150
151 for i, entry in enumerate(self._indexdata):
151 for i, entry in enumerate(self._indexdata):
152 p1rev, p2rev = self.parentrevs(self.rev(entry[b'node']))
152 p1rev, p2rev = self.parentrevs(self.rev(entry[b'node']))
153
153
154 # start, length, rawsize, chainbase, linkrev, p1, p2, node
154 # start, length, rawsize, chainbase, linkrev, p1, p2, node
155 self._index.append((0, 0, 0, -1, entry[b'linkrev'], p1rev, p2rev,
155 self._index.append((0, 0, 0, -1, entry[b'linkrev'], p1rev, p2rev,
156 entry[b'node']))
156 entry[b'node']))
157
157
158 self._index.append((0, 0, 0, -1, -1, -1, -1, nullid))
158 self._index.append((0, 0, 0, -1, -1, -1, -1, nullid))
159
159
160 def __len__(self):
160 def __len__(self):
161 return len(self._indexdata)
161 return len(self._indexdata)
162
162
163 def __iter__(self):
163 def __iter__(self):
164 return iter(range(len(self)))
164 return iter(range(len(self)))
165
165
166 def revs(self, start=0, stop=None):
166 def revs(self, start=0, stop=None):
167 step = 1
167 step = 1
168 if stop is not None:
168 if stop is not None:
169 if start > stop:
169 if start > stop:
170 step = -1
170 step = -1
171
171
172 stop += step
172 stop += step
173 else:
173 else:
174 stop = len(self)
174 stop = len(self)
175
175
176 return range(start, stop, step)
176 return range(start, stop, step)
177
177
178 def parents(self, node):
178 def parents(self, node):
179 validatenode(node)
179 validatenode(node)
180
180
181 if node not in self._indexbynode:
181 if node not in self._indexbynode:
182 raise KeyError('unknown node')
182 raise KeyError('unknown node')
183
183
184 entry = self._indexbynode[node]
184 entry = self._indexbynode[node]
185
185
186 return entry[b'p1'], entry[b'p2']
186 return entry[b'p1'], entry[b'p2']
187
187
188 def parentrevs(self, rev):
188 def parentrevs(self, rev):
189 p1, p2 = self.parents(self._indexbyrev[rev][b'node'])
189 p1, p2 = self.parents(self._indexbyrev[rev][b'node'])
190 return self.rev(p1), self.rev(p2)
190 return self.rev(p1), self.rev(p2)
191
191
192 def rev(self, node):
192 def rev(self, node):
193 validatenode(node)
193 validatenode(node)
194
194
195 try:
195 try:
196 self._indexbynode[node]
196 self._indexbynode[node]
197 except KeyError:
197 except KeyError:
198 raise error.LookupError(node, self._indexpath, _('no node'))
198 raise error.LookupError(node, self._indexpath, _('no node'))
199
199
200 for rev, entry in self._indexbyrev.items():
200 for rev, entry in self._indexbyrev.items():
201 if entry[b'node'] == node:
201 if entry[b'node'] == node:
202 return rev
202 return rev
203
203
204 raise error.ProgrammingError('this should not occur')
204 raise error.ProgrammingError('this should not occur')
205
205
206 def node(self, rev):
206 def node(self, rev):
207 validaterev(rev)
207 validaterev(rev)
208
208
209 return self._indexbyrev[rev][b'node']
209 return self._indexbyrev[rev][b'node']
210
210
211 def hasnode(self, node):
211 def hasnode(self, node):
212 validatenode(node)
212 validatenode(node)
213 return node in self._indexbynode
213 return node in self._indexbynode
214
214
215 def censorrevision(self, tr, censornode, tombstone=b''):
215 def censorrevision(self, tr, censornode, tombstone=b''):
216 raise NotImplementedError('TODO')
216 raise NotImplementedError('TODO')
217
217
218 def lookup(self, node):
218 def lookup(self, node):
219 if isinstance(node, int):
219 if isinstance(node, int):
220 return self.node(node)
220 return self.node(node)
221
221
222 if len(node) == 20:
222 if len(node) == 20:
223 self.rev(node)
223 self.rev(node)
224 return node
224 return node
225
225
226 try:
226 try:
227 rev = int(node)
227 rev = int(node)
228 if '%d' % rev != node:
228 if '%d' % rev != node:
229 raise ValueError
229 raise ValueError
230
230
231 if rev < 0:
231 if rev < 0:
232 rev = len(self) + rev
232 rev = len(self) + rev
233 if rev < 0 or rev >= len(self):
233 if rev < 0 or rev >= len(self):
234 raise ValueError
234 raise ValueError
235
235
236 return self.node(rev)
236 return self.node(rev)
237 except (ValueError, OverflowError):
237 except (ValueError, OverflowError):
238 pass
238 pass
239
239
240 if len(node) == 40:
240 if len(node) == 40:
241 try:
241 try:
242 rawnode = bin(node)
242 rawnode = bin(node)
243 self.rev(rawnode)
243 self.rev(rawnode)
244 return rawnode
244 return rawnode
245 except TypeError:
245 except TypeError:
246 pass
246 pass
247
247
248 raise error.LookupError(node, self._path, _('invalid lookup input'))
248 raise error.LookupError(node, self._path, _('invalid lookup input'))
249
249
250 def linkrev(self, rev):
250 def linkrev(self, rev):
251 validaterev(rev)
251 validaterev(rev)
252
252
253 return self._indexbyrev[rev][b'linkrev']
253 return self._indexbyrev[rev][b'linkrev']
254
254
255 def _flags(self, rev):
255 def _flags(self, rev):
256 validaterev(rev)
256 validaterev(rev)
257
257
258 return self._indexbyrev[rev][b'flags']
258 return self._indexbyrev[rev][b'flags']
259
259
260 def _candelta(self, baserev, rev):
260 def _candelta(self, baserev, rev):
261 validaterev(baserev)
261 validaterev(baserev)
262 validaterev(rev)
262 validaterev(rev)
263
263
264 if ((self._flags(baserev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)
264 if ((self._flags(baserev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)
265 or (self._flags(rev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)):
265 or (self._flags(rev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)):
266 return False
266 return False
267
267
268 return True
268 return True
269
269
270 def checkhash(self, text, node, p1=None, p2=None, rev=None):
270 def checkhash(self, text, node, p1=None, p2=None, rev=None):
271 if p1 is None and p2 is None:
271 if p1 is None and p2 is None:
272 p1, p2 = self.parents(node)
272 p1, p2 = self.parents(node)
273 if node != storageutil.hashrevisionsha1(text, p1, p2):
273 if node != storageutil.hashrevisionsha1(text, p1, p2):
274 raise simplestoreerror(_("integrity check failed on %s") %
274 raise simplestoreerror(_("integrity check failed on %s") %
275 self._path)
275 self._path)
276
276
277 def revision(self, nodeorrev, raw=False):
277 def revision(self, nodeorrev, raw=False):
278 if isinstance(nodeorrev, int):
278 if isinstance(nodeorrev, int):
279 node = self.node(nodeorrev)
279 node = self.node(nodeorrev)
280 else:
280 else:
281 node = nodeorrev
281 node = nodeorrev
282 validatenode(node)
282 validatenode(node)
283
283
284 if node == nullid:
284 if node == nullid:
285 return b''
285 return b''
286
286
287 rev = self.rev(node)
287 rev = self.rev(node)
288 flags = self._flags(rev)
288 flags = self._flags(rev)
289
289
290 path = b'/'.join([self._storepath, hex(node)])
290 path = b'/'.join([self._storepath, hex(node)])
291 rawtext = self._svfs.read(path)
291 rawtext = self._svfs.read(path)
292
292
293 if raw:
293 if raw:
294 validatehash = self._processflagsraw(rawtext, flags)
294 validatehash = flagutil.processflagsraw(self, rawtext, flags)
295 text = rawtext
295 text = rawtext
296 else:
296 else:
297 r = flagutil.processflagsread(self, rawtext, flags)
297 r = flagutil.processflagsread(self, rawtext, flags)
298 text, validatehash, sidedata = r
298 text, validatehash, sidedata = r
299 if validatehash:
299 if validatehash:
300 self.checkhash(text, node, rev=rev)
300 self.checkhash(text, node, rev=rev)
301
301
302 return text
302 return text
303
303
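A toy sketch of the refactoring pattern this changeset applies at the call site above: the raw-path helper lives at module level and takes the storage object explicitly, while any deprecated method form simply delegates to it. The names below are illustrative, not the real API.

    def processraw(store, rawtext, flags):       # module level, new style
        return flags == 0                        # stand-in for the real check

    class storagemixin(object):
        def _processraw(self, rawtext, flags):   # deprecated method form
            return processraw(self, rawtext, flags)
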
304 def rawdata(self, nodeorrev):
304 def rawdata(self, nodeorrev):
305 return self.revision(nodeorrev, raw=True)
305 return self.revision(nodeorrev, raw=True)
306
306
307 def read(self, node):
307 def read(self, node):
308 validatenode(node)
308 validatenode(node)
309
309
310 revision = self.revision(node)
310 revision = self.revision(node)
311
311
312 if not revision.startswith(b'\1\n'):
312 if not revision.startswith(b'\1\n'):
313 return revision
313 return revision
314
314
315 start = revision.index(b'\1\n', 2)
315 start = revision.index(b'\1\n', 2)
316 return revision[start + 2:]
316 return revision[start + 2:]
317
317
318 def renamed(self, node):
318 def renamed(self, node):
319 validatenode(node)
319 validatenode(node)
320
320
321 if self.parents(node)[0] != nullid:
321 if self.parents(node)[0] != nullid:
322 return False
322 return False
323
323
324 fulltext = self.revision(node)
324 fulltext = self.revision(node)
325 m = storageutil.parsemeta(fulltext)[0]
325 m = storageutil.parsemeta(fulltext)[0]
326
326
327 if m and 'copy' in m:
327 if m and 'copy' in m:
328 return m['copy'], bin(m['copyrev'])
328 return m['copy'], bin(m['copyrev'])
329
329
330 return False
330 return False
331
331
332 def cmp(self, node, text):
332 def cmp(self, node, text):
333 validatenode(node)
333 validatenode(node)
334
334
335 t = text
335 t = text
336
336
337 if text.startswith(b'\1\n'):
337 if text.startswith(b'\1\n'):
338 t = b'\1\n\1\n' + text
338 t = b'\1\n\1\n' + text
339
339
340 p1, p2 = self.parents(node)
340 p1, p2 = self.parents(node)
341
341
342 if storageutil.hashrevisionsha1(t, p1, p2) == node:
342 if storageutil.hashrevisionsha1(t, p1, p2) == node:
343 return False
343 return False
344
344
345 if self.iscensored(self.rev(node)):
345 if self.iscensored(self.rev(node)):
346 return text != b''
346 return text != b''
347
347
348 if self.renamed(node):
348 if self.renamed(node):
349 t2 = self.read(node)
349 t2 = self.read(node)
350 return t2 != text
350 return t2 != text
351
351
352 return True
352 return True
353
353
354 def size(self, rev):
354 def size(self, rev):
355 validaterev(rev)
355 validaterev(rev)
356
356
357 node = self._indexbyrev[rev][b'node']
357 node = self._indexbyrev[rev][b'node']
358
358
359 if self.renamed(node):
359 if self.renamed(node):
360 return len(self.read(node))
360 return len(self.read(node))
361
361
362 if self.iscensored(rev):
362 if self.iscensored(rev):
363 return 0
363 return 0
364
364
365 return len(self.revision(node))
365 return len(self.revision(node))
366
366
367 def iscensored(self, rev):
367 def iscensored(self, rev):
368 validaterev(rev)
368 validaterev(rev)
369
369
370 return self._flags(rev) & repository.REVISION_FLAG_CENSORED
370 return self._flags(rev) & repository.REVISION_FLAG_CENSORED
371
371
372 def commonancestorsheads(self, a, b):
372 def commonancestorsheads(self, a, b):
373 validatenode(a)
373 validatenode(a)
374 validatenode(b)
374 validatenode(b)
375
375
376 a = self.rev(a)
376 a = self.rev(a)
377 b = self.rev(b)
377 b = self.rev(b)
378
378
379 ancestors = ancestor.commonancestorsheads(self.parentrevs, a, b)
379 ancestors = ancestor.commonancestorsheads(self.parentrevs, a, b)
380 return pycompat.maplist(self.node, ancestors)
380 return pycompat.maplist(self.node, ancestors)
381
381
382 def descendants(self, revs):
382 def descendants(self, revs):
383 # This is a copy of revlog.descendants()
383 # This is a copy of revlog.descendants()
384 first = min(revs)
384 first = min(revs)
385 if first == nullrev:
385 if first == nullrev:
386 for i in self:
386 for i in self:
387 yield i
387 yield i
388 return
388 return
389
389
390 seen = set(revs)
390 seen = set(revs)
391 for i in self.revs(start=first + 1):
391 for i in self.revs(start=first + 1):
392 for x in self.parentrevs(i):
392 for x in self.parentrevs(i):
393 if x != nullrev and x in seen:
393 if x != nullrev and x in seen:
394 seen.add(i)
394 seen.add(i)
395 yield i
395 yield i
396 break
396 break
397
397
398 # Required by verify.
398 # Required by verify.
399 def files(self):
399 def files(self):
400 entries = self._svfs.listdir(self._storepath)
400 entries = self._svfs.listdir(self._storepath)
401
401
402 # Strip out undo.backup.* files created as part of transaction
402 # Strip out undo.backup.* files created as part of transaction
403 # recording.
403 # recording.
404 entries = [f for f in entries if not f.startswith('undo.backup.')]
404 entries = [f for f in entries if not f.startswith('undo.backup.')]
405
405
406 return [b'/'.join((self._storepath, f)) for f in entries]
406 return [b'/'.join((self._storepath, f)) for f in entries]
407
407
408 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
408 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
409 revisionscount=False, trackedsize=False,
409 revisionscount=False, trackedsize=False,
410 storedsize=False):
410 storedsize=False):
411 # TODO do a real implementation of this
411 # TODO do a real implementation of this
412 return {
412 return {
413 'exclusivefiles': [],
413 'exclusivefiles': [],
414 'sharedfiles': [],
414 'sharedfiles': [],
415 'revisionscount': len(self),
415 'revisionscount': len(self),
416 'trackedsize': 0,
416 'trackedsize': 0,
417 'storedsize': None,
417 'storedsize': None,
418 }
418 }
419
419
420 def verifyintegrity(self, state):
420 def verifyintegrity(self, state):
421 state['skipread'] = set()
421 state['skipread'] = set()
422 for rev in self:
422 for rev in self:
423 node = self.node(rev)
423 node = self.node(rev)
424 try:
424 try:
425 self.revision(node)
425 self.revision(node)
426 except Exception as e:
426 except Exception as e:
427 yield simplefilestoreproblem(
427 yield simplefilestoreproblem(
428 error='unpacking %s: %s' % (node, e),
428 error='unpacking %s: %s' % (node, e),
429 node=node)
429 node=node)
430 state['skipread'].add(node)
430 state['skipread'].add(node)
431
431
432 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
432 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
433 assumehaveparentrevisions=False,
433 assumehaveparentrevisions=False,
434 deltamode=repository.CG_DELTAMODE_STD):
434 deltamode=repository.CG_DELTAMODE_STD):
435 # TODO this will probably break on some ordering options.
435 # TODO this will probably break on some ordering options.
436 nodes = [n for n in nodes if n != nullid]
436 nodes = [n for n in nodes if n != nullid]
437 if not nodes:
437 if not nodes:
438 return
438 return
439 for delta in storageutil.emitrevisions(
439 for delta in storageutil.emitrevisions(
440 self, nodes, nodesorder, simplestorerevisiondelta,
440 self, nodes, nodesorder, simplestorerevisiondelta,
441 revisiondata=revisiondata,
441 revisiondata=revisiondata,
442 assumehaveparentrevisions=assumehaveparentrevisions,
442 assumehaveparentrevisions=assumehaveparentrevisions,
443 deltamode=deltamode):
443 deltamode=deltamode):
444 yield delta
444 yield delta
445
445
446 def add(self, text, meta, transaction, linkrev, p1, p2):
446 def add(self, text, meta, transaction, linkrev, p1, p2):
447 if meta or text.startswith(b'\1\n'):
447 if meta or text.startswith(b'\1\n'):
448 text = storageutil.packmeta(meta, text)
448 text = storageutil.packmeta(meta, text)
449
449
450 return self.addrevision(text, transaction, linkrev, p1, p2)
450 return self.addrevision(text, transaction, linkrev, p1, p2)
451
451
452 def addrevision(self, text, transaction, linkrev, p1, p2, node=None,
452 def addrevision(self, text, transaction, linkrev, p1, p2, node=None,
453 flags=revlog.REVIDX_DEFAULT_FLAGS, cachedelta=None):
453 flags=revlog.REVIDX_DEFAULT_FLAGS, cachedelta=None):
454 validatenode(p1)
454 validatenode(p1)
455 validatenode(p2)
455 validatenode(p2)
456
456
457 if flags:
457 if flags:
458 node = node or storageutil.hashrevisionsha1(text, p1, p2)
458 node = node or storageutil.hashrevisionsha1(text, p1, p2)
459
459
460 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
460 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
461
461
462 node = node or storageutil.hashrevisionsha1(text, p1, p2)
462 node = node or storageutil.hashrevisionsha1(text, p1, p2)
463
463
464 if node in self._indexbynode:
464 if node in self._indexbynode:
465 return node
465 return node
466
466
467 if validatehash:
467 if validatehash:
468 self.checkhash(rawtext, node, p1=p1, p2=p2)
468 self.checkhash(rawtext, node, p1=p1, p2=p2)
469
469
470 return self._addrawrevision(node, rawtext, transaction, linkrev, p1, p2,
470 return self._addrawrevision(node, rawtext, transaction, linkrev, p1, p2,
471 flags)
471 flags)
472
472
473 def _addrawrevision(self, node, rawtext, transaction, link, p1, p2, flags):
473 def _addrawrevision(self, node, rawtext, transaction, link, p1, p2, flags):
474 transaction.addbackup(self._indexpath)
474 transaction.addbackup(self._indexpath)
475
475
476 path = b'/'.join([self._storepath, hex(node)])
476 path = b'/'.join([self._storepath, hex(node)])
477
477
478 self._svfs.write(path, rawtext)
478 self._svfs.write(path, rawtext)
479
479
480 self._indexdata.append({
480 self._indexdata.append({
481 b'node': node,
481 b'node': node,
482 b'p1': p1,
482 b'p1': p1,
483 b'p2': p2,
483 b'p2': p2,
484 b'linkrev': link,
484 b'linkrev': link,
485 b'flags': flags,
485 b'flags': flags,
486 })
486 })
487
487
488 self._reflectindexupdate()
488 self._reflectindexupdate()
489
489
490 return node
490 return node
491
491
492 def _reflectindexupdate(self):
492 def _reflectindexupdate(self):
493 self._refreshindex()
493 self._refreshindex()
494 self._svfs.write(self._indexpath,
494 self._svfs.write(self._indexpath,
495 ''.join(cborutil.streamencode(self._indexdata)))
495 ''.join(cborutil.streamencode(self._indexdata)))
496
496
497 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None,
497 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None,
498 maybemissingparents=False):
498 maybemissingparents=False):
499 if maybemissingparents:
499 if maybemissingparents:
500 raise error.Abort(_('simple store does not support missing parents '
500 raise error.Abort(_('simple store does not support missing parents '
501 'write mode'))
501 'write mode'))
502
502
503 nodes = []
503 nodes = []
504
504
505 transaction.addbackup(self._indexpath)
505 transaction.addbackup(self._indexpath)
506
506
507 for node, p1, p2, linknode, deltabase, delta, flags in deltas:
507 for node, p1, p2, linknode, deltabase, delta, flags in deltas:
508 linkrev = linkmapper(linknode)
508 linkrev = linkmapper(linknode)
509 flags = flags or revlog.REVIDX_DEFAULT_FLAGS
509 flags = flags or revlog.REVIDX_DEFAULT_FLAGS
510
510
511 nodes.append(node)
511 nodes.append(node)
512
512
513 if node in self._indexbynode:
513 if node in self._indexbynode:
514 continue
514 continue
515
515
516 # Need to resolve the fulltext from the delta base.
516 # Need to resolve the fulltext from the delta base.
517 if deltabase == nullid:
517 if deltabase == nullid:
518 text = mdiff.patch(b'', delta)
518 text = mdiff.patch(b'', delta)
519 else:
519 else:
520 text = mdiff.patch(self.revision(deltabase), delta)
520 text = mdiff.patch(self.revision(deltabase), delta)
521
521
522 self._addrawrevision(node, text, transaction, linkrev, p1, p2,
522 self._addrawrevision(node, text, transaction, linkrev, p1, p2,
523 flags)
523 flags)
524
524
525 if addrevisioncb:
525 if addrevisioncb:
526 addrevisioncb(self, node)
526 addrevisioncb(self, node)
527 return nodes
527 return nodes
528
528
529 def _headrevs(self):
529 def _headrevs(self):
530 # Assume all revisions are heads by default.
530 # Assume all revisions are heads by default.
531 revishead = {rev: True for rev in self._indexbyrev}
531 revishead = {rev: True for rev in self._indexbyrev}
532
532
533 for rev, entry in self._indexbyrev.items():
533 for rev, entry in self._indexbyrev.items():
534 # Unset head flag for all seen parents.
534 # Unset head flag for all seen parents.
535 revishead[self.rev(entry[b'p1'])] = False
535 revishead[self.rev(entry[b'p1'])] = False
536 revishead[self.rev(entry[b'p2'])] = False
536 revishead[self.rev(entry[b'p2'])] = False
537
537
538 return [rev for rev, ishead in sorted(revishead.items())
538 return [rev for rev, ishead in sorted(revishead.items())
539 if ishead]
539 if ishead]
540
540
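A minimal sketch of the computation above: every revision starts out as a head and loses that status once it is seen as somebody's parent (with -1 standing in for the null revision).

    parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1)}   # toy DAG: 0 -> 1, 0 -> 2
    ishead = {rev: True for rev in parents}
    for rev, (p1, p2) in parents.items():
        ishead[p1] = ishead[p2] = False
    assert [r for r, h in sorted(ishead.items()) if h and r != -1] == [1, 2]
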
541 def heads(self, start=None, stop=None):
541 def heads(self, start=None, stop=None):
542 # This is copied from revlog.py.
542 # This is copied from revlog.py.
543 if start is None and stop is None:
543 if start is None and stop is None:
544 if not len(self):
544 if not len(self):
545 return [nullid]
545 return [nullid]
546 return [self.node(r) for r in self._headrevs()]
546 return [self.node(r) for r in self._headrevs()]
547
547
548 if start is None:
548 if start is None:
549 start = nullid
549 start = nullid
550 if stop is None:
550 if stop is None:
551 stop = []
551 stop = []
552 stoprevs = set([self.rev(n) for n in stop])
552 stoprevs = set([self.rev(n) for n in stop])
553 startrev = self.rev(start)
553 startrev = self.rev(start)
554 reachable = {startrev}
554 reachable = {startrev}
555 heads = {startrev}
555 heads = {startrev}
556
556
557 parentrevs = self.parentrevs
557 parentrevs = self.parentrevs
558 for r in self.revs(start=startrev + 1):
558 for r in self.revs(start=startrev + 1):
559 for p in parentrevs(r):
559 for p in parentrevs(r):
560 if p in reachable:
560 if p in reachable:
561 if r not in stoprevs:
561 if r not in stoprevs:
562 reachable.add(r)
562 reachable.add(r)
563 heads.add(r)
563 heads.add(r)
564 if p in heads and p not in stoprevs:
564 if p in heads and p not in stoprevs:
565 heads.remove(p)
565 heads.remove(p)
566
566
567 return [self.node(r) for r in heads]
567 return [self.node(r) for r in heads]
568
568
    def children(self, node):
        validatenode(node)

        # This is a copy of revlog.children().
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

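    # For example, if revs 2 and 3 both name rev 1 as a parent,
    # children(node(1)) scans revs 2 onward and collects both nodes;
    # the nullrev branch additionally reports parentless roots as
    # children of the null node.
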
    def getstrippoint(self, minlink):
        return storageutil.resolvestripinfo(
            minlink, len(self) - 1, self._headrevs(), self.linkrev,
            self.parentrevs)

    def strip(self, minlink, transaction):
        if not len(self):
            return

        rev, _ignored = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # Purge index data starting at the requested revision.
        self._indexdata[rev:] = []
        self._reflectindexupdate()

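    # For example, strip(minlink=5, tr) asks getstrippoint() for the
    # lowest rev that must go so nothing linked to changelog rev 5 or
    # later survives, then truncates _indexdata from that rev on; when
    # getstrippoint() returns len(self), nothing needs stripping.
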
def issimplestorefile(f, kind, st):
    if kind != stat.S_IFREG:
        return False

    if store.isrevlog(f, kind, st):
        return False

    # Ignore transaction undo files.
    if f.startswith('undo.'):
        return False

    # Otherwise assume it belongs to the simple store.
    return True

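# In other words, any regular file in the store that is neither a
# revlog (e.g. a '.i' or '.d' file) nor an 'undo.*' transaction
# backup is treated as simple store data.
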
class simplestore(store.encodedstore):
    def datafiles(self):
        for x in super(simplestore, self).datafiles():
            yield x

        # Supplement with non-revlog files.
        extrafiles = self._walk('data', True, filefilter=issimplestorefile)

        for unencoded, encoded, size in extrafiles:
            try:
                unencoded = store.decodefilename(unencoded)
            except KeyError:
                unencoded = None

            yield unencoded, encoded, size

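# datafiles() therefore yields the same (unencoded, encoded, size)
# triples as encodedstore for revlog files, plus one per simple store
# file, with unencoded set to None when the name fails to decode.
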
def reposetup(ui, repo):
    if not repo.local():
        return

    if isinstance(repo, bundlerepo.bundlerepository):
        raise error.Abort(_('cannot use simple store with bundlerepo'))

    class simplestorerepo(repo.__class__):
        def file(self, f):
            return filestorage(self.svfs, f)

    repo.__class__ = simplestorerepo

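# Swapping repo.__class__ for a dynamically created subclass is the
# usual Mercurial extension pattern for overriding repo methods: after
# reposetup() runs, repo.file(f) returns a filestorage object backed
# by the store vfs instead of a revlog-based filelog.
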
def featuresetup(ui, supported):
    supported.add(REQUIREMENT)

def newreporequirements(orig, ui, createopts):
    """Modifies default requirements for new repos to use the simple store."""
    requirements = orig(ui, createopts)

    # These requirements are only used to affect creation of the store
    # object. We have our own store. So we can remove them.
    # TODO do this once we feel like taking the test hit.
    #if 'fncache' in requirements:
    #    requirements.remove('fncache')
    #if 'dotencode' in requirements:
    #    requirements.remove('dotencode')

    requirements.add(REQUIREMENT)

    return requirements

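# A repository created with this extension loaded might therefore
# carry requirements like {'revlogv1', 'store', REQUIREMENT}; any
# client that does not recognize REQUIREMENT will refuse to open it,
# which is what keeps non-simple-store clients out.
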
def makestore(orig, requirements, path, vfstype):
    if REQUIREMENT not in requirements:
        return orig(requirements, path, vfstype)

    return simplestore(path, vfstype)

def verifierinit(orig, self, *args, **kwargs):
    orig(self, *args, **kwargs)

    # We don't care that files in the store don't align with what is
    # advertised. So suppress these warnings.
    self.warnorphanstorefiles = False

def extsetup(ui):
    localrepo.featuresetupfuncs.add(featuresetup)

    extensions.wrapfunction(localrepo, 'newreporequirements',
                            newreporequirements)
    extensions.wrapfunction(localrepo, 'makestore', makestore)
    extensions.wrapfunction(verify.verifier, '__init__', verifierinit)
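
# A sketch of enabling the extension (the file path is an assumption;
# point it at wherever this module lives):
#
#   hg --config extensions.simplestore=/path/to/simplestorerepo.py \
#       init repo
#
# Repositories created this way record REQUIREMENT and route file
# storage through simplestore/filestorage.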