##// END OF EJS Templates
revlog: move the_revisioncache on the inner object...
marmoute -
r51989:8ec2de9c default
parent child Browse files
Show More
@@ -1,754 +1,757 b''
1 # bundlerepo.py - repository class for viewing uncompressed bundles
1 # bundlerepo.py - repository class for viewing uncompressed bundles
2 #
2 #
3 # Copyright 2006, 2007 Benoit Boissinot <bboissin@gmail.com>
3 # Copyright 2006, 2007 Benoit Boissinot <bboissin@gmail.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Repository class for viewing uncompressed bundles.
8 """Repository class for viewing uncompressed bundles.
9
9
10 This provides a read-only repository interface to bundles as if they
10 This provides a read-only repository interface to bundles as if they
11 were part of the actual repository.
11 were part of the actual repository.
12 """
12 """
13
13
14
14
15 import contextlib
15 import contextlib
16 import os
16 import os
17 import shutil
17 import shutil
18
18
19 from .i18n import _
19 from .i18n import _
20 from .node import (
20 from .node import (
21 hex,
21 hex,
22 nullrev,
22 nullrev,
23 )
23 )
24
24
25 from . import (
25 from . import (
26 bundle2,
26 bundle2,
27 changegroup,
27 changegroup,
28 changelog,
28 changelog,
29 cmdutil,
29 cmdutil,
30 discovery,
30 discovery,
31 encoding,
31 encoding,
32 error,
32 error,
33 exchange,
33 exchange,
34 filelog,
34 filelog,
35 localrepo,
35 localrepo,
36 manifest,
36 manifest,
37 mdiff,
37 mdiff,
38 pathutil,
38 pathutil,
39 phases,
39 phases,
40 pycompat,
40 pycompat,
41 revlog,
41 revlog,
42 revlogutils,
42 revlogutils,
43 util,
43 util,
44 vfs as vfsmod,
44 vfs as vfsmod,
45 )
45 )
46 from .utils import (
46 from .utils import (
47 urlutil,
47 urlutil,
48 )
48 )
49
49
50 from .revlogutils import (
50 from .revlogutils import (
51 constants as revlog_constants,
51 constants as revlog_constants,
52 )
52 )
53
53
54
54
class bundlerevlog(revlog.revlog):
    """A revlog backed by a local revlog plus the revisions of a changegroup.

    Revisions up to ``repotiprev`` live in the on-disk revlog; revisions
    above it are materialized lazily from the (uncompressed) changegroup
    stream held in ``self.bundle``.
    """

    def __init__(self, opener, target, radix, cgunpacker, linkmapper):
        # How it works:
        # To retrieve a revision, we need to know the offset of the revision in
        # the bundle (an unbundle object). We store this offset in the index
        # (start). The base of the delta is stored in the base field.
        #
        # To differentiate a rev in the bundle from a rev in the revlog, we
        # check revision against repotiprev.
        opener = vfsmod.readonlyvfs(opener)
        revlog.revlog.__init__(self, opener, target=target, radix=radix)
        self.bundle = cgunpacker
        n = len(self)
        self.repotiprev = n - 1
        self.bundlerevs = set()  # used by 'bundle()' revset expression
        for deltadata in cgunpacker.deltaiter():
            node, p1, p2, cs, deltabase, delta, flags, sidedata = deltadata

            size = len(delta)
            start = cgunpacker.tell() - size

            if self.index.has_node(node):
                # this can happen if two branches make the same change
                self.bundlerevs.add(self.index.rev(node))
                continue
            if cs == node:
                linkrev = nullrev
            else:
                linkrev = linkmapper(cs)

            for p in (p1, p2):
                if not self.index.has_node(p):
                    raise error.LookupError(
                        p, self.display_id, _(b"unknown parent")
                    )

            if not self.index.has_node(deltabase):
                raise error.LookupError(
                    deltabase, self.display_id, _(b'unknown delta base')
                )

            baserev = self.rev(deltabase)
            # start, size, full unc. size, base (unused), link, p1, p2, node, sidedata_offset (unused), sidedata_size (unused)
            e = revlogutils.entry(
                flags=flags,
                data_offset=start,
                data_compressed_length=size,
                data_delta_base=baserev,
                link_rev=linkrev,
                parent_rev_1=self.rev(p1),
                parent_rev_2=self.rev(p2),
                node_id=node,
            )
            self.index.append(e)
            self.bundlerevs.add(n)
            n += 1

    @contextlib.contextmanager
    def reading(self):
        """Context manager granting read access to the underlying revlog.

        When every revision comes from the bundle (empty base revlog),
        there is no on-disk data file to open, so this is a no-op.
        """
        if self.repotiprev < 0:
            yield
        else:
            with super().reading() as x:
                yield x

    def _chunk(self, rev):
        # Warning: in case of bundle, the diff is against what we stored as
        # delta base, not against rev - 1
        # XXX: could use some caching
        if rev <= self.repotiprev:
            return revlog.revlog._chunk(self, rev)
        self.bundle.seek(self.start(rev))
        return self.bundle.read(self.length(rev))

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 > self.repotiprev and rev2 > self.repotiprev:
            # hot path for bundle
            revb = self.index[rev2][3]
            if revb == rev1:
                return self._chunk(rev2)
        elif rev1 <= self.repotiprev and rev2 <= self.repotiprev:
            return revlog.revlog.revdiff(self, rev1, rev2)

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _rawtext(self, node, rev):
        """Return ``(rev, rawtext, validated)`` for a revision.

        Revisions stored in the bundle are rebuilt by walking the delta
        chain down to either the revision cache, a base-revlog revision,
        or nullrev, then re-applying the collected deltas.
        """
        if rev is None:
            rev = self.rev(node)
        validated = False
        rawtext = None
        chain = []
        iterrev = rev
        # reconstruct the revision if it is from a changegroup
        while iterrev > self.repotiprev:
            if (
                self._inner._revisioncache
                and self._inner._revisioncache[1] == iterrev
            ):
                rawtext = self._inner._revisioncache[2]
                break
            chain.append(iterrev)
            iterrev = self.index[iterrev][3]
        if iterrev == nullrev:
            rawtext = b''
        elif rawtext is None:
            r = super(bundlerevlog, self)._rawtext(
                self.node(iterrev),
                iterrev,
            )
            __, rawtext, validated = r
        if chain:
            validated = False
        while chain:
            delta = self._chunk(chain.pop())
            rawtext = mdiff.patches(rawtext, [delta])
        return rev, rawtext, validated

    def addrevision(self, *args, **kwargs):
        # bundle repositories are read-only
        raise NotImplementedError

    def addgroup(self, *args, **kwargs):
        # bundle repositories are read-only
        raise NotImplementedError

    def strip(self, *args, **kwargs):
        # bundle repositories are read-only
        raise NotImplementedError

    def checksize(self):
        raise NotImplementedError
181
184
182
185
class bundlechangelog(bundlerevlog, changelog.changelog):
    """Changelog whose tip revisions come from a changegroup stream."""

    def __init__(self, opener, cgunpacker):
        changelog.changelog.__init__(self, opener)
        # changelog entries link to themselves
        linkmapper = lambda x: x
        bundlerevlog.__init__(
            self,
            opener,
            (revlog_constants.KIND_CHANGELOG, None),
            self.radix,
            cgunpacker,
            linkmapper,
        )
195
198
196
199
class bundlemanifest(bundlerevlog, manifest.manifestrevlog):
    """Manifest revlog whose tip revisions come from a changegroup stream.

    For changegroup version 03 bundles, positions of per-directory
    (tree manifest) sub-logs are recorded so ``dirlog`` can seek to them.
    """

    def __init__(
        self,
        nodeconstants,
        opener,
        cgunpacker,
        linkmapper,
        dirlogstarts=None,
        dir=b'',
    ):
        # XXX manifestrevlog is not actually a revlog , so mixing it with
        # bundlerevlog is not a good idea.
        manifest.manifestrevlog.__init__(self, nodeconstants, opener, tree=dir)
        bundlerevlog.__init__(
            self,
            opener,
            (revlog_constants.KIND_MANIFESTLOG, dir),
            self._revlog.radix,
            cgunpacker,
            linkmapper,
        )
        if dirlogstarts is None:
            dirlogstarts = {}
            if self.bundle.version == b"03":
                dirlogstarts = _getfilestarts(self.bundle)
        self._dirlogstarts = dirlogstarts
        self._linkmapper = linkmapper

    def dirlog(self, d):
        """Return the manifest log for directory ``d``.

        Directories present in the bundle are served from the changegroup
        stream; anything else falls through to the base implementation.
        """
        if d in self._dirlogstarts:
            self.bundle.seek(self._dirlogstarts[d])
            return bundlemanifest(
                self.nodeconstants,
                self.opener,
                self.bundle,
                self._linkmapper,
                self._dirlogstarts,
                dir=d,
            )
        return super(bundlemanifest, self).dirlog(d)
237
240
238
241
class bundlefilelog(filelog.filelog):
    """Filelog whose tip revisions come from a changegroup stream."""

    def __init__(self, opener, path, cgunpacker, linkmapper):
        filelog.filelog.__init__(self, opener, path)
        self._revlog = bundlerevlog(
            opener,
            # XXX should use the unencoded path
            target=(revlog_constants.KIND_FILELOG, path),
            radix=self._revlog.radix,
            cgunpacker=cgunpacker,
            linkmapper=linkmapper,
        )
250
253
251
254
class bundlepeer(localrepo.localpeer):
    """Local peer for a bundle repository; pushing is never possible."""

    def canpush(self):
        return False
255
258
256
259
class bundlephasecache(phases.phasecache):
    """Phase cache for a bundle repo: in-memory only, never written out."""

    def __init__(self, *args, **kwargs):
        super(bundlephasecache, self).__init__(*args, **kwargs)
        if hasattr(self, 'opener'):
            # make sure nothing can be written through our opener
            self.opener = vfsmod.readonlyvfs(self.opener)

    def write(self):
        # phase data for a bundle repo must never hit disk
        raise NotImplementedError

    def _write(self, fp):
        raise NotImplementedError

    def _updateroots(self, phase, newroots, tr):
        # update in memory only; mark dirty so consumers know state changed
        self.phaseroots[phase] = newroots
        self.invalidate()
        self.dirty = True
273
276
274
277
def _getfilestarts(cgunpacker):
    """Map each filename in the changegroup to its stream position.

    Consumes the file portion of ``cgunpacker``: for every filelog header,
    records the current offset, then drains that file's delta chunks so the
    next header can be read.
    """
    filespos = {}
    for chunkdata in iter(cgunpacker.filelogheader, {}):
        fname = chunkdata[b'filename']
        filespos[fname] = cgunpacker.tell()
        # skip over this file's deltas to reach the next header
        for chunk in iter(lambda: cgunpacker.deltachunk(None), {}):
            pass
    return filespos
283
286
284
287
class bundlerepository:
    """A repository instance that is a union of a local repo and a bundle.

    Instances represent a read-only repository composed of a local repository
    with the contents of a bundle file applied. The repository instance is
    conceptually similar to the state of a repository after an
    ``hg unbundle`` operation. However, the contents of the bundle are never
    applied to the actual base repository.

    Instances constructed directly are not usable as repository objects.
    Use instance() or makebundlerepository() to create instances.
    """

    def __init__(self, bundlepath, url, tempparent):
        self._tempparent = tempparent
        self._url = url

        self.ui.setconfig(b'phases', b'publish', False, b'bundlerepo')

        # dict with the mapping 'filename' -> position in the changegroup.
        self._cgfilespos = {}
        self._bundlefile = None
        self._cgunpacker = None
        self.tempfile = None
        f = util.posixfile(bundlepath, b"rb")
        bundle = exchange.readbundle(self.ui, f, bundlepath)

        if isinstance(bundle, bundle2.unbundle20):
            self._bundlefile = bundle

            cgpart = None
            for part in bundle.iterparts(seekable=True):
                if part.type == b'phase-heads':
                    self._handle_bundle2_phase_part(bundle, part)
                elif part.type == b'changegroup':
                    if cgpart:
                        raise NotImplementedError(
                            b"can't process multiple changegroups"
                        )
                    cgpart = part
                    self._handle_bundle2_cg_part(bundle, part)

            if not cgpart:
                raise error.Abort(_(b"No changegroups found"))

            # This is required to placate a later consumer, which expects
            # the payload offset to be at the beginning of the changegroup.
            # We need to do this after the iterparts() generator advances
            # because iterparts() will seek to end of payload after the
            # generator returns control to iterparts().
            cgpart.seek(0, os.SEEK_SET)

        elif isinstance(bundle, changegroup.cg1unpacker):
            self._handle_bundle1(bundle, bundlepath)
        else:
            raise error.Abort(
                _(b'bundle type %r cannot be read') % type(bundle)
            )

    def _handle_bundle1(self, bundle, bundlepath):
        """Set up state from a bare (bundle1) changegroup stream."""
        if bundle.compressed():
            # decompress to a temp file so we can seek within the stream
            f = self._writetempbundle(bundle.read, b'.hg10un', header=b'HG10UN')
            bundle = exchange.readbundle(self.ui, f, bundlepath, self.vfs)

        self._bundlefile = bundle
        self._cgunpacker = bundle

        self.firstnewrev = self.changelog.repotiprev + 1
        phases.retractboundary(
            self,
            None,
            phases.draft,
            [ctx.node() for ctx in self[self.firstnewrev :]],
        )

    def _handle_bundle2_cg_part(self, bundle, part):
        """Set up the changegroup unpacker from a bundle2 changegroup part."""
        assert part.type == b'changegroup'
        cgstream = part
        targetphase = part.params.get(b'targetphase')
        try:
            targetphase = int(targetphase)
        except TypeError:
            # missing parameter: keep None and fall back to draft below
            pass
        if targetphase is None:
            targetphase = phases.draft
        if targetphase not in phases.allphases:
            m = _(b'unsupported targetphase: %d')
            m %= targetphase
            raise error.Abort(m)
        version = part.params.get(b'version', b'01')
        legalcgvers = changegroup.supportedincomingversions(self)
        if version not in legalcgvers:
            msg = _(b'Unsupported changegroup version: %s')
            raise error.Abort(msg % version)
        if bundle.compressed():
            # decompress to a temp file so we can seek within the stream
            cgstream = self._writetempbundle(part.read, b'.cg%sun' % version)

        self._cgunpacker = changegroup.getunbundler(version, cgstream, b'UN')

        self.firstnewrev = self.changelog.repotiprev + 1
        phases.retractboundary(
            self,
            None,
            targetphase,
            [ctx.node() for ctx in self[self.firstnewrev :]],
        )

    def _handle_bundle2_phase_part(self, bundle, part):
        """Apply phase information carried by a bundle2 phase-heads part."""
        assert part.type == b'phase-heads'

        unfi = self.unfiltered()
        headsbyphase = phases.binarydecode(part)
        phases.updatephases(unfi, lambda: None, headsbyphase)

    def _writetempbundle(self, readfn, suffix, header=b''):
        """Write a temporary file to disk"""
        fdtemp, temp = self.vfs.mkstemp(prefix=b"hg-bundle-", suffix=suffix)
        self.tempfile = temp

        with os.fdopen(fdtemp, 'wb') as fptemp:
            fptemp.write(header)
            while True:
                chunk = readfn(2 ** 18)
                if not chunk:
                    break
                fptemp.write(chunk)

        return self.vfs.open(self.tempfile, mode=b"rb")

    @localrepo.unfilteredpropertycache
    def _phasecache(self):
        return bundlephasecache(self, self._phasedefaults)

    @localrepo.unfilteredpropertycache
    def changelog(self):
        # consume the header if it exists
        self._cgunpacker.changelogheader()
        c = bundlechangelog(self.svfs, self._cgunpacker)
        # remember where the manifest portion of the stream begins
        self.manstart = self._cgunpacker.tell()
        return c

    def _refreshchangelog(self):
        # changelog for bundle repo are not filecache, this method is not
        # applicable.
        pass

    @localrepo.unfilteredpropertycache
    def manifestlog(self):
        self._cgunpacker.seek(self.manstart)
        # consume the header if it exists
        self._cgunpacker.manifestheader()
        linkmapper = self.unfiltered().changelog.rev
        rootstore = bundlemanifest(
            self.nodeconstants, self.svfs, self._cgunpacker, linkmapper
        )
        # remember where the file portion of the stream begins
        self.filestart = self._cgunpacker.tell()

        return manifest.manifestlog(
            self.svfs, self, rootstore, self.narrowmatch()
        )

    def _consumemanifest(self):
        """Consumes the manifest portion of the bundle, setting filestart so the
        file portion can be read."""
        self._cgunpacker.seek(self.manstart)
        self._cgunpacker.manifestheader()
        for delta in self._cgunpacker.deltaiter():
            pass
        self.filestart = self._cgunpacker.tell()

    @localrepo.unfilteredpropertycache
    def manstart(self):
        # accessing the changelog property sets self.manstart as a side
        # effect, shadowing this propertycache
        self.changelog
        return self.manstart

    @localrepo.unfilteredpropertycache
    def filestart(self):
        self.manifestlog

        # If filestart was not set by self.manifestlog, that means the
        # manifestlog implementation did not consume the manifests from the
        # changegroup (ex: it might be consuming trees from a separate bundle2
        # part instead). So we need to manually consume it.
        if 'filestart' not in self.__dict__:
            self._consumemanifest()

        return self.filestart

    def url(self):
        return self._url

    def file(self, f):
        """Return the filelog for ``f``, preferring bundle content."""
        if not self._cgfilespos:
            # lazily index the file portion of the changegroup
            self._cgunpacker.seek(self.filestart)
            self._cgfilespos = _getfilestarts(self._cgunpacker)

        if f in self._cgfilespos:
            self._cgunpacker.seek(self._cgfilespos[f])
            linkmapper = self.unfiltered().changelog.rev
            return bundlefilelog(self.svfs, f, self._cgunpacker, linkmapper)
        else:
            return super(bundlerepository, self).file(f)

    def close(self):
        """Close assigned bundle file immediately."""
        self._bundlefile.close()
        if self.tempfile is not None:
            self.vfs.unlink(self.tempfile)
        if self._tempparent:
            shutil.rmtree(self._tempparent, True)

    def cancopy(self):
        return False

    def peer(self, path=None, remotehidden=False):
        return bundlepeer(self, path=path, remotehidden=remotehidden)

    def getcwd(self):
        return encoding.getcwd()  # always outside the repo

    # Check if parents exist in localrepo before setting
    def setparents(self, p1, p2=None):
        if p2 is None:
            p2 = self.nullid
        p1rev = self.changelog.rev(p1)
        p2rev = self.changelog.rev(p2)
        msg = _(b"setting parent to node %s that only exists in the bundle\n")
        if self.changelog.repotiprev < p1rev:
            self.ui.warn(msg % hex(p1))
        if self.changelog.repotiprev < p2rev:
            self.ui.warn(msg % hex(p2))
        return super(bundlerepository, self).setparents(p1, p2)
517
520
518
521
def instance(ui, path, create, intents=None, createopts=None):
    """repository factory for ``bundle:`` URLs and plain bundle paths.

    Resolves the base repository path (explicit ``repo+bundle`` URL,
    ``bundle.mainreporoot`` config, or the repo containing the cwd) and
    returns a bundle repository. Creation is not supported.
    """
    if create:
        raise error.Abort(_(b'cannot create new bundle repository'))
    # internal config: bundle.mainreporoot
    parentpath = ui.config(b"bundle", b"mainreporoot")
    if not parentpath:
        # try to find the correct path to the working directory repo
        parentpath = cmdutil.findrepo(encoding.getcwd())
        if parentpath is None:
            parentpath = b''
    if parentpath:
        # Try to make the full path relative so we get a nice, short URL.
        # In particular, we don't want temp dir names in test outputs.
        cwd = encoding.getcwd()
        if parentpath == cwd:
            parentpath = b''
        else:
            cwd = pathutil.normasprefix(cwd)
            if parentpath.startswith(cwd):
                parentpath = parentpath[len(cwd) :]
    u = urlutil.url(path)
    path = u.localpath()
    if u.scheme == b'bundle':
        s = path.split(b"+", 1)
        if len(s) == 1:
            repopath, bundlename = parentpath, s[0]
        else:
            repopath, bundlename = s
    else:
        repopath, bundlename = parentpath, path

    return makebundlerepository(ui, repopath, bundlename)
551
554
552
555
553 def makebundlerepository(ui, repopath, bundlepath):
556 def makebundlerepository(ui, repopath, bundlepath):
554 """Make a bundle repository object based on repo and bundle paths."""
557 """Make a bundle repository object based on repo and bundle paths."""
555 if repopath:
558 if repopath:
556 url = b'bundle:%s+%s' % (util.expandpath(repopath), bundlepath)
559 url = b'bundle:%s+%s' % (util.expandpath(repopath), bundlepath)
557 else:
560 else:
558 url = b'bundle:%s' % bundlepath
561 url = b'bundle:%s' % bundlepath
559
562
560 # Because we can't make any guarantees about the type of the base
563 # Because we can't make any guarantees about the type of the base
561 # repository, we can't have a static class representing the bundle
564 # repository, we can't have a static class representing the bundle
562 # repository. We also can't make any guarantees about how to even
565 # repository. We also can't make any guarantees about how to even
563 # call the base repository's constructor!
566 # call the base repository's constructor!
564 #
567 #
565 # So, our strategy is to go through ``localrepo.instance()`` to construct
568 # So, our strategy is to go through ``localrepo.instance()`` to construct
566 # a repo instance. Then, we dynamically create a new type derived from
569 # a repo instance. Then, we dynamically create a new type derived from
567 # both it and our ``bundlerepository`` class which overrides some
570 # both it and our ``bundlerepository`` class which overrides some
568 # functionality. We then change the type of the constructed repository
571 # functionality. We then change the type of the constructed repository
569 # to this new type and initialize the bundle-specific bits of it.
572 # to this new type and initialize the bundle-specific bits of it.
570
573
571 try:
574 try:
572 repo = localrepo.instance(ui, repopath, create=False)
575 repo = localrepo.instance(ui, repopath, create=False)
573 tempparent = None
576 tempparent = None
574 except error.RequirementError:
577 except error.RequirementError:
575 raise # no fallback if the backing repo is unsupported
578 raise # no fallback if the backing repo is unsupported
576 except error.RepoError:
579 except error.RepoError:
577 tempparent = pycompat.mkdtemp()
580 tempparent = pycompat.mkdtemp()
578 try:
581 try:
579 repo = localrepo.instance(ui, tempparent, create=True)
582 repo = localrepo.instance(ui, tempparent, create=True)
580 except Exception:
583 except Exception:
581 shutil.rmtree(tempparent)
584 shutil.rmtree(tempparent)
582 raise
585 raise
583
586
584 class derivedbundlerepository(bundlerepository, repo.__class__):
587 class derivedbundlerepository(bundlerepository, repo.__class__):
585 pass
588 pass
586
589
587 repo.__class__ = derivedbundlerepository
590 repo.__class__ = derivedbundlerepository
588 bundlerepository.__init__(repo, bundlepath, url, tempparent)
591 bundlerepository.__init__(repo, bundlepath, url, tempparent)
589
592
590 return repo
593 return repo
591
594
592
595
593 class bundletransactionmanager:
596 class bundletransactionmanager:
594 def transaction(self):
597 def transaction(self):
595 return None
598 return None
596
599
597 def close(self):
600 def close(self):
598 raise NotImplementedError
601 raise NotImplementedError
599
602
600 def release(self):
603 def release(self):
601 raise NotImplementedError
604 raise NotImplementedError
602
605
603
606
604 def getremotechanges(
607 def getremotechanges(
605 ui, repo, peer, onlyheads=None, bundlename=None, force=False
608 ui, repo, peer, onlyheads=None, bundlename=None, force=False
606 ):
609 ):
607 """obtains a bundle of changes incoming from peer
610 """obtains a bundle of changes incoming from peer
608
611
609 "onlyheads" restricts the returned changes to those reachable from the
612 "onlyheads" restricts the returned changes to those reachable from the
610 specified heads.
613 specified heads.
611 "bundlename", if given, stores the bundle to this file path permanently;
614 "bundlename", if given, stores the bundle to this file path permanently;
612 otherwise it's stored to a temp file and gets deleted again when you call
615 otherwise it's stored to a temp file and gets deleted again when you call
613 the returned "cleanupfn".
616 the returned "cleanupfn".
614 "force" indicates whether to proceed on unrelated repos.
617 "force" indicates whether to proceed on unrelated repos.
615
618
616 Returns a tuple (local, csets, cleanupfn):
619 Returns a tuple (local, csets, cleanupfn):
617
620
618 "local" is a local repo from which to obtain the actual incoming
621 "local" is a local repo from which to obtain the actual incoming
619 changesets; it is a bundlerepo for the obtained bundle when the
622 changesets; it is a bundlerepo for the obtained bundle when the
620 original "peer" is remote.
623 original "peer" is remote.
621 "csets" lists the incoming changeset node ids.
624 "csets" lists the incoming changeset node ids.
622 "cleanupfn" must be called without arguments when you're done processing
625 "cleanupfn" must be called without arguments when you're done processing
623 the changes; it closes both the original "peer" and the one returned
626 the changes; it closes both the original "peer" and the one returned
624 here.
627 here.
625 """
628 """
626 tmp = discovery.findcommonincoming(repo, peer, heads=onlyheads, force=force)
629 tmp = discovery.findcommonincoming(repo, peer, heads=onlyheads, force=force)
627 common, incoming, rheads = tmp
630 common, incoming, rheads = tmp
628 if not incoming:
631 if not incoming:
629 try:
632 try:
630 if bundlename:
633 if bundlename:
631 os.unlink(bundlename)
634 os.unlink(bundlename)
632 except OSError:
635 except OSError:
633 pass
636 pass
634 return repo, [], peer.close
637 return repo, [], peer.close
635
638
636 commonset = set(common)
639 commonset = set(common)
637 rheads = [x for x in rheads if x not in commonset]
640 rheads = [x for x in rheads if x not in commonset]
638
641
639 bundle = None
642 bundle = None
640 bundlerepo = None
643 bundlerepo = None
641 localrepo = peer.local()
644 localrepo = peer.local()
642 if bundlename or not localrepo:
645 if bundlename or not localrepo:
643 # create a bundle (uncompressed if peer repo is not local)
646 # create a bundle (uncompressed if peer repo is not local)
644
647
645 # developer config: devel.legacy.exchange
648 # developer config: devel.legacy.exchange
646 legexc = ui.configlist(b'devel', b'legacy.exchange')
649 legexc = ui.configlist(b'devel', b'legacy.exchange')
647 forcebundle1 = b'bundle2' not in legexc and b'bundle1' in legexc
650 forcebundle1 = b'bundle2' not in legexc and b'bundle1' in legexc
648 canbundle2 = (
651 canbundle2 = (
649 not forcebundle1
652 not forcebundle1
650 and peer.capable(b'getbundle')
653 and peer.capable(b'getbundle')
651 and peer.capable(b'bundle2')
654 and peer.capable(b'bundle2')
652 )
655 )
653 if canbundle2:
656 if canbundle2:
654 with peer.commandexecutor() as e:
657 with peer.commandexecutor() as e:
655 b2 = e.callcommand(
658 b2 = e.callcommand(
656 b'getbundle',
659 b'getbundle',
657 {
660 {
658 b'source': b'incoming',
661 b'source': b'incoming',
659 b'common': common,
662 b'common': common,
660 b'heads': rheads,
663 b'heads': rheads,
661 b'bundlecaps': exchange.caps20to10(
664 b'bundlecaps': exchange.caps20to10(
662 repo, role=b'client'
665 repo, role=b'client'
663 ),
666 ),
664 b'cg': True,
667 b'cg': True,
665 },
668 },
666 ).result()
669 ).result()
667
670
668 fname = bundle = changegroup.writechunks(
671 fname = bundle = changegroup.writechunks(
669 ui, b2._forwardchunks(), bundlename
672 ui, b2._forwardchunks(), bundlename
670 )
673 )
671 else:
674 else:
672 if peer.capable(b'getbundle'):
675 if peer.capable(b'getbundle'):
673 with peer.commandexecutor() as e:
676 with peer.commandexecutor() as e:
674 cg = e.callcommand(
677 cg = e.callcommand(
675 b'getbundle',
678 b'getbundle',
676 {
679 {
677 b'source': b'incoming',
680 b'source': b'incoming',
678 b'common': common,
681 b'common': common,
679 b'heads': rheads,
682 b'heads': rheads,
680 },
683 },
681 ).result()
684 ).result()
682 elif onlyheads is None and not peer.capable(b'changegroupsubset'):
685 elif onlyheads is None and not peer.capable(b'changegroupsubset'):
683 # compat with older servers when pulling all remote heads
686 # compat with older servers when pulling all remote heads
684
687
685 with peer.commandexecutor() as e:
688 with peer.commandexecutor() as e:
686 cg = e.callcommand(
689 cg = e.callcommand(
687 b'changegroup',
690 b'changegroup',
688 {
691 {
689 b'nodes': incoming,
692 b'nodes': incoming,
690 b'source': b'incoming',
693 b'source': b'incoming',
691 },
694 },
692 ).result()
695 ).result()
693
696
694 rheads = None
697 rheads = None
695 else:
698 else:
696 with peer.commandexecutor() as e:
699 with peer.commandexecutor() as e:
697 cg = e.callcommand(
700 cg = e.callcommand(
698 b'changegroupsubset',
701 b'changegroupsubset',
699 {
702 {
700 b'bases': incoming,
703 b'bases': incoming,
701 b'heads': rheads,
704 b'heads': rheads,
702 b'source': b'incoming',
705 b'source': b'incoming',
703 },
706 },
704 ).result()
707 ).result()
705
708
706 if localrepo:
709 if localrepo:
707 bundletype = b"HG10BZ"
710 bundletype = b"HG10BZ"
708 else:
711 else:
709 bundletype = b"HG10UN"
712 bundletype = b"HG10UN"
710 fname = bundle = bundle2.writebundle(ui, cg, bundlename, bundletype)
713 fname = bundle = bundle2.writebundle(ui, cg, bundlename, bundletype)
711 # keep written bundle?
714 # keep written bundle?
712 if bundlename:
715 if bundlename:
713 bundle = None
716 bundle = None
714 if not localrepo:
717 if not localrepo:
715 # use the created uncompressed bundlerepo
718 # use the created uncompressed bundlerepo
716 localrepo = bundlerepo = makebundlerepository(
719 localrepo = bundlerepo = makebundlerepository(
717 repo.baseui, repo.root, fname
720 repo.baseui, repo.root, fname
718 )
721 )
719
722
720 # this repo contains local and peer now, so filter out local again
723 # this repo contains local and peer now, so filter out local again
721 common = repo.heads()
724 common = repo.heads()
722 if localrepo:
725 if localrepo:
723 # Part of common may be remotely filtered
726 # Part of common may be remotely filtered
724 # So use an unfiltered version
727 # So use an unfiltered version
725 # The discovery process probably need cleanup to avoid that
728 # The discovery process probably need cleanup to avoid that
726 localrepo = localrepo.unfiltered()
729 localrepo = localrepo.unfiltered()
727
730
728 csets = localrepo.changelog.findmissing(common, rheads)
731 csets = localrepo.changelog.findmissing(common, rheads)
729
732
730 if bundlerepo:
733 if bundlerepo:
731 reponodes = [ctx.node() for ctx in bundlerepo[bundlerepo.firstnewrev :]]
734 reponodes = [ctx.node() for ctx in bundlerepo[bundlerepo.firstnewrev :]]
732
735
733 with peer.commandexecutor() as e:
736 with peer.commandexecutor() as e:
734 remotephases = e.callcommand(
737 remotephases = e.callcommand(
735 b'listkeys',
738 b'listkeys',
736 {
739 {
737 b'namespace': b'phases',
740 b'namespace': b'phases',
738 },
741 },
739 ).result()
742 ).result()
740
743
741 pullop = exchange.pulloperation(
744 pullop = exchange.pulloperation(
742 bundlerepo, peer, path=None, heads=reponodes
745 bundlerepo, peer, path=None, heads=reponodes
743 )
746 )
744 pullop.trmanager = bundletransactionmanager()
747 pullop.trmanager = bundletransactionmanager()
745 exchange._pullapplyphases(pullop, remotephases)
748 exchange._pullapplyphases(pullop, remotephases)
746
749
747 def cleanup():
750 def cleanup():
748 if bundlerepo:
751 if bundlerepo:
749 bundlerepo.close()
752 bundlerepo.close()
750 if bundle:
753 if bundle:
751 os.unlink(bundle)
754 os.unlink(bundle)
752 peer.close()
755 peer.close()
753
756
754 return (localrepo, csets, cleanup)
757 return (localrepo, csets, cleanup)
@@ -1,3971 +1,3974 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 # coding: utf8
2 # coding: utf8
3 #
3 #
4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 """Storage back-end for Mercurial.
9 """Storage back-end for Mercurial.
10
10
11 This provides efficient delta storage with O(1) retrieve and append
11 This provides efficient delta storage with O(1) retrieve and append
12 and O(changes) merge between branches.
12 and O(changes) merge between branches.
13 """
13 """
14
14
15
15
16 import binascii
16 import binascii
17 import collections
17 import collections
18 import contextlib
18 import contextlib
19 import io
19 import io
20 import os
20 import os
21 import struct
21 import struct
22 import weakref
22 import weakref
23 import zlib
23 import zlib
24
24
25 # import stuff from node for others to import from revlog
25 # import stuff from node for others to import from revlog
26 from .node import (
26 from .node import (
27 bin,
27 bin,
28 hex,
28 hex,
29 nullrev,
29 nullrev,
30 sha1nodeconstants,
30 sha1nodeconstants,
31 short,
31 short,
32 wdirrev,
32 wdirrev,
33 )
33 )
34 from .i18n import _
34 from .i18n import _
35 from .revlogutils.constants import (
35 from .revlogutils.constants import (
36 ALL_KINDS,
36 ALL_KINDS,
37 CHANGELOGV2,
37 CHANGELOGV2,
38 COMP_MODE_DEFAULT,
38 COMP_MODE_DEFAULT,
39 COMP_MODE_INLINE,
39 COMP_MODE_INLINE,
40 COMP_MODE_PLAIN,
40 COMP_MODE_PLAIN,
41 DELTA_BASE_REUSE_NO,
41 DELTA_BASE_REUSE_NO,
42 DELTA_BASE_REUSE_TRY,
42 DELTA_BASE_REUSE_TRY,
43 ENTRY_RANK,
43 ENTRY_RANK,
44 FEATURES_BY_VERSION,
44 FEATURES_BY_VERSION,
45 FLAG_GENERALDELTA,
45 FLAG_GENERALDELTA,
46 FLAG_INLINE_DATA,
46 FLAG_INLINE_DATA,
47 INDEX_HEADER,
47 INDEX_HEADER,
48 KIND_CHANGELOG,
48 KIND_CHANGELOG,
49 KIND_FILELOG,
49 KIND_FILELOG,
50 RANK_UNKNOWN,
50 RANK_UNKNOWN,
51 REVLOGV0,
51 REVLOGV0,
52 REVLOGV1,
52 REVLOGV1,
53 REVLOGV1_FLAGS,
53 REVLOGV1_FLAGS,
54 REVLOGV2,
54 REVLOGV2,
55 REVLOGV2_FLAGS,
55 REVLOGV2_FLAGS,
56 REVLOG_DEFAULT_FLAGS,
56 REVLOG_DEFAULT_FLAGS,
57 REVLOG_DEFAULT_FORMAT,
57 REVLOG_DEFAULT_FORMAT,
58 REVLOG_DEFAULT_VERSION,
58 REVLOG_DEFAULT_VERSION,
59 SUPPORTED_FLAGS,
59 SUPPORTED_FLAGS,
60 )
60 )
61 from .revlogutils.flagutil import (
61 from .revlogutils.flagutil import (
62 REVIDX_DEFAULT_FLAGS,
62 REVIDX_DEFAULT_FLAGS,
63 REVIDX_ELLIPSIS,
63 REVIDX_ELLIPSIS,
64 REVIDX_EXTSTORED,
64 REVIDX_EXTSTORED,
65 REVIDX_FLAGS_ORDER,
65 REVIDX_FLAGS_ORDER,
66 REVIDX_HASCOPIESINFO,
66 REVIDX_HASCOPIESINFO,
67 REVIDX_ISCENSORED,
67 REVIDX_ISCENSORED,
68 REVIDX_RAWTEXT_CHANGING_FLAGS,
68 REVIDX_RAWTEXT_CHANGING_FLAGS,
69 )
69 )
70 from .thirdparty import attr
70 from .thirdparty import attr
71 from . import (
71 from . import (
72 ancestor,
72 ancestor,
73 dagop,
73 dagop,
74 error,
74 error,
75 mdiff,
75 mdiff,
76 policy,
76 policy,
77 pycompat,
77 pycompat,
78 revlogutils,
78 revlogutils,
79 templatefilters,
79 templatefilters,
80 util,
80 util,
81 )
81 )
82 from .interfaces import (
82 from .interfaces import (
83 repository,
83 repository,
84 util as interfaceutil,
84 util as interfaceutil,
85 )
85 )
86 from .revlogutils import (
86 from .revlogutils import (
87 deltas as deltautil,
87 deltas as deltautil,
88 docket as docketutil,
88 docket as docketutil,
89 flagutil,
89 flagutil,
90 nodemap as nodemaputil,
90 nodemap as nodemaputil,
91 randomaccessfile,
91 randomaccessfile,
92 revlogv0,
92 revlogv0,
93 rewrite,
93 rewrite,
94 sidedata as sidedatautil,
94 sidedata as sidedatautil,
95 )
95 )
96 from .utils import (
96 from .utils import (
97 storageutil,
97 storageutil,
98 stringutil,
98 stringutil,
99 )
99 )
100
100
101 # blanked usage of all the name to prevent pyflakes constraints
101 # blanked usage of all the name to prevent pyflakes constraints
102 # We need these name available in the module for extensions.
102 # We need these name available in the module for extensions.
103
103
104 REVLOGV0
104 REVLOGV0
105 REVLOGV1
105 REVLOGV1
106 REVLOGV2
106 REVLOGV2
107 CHANGELOGV2
107 CHANGELOGV2
108 FLAG_INLINE_DATA
108 FLAG_INLINE_DATA
109 FLAG_GENERALDELTA
109 FLAG_GENERALDELTA
110 REVLOG_DEFAULT_FLAGS
110 REVLOG_DEFAULT_FLAGS
111 REVLOG_DEFAULT_FORMAT
111 REVLOG_DEFAULT_FORMAT
112 REVLOG_DEFAULT_VERSION
112 REVLOG_DEFAULT_VERSION
113 REVLOGV1_FLAGS
113 REVLOGV1_FLAGS
114 REVLOGV2_FLAGS
114 REVLOGV2_FLAGS
115 REVIDX_ISCENSORED
115 REVIDX_ISCENSORED
116 REVIDX_ELLIPSIS
116 REVIDX_ELLIPSIS
117 REVIDX_HASCOPIESINFO
117 REVIDX_HASCOPIESINFO
118 REVIDX_EXTSTORED
118 REVIDX_EXTSTORED
119 REVIDX_DEFAULT_FLAGS
119 REVIDX_DEFAULT_FLAGS
120 REVIDX_FLAGS_ORDER
120 REVIDX_FLAGS_ORDER
121 REVIDX_RAWTEXT_CHANGING_FLAGS
121 REVIDX_RAWTEXT_CHANGING_FLAGS
122
122
123 parsers = policy.importmod('parsers')
123 parsers = policy.importmod('parsers')
124 rustancestor = policy.importrust('ancestor')
124 rustancestor = policy.importrust('ancestor')
125 rustdagop = policy.importrust('dagop')
125 rustdagop = policy.importrust('dagop')
126 rustrevlog = policy.importrust('revlog')
126 rustrevlog = policy.importrust('revlog')
127
127
128 # Aliased for performance.
128 # Aliased for performance.
129 _zlibdecompress = zlib.decompress
129 _zlibdecompress = zlib.decompress
130
130
131 # max size of inline data embedded into a revlog
131 # max size of inline data embedded into a revlog
132 _maxinline = 131072
132 _maxinline = 131072
133
133
134 # Flag processors for REVIDX_ELLIPSIS.
134 # Flag processors for REVIDX_ELLIPSIS.
135 def ellipsisreadprocessor(rl, text):
135 def ellipsisreadprocessor(rl, text):
136 return text, False
136 return text, False
137
137
138
138
139 def ellipsiswriteprocessor(rl, text):
139 def ellipsiswriteprocessor(rl, text):
140 return text, False
140 return text, False
141
141
142
142
143 def ellipsisrawprocessor(rl, text):
143 def ellipsisrawprocessor(rl, text):
144 return False
144 return False
145
145
146
146
147 ellipsisprocessor = (
147 ellipsisprocessor = (
148 ellipsisreadprocessor,
148 ellipsisreadprocessor,
149 ellipsiswriteprocessor,
149 ellipsiswriteprocessor,
150 ellipsisrawprocessor,
150 ellipsisrawprocessor,
151 )
151 )
152
152
153
153
154 def _verify_revision(rl, skipflags, state, node):
154 def _verify_revision(rl, skipflags, state, node):
155 """Verify the integrity of the given revlog ``node`` while providing a hook
155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 point for extensions to influence the operation."""
156 point for extensions to influence the operation."""
157 if skipflags:
157 if skipflags:
158 state[b'skipread'].add(node)
158 state[b'skipread'].add(node)
159 else:
159 else:
160 # Side-effect: read content and verify hash.
160 # Side-effect: read content and verify hash.
161 rl.revision(node)
161 rl.revision(node)
162
162
163
163
164 # True if a fast implementation for persistent-nodemap is available
164 # True if a fast implementation for persistent-nodemap is available
165 #
165 #
166 # We also consider we have a "fast" implementation in "pure" python because
166 # We also consider we have a "fast" implementation in "pure" python because
167 # people using pure don't really have performance consideration (and a
167 # people using pure don't really have performance consideration (and a
168 # wheelbarrow of other slowness source)
168 # wheelbarrow of other slowness source)
169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
170 parsers, 'BaseIndexObject'
170 parsers, 'BaseIndexObject'
171 )
171 )
172
172
173
173
174 @interfaceutil.implementer(repository.irevisiondelta)
174 @interfaceutil.implementer(repository.irevisiondelta)
175 @attr.s(slots=True)
175 @attr.s(slots=True)
176 class revlogrevisiondelta:
176 class revlogrevisiondelta:
177 node = attr.ib()
177 node = attr.ib()
178 p1node = attr.ib()
178 p1node = attr.ib()
179 p2node = attr.ib()
179 p2node = attr.ib()
180 basenode = attr.ib()
180 basenode = attr.ib()
181 flags = attr.ib()
181 flags = attr.ib()
182 baserevisionsize = attr.ib()
182 baserevisionsize = attr.ib()
183 revision = attr.ib()
183 revision = attr.ib()
184 delta = attr.ib()
184 delta = attr.ib()
185 sidedata = attr.ib()
185 sidedata = attr.ib()
186 protocol_flags = attr.ib()
186 protocol_flags = attr.ib()
187 linknode = attr.ib(default=None)
187 linknode = attr.ib(default=None)
188
188
189
189
190 @interfaceutil.implementer(repository.iverifyproblem)
190 @interfaceutil.implementer(repository.iverifyproblem)
191 @attr.s(frozen=True)
191 @attr.s(frozen=True)
192 class revlogproblem:
192 class revlogproblem:
193 warning = attr.ib(default=None)
193 warning = attr.ib(default=None)
194 error = attr.ib(default=None)
194 error = attr.ib(default=None)
195 node = attr.ib(default=None)
195 node = attr.ib(default=None)
196
196
197
197
198 def parse_index_v1(data, inline):
198 def parse_index_v1(data, inline):
199 # call the C implementation to parse the index data
199 # call the C implementation to parse the index data
200 index, cache = parsers.parse_index2(data, inline)
200 index, cache = parsers.parse_index2(data, inline)
201 return index, cache
201 return index, cache
202
202
203
203
204 def parse_index_v2(data, inline):
204 def parse_index_v2(data, inline):
205 # call the C implementation to parse the index data
205 # call the C implementation to parse the index data
206 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
206 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
207 return index, cache
207 return index, cache
208
208
209
209
210 def parse_index_cl_v2(data, inline):
210 def parse_index_cl_v2(data, inline):
211 # call the C implementation to parse the index data
211 # call the C implementation to parse the index data
212 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
212 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
213 return index, cache
213 return index, cache
214
214
215
215
216 if hasattr(parsers, 'parse_index_devel_nodemap'):
216 if hasattr(parsers, 'parse_index_devel_nodemap'):
217
217
218 def parse_index_v1_nodemap(data, inline):
218 def parse_index_v1_nodemap(data, inline):
219 index, cache = parsers.parse_index_devel_nodemap(data, inline)
219 index, cache = parsers.parse_index_devel_nodemap(data, inline)
220 return index, cache
220 return index, cache
221
221
222
222
223 else:
223 else:
224 parse_index_v1_nodemap = None
224 parse_index_v1_nodemap = None
225
225
226
226
227 def parse_index_v1_mixed(data, inline):
227 def parse_index_v1_mixed(data, inline):
228 index, cache = parse_index_v1(data, inline)
228 index, cache = parse_index_v1(data, inline)
229 return rustrevlog.MixedIndex(index), cache
229 return rustrevlog.MixedIndex(index), cache
230
230
231
231
232 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
232 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
233 # signed integer)
233 # signed integer)
234 _maxentrysize = 0x7FFFFFFF
234 _maxentrysize = 0x7FFFFFFF
235
235
236 FILE_TOO_SHORT_MSG = _(
236 FILE_TOO_SHORT_MSG = _(
237 b'cannot read from revlog %s;'
237 b'cannot read from revlog %s;'
238 b' expected %d bytes from offset %d, data size is %d'
238 b' expected %d bytes from offset %d, data size is %d'
239 )
239 )
240
240
241 hexdigits = b'0123456789abcdefABCDEF'
241 hexdigits = b'0123456789abcdefABCDEF'
242
242
243
243
244 class _Config:
244 class _Config:
245 def copy(self):
245 def copy(self):
246 return self.__class__(**self.__dict__)
246 return self.__class__(**self.__dict__)
247
247
248
248
249 @attr.s()
249 @attr.s()
250 class FeatureConfig(_Config):
250 class FeatureConfig(_Config):
251 """Hold configuration values about the available revlog features"""
251 """Hold configuration values about the available revlog features"""
252
252
253 # the default compression engine
253 # the default compression engine
254 compression_engine = attr.ib(default=b'zlib')
254 compression_engine = attr.ib(default=b'zlib')
255 # compression engines options
255 # compression engines options
256 compression_engine_options = attr.ib(default=attr.Factory(dict))
256 compression_engine_options = attr.ib(default=attr.Factory(dict))
257
257
258 # can we use censor on this revlog
258 # can we use censor on this revlog
259 censorable = attr.ib(default=False)
259 censorable = attr.ib(default=False)
260 # does this revlog use the "side data" feature
260 # does this revlog use the "side data" feature
261 has_side_data = attr.ib(default=False)
261 has_side_data = attr.ib(default=False)
262 # might remove rank configuration once the computation has no impact
262 # might remove rank configuration once the computation has no impact
263 compute_rank = attr.ib(default=False)
263 compute_rank = attr.ib(default=False)
264 # parent order is supposed to be semantically irrelevant, so we
264 # parent order is supposed to be semantically irrelevant, so we
265 # normally resort parents to ensure that the first parent is non-null,
265 # normally resort parents to ensure that the first parent is non-null,
266 # if there is a non-null parent at all.
266 # if there is a non-null parent at all.
267 # filelog abuses the parent order as flag to mark some instances of
267 # filelog abuses the parent order as flag to mark some instances of
268 # meta-encoded files, so allow it to disable this behavior.
268 # meta-encoded files, so allow it to disable this behavior.
269 canonical_parent_order = attr.ib(default=False)
269 canonical_parent_order = attr.ib(default=False)
270 # can ellipsis commit be used
270 # can ellipsis commit be used
271 enable_ellipsis = attr.ib(default=False)
271 enable_ellipsis = attr.ib(default=False)
272
272
273 def copy(self):
273 def copy(self):
274 new = super().copy()
274 new = super().copy()
275 new.compression_engine_options = self.compression_engine_options.copy()
275 new.compression_engine_options = self.compression_engine_options.copy()
276 return new
276 return new
277
277
278
278
279 @attr.s()
279 @attr.s()
280 class DataConfig(_Config):
280 class DataConfig(_Config):
281 """Hold configuration value about how the revlog data are read"""
281 """Hold configuration value about how the revlog data are read"""
282
282
283 # should we try to open the "pending" version of the revlog
283 # should we try to open the "pending" version of the revlog
284 try_pending = attr.ib(default=False)
284 try_pending = attr.ib(default=False)
285 # should we try to open the "splitted" version of the revlog
285 # should we try to open the "splitted" version of the revlog
286 try_split = attr.ib(default=False)
286 try_split = attr.ib(default=False)
287 # When True, indexfile should be opened with checkambig=True at writing,
287 # When True, indexfile should be opened with checkambig=True at writing,
288 # to avoid file stat ambiguity.
288 # to avoid file stat ambiguity.
289 check_ambig = attr.ib(default=False)
289 check_ambig = attr.ib(default=False)
290
290
291 # If true, use mmap instead of reading to deal with large index
291 # If true, use mmap instead of reading to deal with large index
292 mmap_large_index = attr.ib(default=False)
292 mmap_large_index = attr.ib(default=False)
293 # how much data is large
293 # how much data is large
294 mmap_index_threshold = attr.ib(default=None)
294 mmap_index_threshold = attr.ib(default=None)
295 # How much data to read and cache into the raw revlog data cache.
295 # How much data to read and cache into the raw revlog data cache.
296 chunk_cache_size = attr.ib(default=65536)
296 chunk_cache_size = attr.ib(default=65536)
297
297
298 # Allow sparse reading of the revlog data
298 # Allow sparse reading of the revlog data
299 with_sparse_read = attr.ib(default=False)
299 with_sparse_read = attr.ib(default=False)
300 # minimal density of a sparse read chunk
300 # minimal density of a sparse read chunk
301 sr_density_threshold = attr.ib(default=0.50)
301 sr_density_threshold = attr.ib(default=0.50)
302 # minimal size of data we skip when performing sparse read
302 # minimal size of data we skip when performing sparse read
303 sr_min_gap_size = attr.ib(default=262144)
303 sr_min_gap_size = attr.ib(default=262144)
304
304
305 # are delta encoded against arbitrary bases.
305 # are delta encoded against arbitrary bases.
306 generaldelta = attr.ib(default=False)
306 generaldelta = attr.ib(default=False)
307
307
308
308
309 @attr.s()
309 @attr.s()
310 class DeltaConfig(_Config):
310 class DeltaConfig(_Config):
311 """Hold configuration value about how new delta are computed
311 """Hold configuration value about how new delta are computed
312
312
313 Some attributes are duplicated from DataConfig to help havign each object
313 Some attributes are duplicated from DataConfig to help havign each object
314 self contained.
314 self contained.
315 """
315 """
316
316
317 # can delta be encoded against arbitrary bases.
317 # can delta be encoded against arbitrary bases.
318 general_delta = attr.ib(default=False)
318 general_delta = attr.ib(default=False)
319 # Allow sparse writing of the revlog data
319 # Allow sparse writing of the revlog data
320 sparse_revlog = attr.ib(default=False)
320 sparse_revlog = attr.ib(default=False)
321 # maximum length of a delta chain
321 # maximum length of a delta chain
322 max_chain_len = attr.ib(default=None)
322 max_chain_len = attr.ib(default=None)
323 # Maximum distance between delta chain base start and end
323 # Maximum distance between delta chain base start and end
324 max_deltachain_span = attr.ib(default=-1)
324 max_deltachain_span = attr.ib(default=-1)
325 # If `upper_bound_comp` is not None, this is the expected maximal gain from
325 # If `upper_bound_comp` is not None, this is the expected maximal gain from
326 # compression for the data content.
326 # compression for the data content.
327 upper_bound_comp = attr.ib(default=None)
327 upper_bound_comp = attr.ib(default=None)
328 # Should we try a delta against both parent
328 # Should we try a delta against both parent
329 delta_both_parents = attr.ib(default=True)
329 delta_both_parents = attr.ib(default=True)
330 # Test delta base candidate group by chunk of this maximal size.
330 # Test delta base candidate group by chunk of this maximal size.
331 candidate_group_chunk_size = attr.ib(default=0)
331 candidate_group_chunk_size = attr.ib(default=0)
332 # Should we display debug information about delta computation
332 # Should we display debug information about delta computation
333 debug_delta = attr.ib(default=False)
333 debug_delta = attr.ib(default=False)
334 # trust incoming delta by default
334 # trust incoming delta by default
335 lazy_delta = attr.ib(default=True)
335 lazy_delta = attr.ib(default=True)
336 # trust the base of incoming delta by default
336 # trust the base of incoming delta by default
337 lazy_delta_base = attr.ib(default=False)
337 lazy_delta_base = attr.ib(default=False)
338
338
339
339
class _InnerRevlog:
    """An inner layer of the revlog object

    That layer exist to be able to delegate some operation to Rust, its
    boundaries are arbitrary and based on what we can delegate to Rust.
    """

    def __init__(
        self,
        opener,
        index,
        index_file,
        data_file,
        sidedata_file,
        inline,
        data_config,
        delta_config,
        feature_config,
        chunk_cache,
        default_compression_header,
    ):
        self.opener = opener
        self.index = index

        self.__index_file = index_file
        self.data_file = data_file
        self.sidedata_file = sidedata_file
        self.inline = inline
        self.data_config = data_config
        self.delta_config = delta_config
        self.feature_config = feature_config

        self._default_compression_header = default_compression_header

        # index

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self.index_file if self.inline else self.data_file),
            self.data_config.chunk_cache_size,
            chunk_cache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self.sidedata_file,
            self.data_config.chunk_cache_size,
        )

        # revlog header -> revlog compressor
        self._decompressors = {}
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None

    @property
    def index_file(self):
        """the path of the index file for this revlog"""
        return self.__index_file

    @index_file.setter
    def index_file(self, new_index_file):
        self.__index_file = new_index_file
        # for inline revlogs, the index file also carries the data, so the
        # segment reader must follow the rename
        if self.inline:
            self._segmentfile.filename = new_index_file

    def __len__(self):
        return len(self.index)

    # Derived from index values.

    def start(self, rev):
        """the offset of the data chunk for this revision"""
        # the index entry packs (offset << 16 | flags) in field 0
        return int(self.index[rev][0] >> 16)

    def length(self, rev):
        """the length of the data chunk for this revision"""
        return self.index[rev][1]

    def end(self, rev):
        """the end of the data chunk for this revision"""
        return self.start(rev) + self.length(rev)

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            # a revision that is its own delta base is a full snapshot
            return nullrev
        elif self.delta_config.general_delta:
            return base
        else:
            # without general delta, deltas are always against the
            # immediately preceding revision
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self.delta_config.sparse_revlog:
            return self.deltaparent(rev) == nullrev
        elif hasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        # walk each parent past empty-diff revisions to find the revision
        # actually providing its content
        p1 = entry[5]
        while self.length(p1) == 0:
            b = self.deltaparent(p1)
            if b == p1:
                break
            p1 = b
        p2 = entry[6]
        while self.length(p2) == 0:
            b = self.deltaparent(p2)
            if b == p2:
                break
            p2 = b
        if base == p1 or base == p2:
            # a delta against a parent is a regular delta, not a snapshot
            return False
        return self.issnapshot(base)

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        generaldelta = self.delta_config.general_delta
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

    @util.propertycache
    def _compressor(self):
        # lazily instantiated compressor for the configured engine
        engine = util.compengines[self.feature_config.compression_engine]
        return engine.revlogcompressor(
            self.feature_config.compression_engine_options
        )

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._default_compression_header is None:
            return None
        t = self._default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _get_decompressor(self, t):
        """return (and cache) the decompressor for chunk header ``t``

        Raises RevlogError when no engine is registered for that header.
        """
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(
                    self.feature_config.compression_engine_options
                )
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            # data starting with NUL is stored as-is; NUL doubles as the
            # "uncompressed" chunk marker
            return b'', data
        return b'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely by at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

    @contextlib.contextmanager
    def reading(self):
        """Context manager that keeps data and sidedata files open for reading"""
        if len(self.index) == 0:
            yield  # nothing to be read
        else:
            with self._segmentfile.reading():
                with self._segmentfile_sidedata.reading():
                    yield

    @property
    def is_writing(self):
        """True is a writing context is open"""
        return self._writinghandles is not None

    @contextlib.contextmanager
    def writing(self, transaction, data_end=None, sidedata_end=None):
        """Open the revlog files for writing

        Add content to a revlog should be done within such context.
        """
        if self.is_writing:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self.index)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self.inline:
                    try:
                        dfh = self.opener(self.data_file, mode=b"r+")
                        if data_end is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(data_end, os.SEEK_SET)
                    except FileNotFoundError:
                        dfh = self.opener(self.data_file, mode=b"w+")
                    transaction.add(self.data_file, dsize)
                if self.sidedata_file is not None:
                    assert sidedata_end is not None
                    # revlog-v2 does not inline, help Pytype
                    assert dfh is not None
                    try:
                        sdfh = self.opener(self.sidedata_file, mode=b"r+")
                        dfh.seek(sidedata_end, os.SEEK_SET)
                    except FileNotFoundError:
                        sdfh = self.opener(self.sidedata_file, mode=b"w+")
                    transaction.add(self.sidedata_file, sidedata_end)

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self.inline:
                    transaction.add(self.index_file, dsize + isize)
                else:
                    transaction.add(self.index_file, isize)
                # exposing all file handle for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self.inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing referent to
                # potential unflushed data content.
                if ifh is not None:
                    ifh.close()

    def __index_write_fp(self, index_end=None):
        """internal method to open the index file for writing

        You should not use this directly and use `_writing` instead
        """
        try:
            f = self.opener(
                self.index_file,
                mode=b"r+",
                checkambig=self.data_config.check_ambig,
            )
            if index_end is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(index_end, os.SEEK_SET)
            return f
        except FileNotFoundError:
            return self.opener(
                self.index_file,
                mode=b"w+",
                checkambig=self.data_config.check_ambig,
            )

    def __index_new_fp(self):
        """internal method to create a new index file for writing

        You should not use this unless you are upgrading from inline revlog
        """
        return self.opener(
            self.index_file,
            mode=b"w",
            checkambig=self.data_config.check_ambig,
            atomictemp=True,
        )

    def split_inline(self, tr, header, new_index_file_path=None):
        """split the data of an inline revlog into an index and a data file"""
        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None
            # No need to deal with sidedata writing handle as it is only
            # relevant with revlog-v2 which is never inline, not reaching
            # this code

        new_dfh = self.opener(self.data_file, mode=b"w+")
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self.reading():
                for r in range(len(self.index)):
                    new_dfh.write(self.get_segment_for_revs(r, r)[1])
                new_dfh.flush()

            if new_index_file_path is not None:
                self.index_file = new_index_file_path
            with self.__index_new_fp() as fp:
                self.inline = False
                for i in range(len(self.index)):
                    e = self.index.entry_binary(i)
                    if i == 0:
                        packed_header = self.index.pack_header(header)
                        e = packed_header + e
                    fp.write(e)

                # If we don't use side-write, the temp file replace the real
                # index when we exit the context manager

            self._segmentfile = randomaccessfile.randomaccessfile(
                self.opener,
                self.data_file,
                self.data_config.chunk_cache_size,
            )

            if existing_handles:
                # switched from inline to conventional reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with sidedata writing handle as it is only
                # relevant with revlog-v2 which is never inline, not reaching
                # this code
        finally:
            if new_dfh is not None:
                new_dfh.close()
        return self.index_file

    def get_segment_for_revs(self, startrev, endrev):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.

        API: we should consider making this a private part of the InnerRevlog
        at some point.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self.inline:
            # skip the interleaved index entries when data lives in the
            # index file
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length)

    def _chunk(self, rev):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self.get_segment_for_revs(rev, rev)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

    def _chunks(self, revs, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self.inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self.data_config.with_sparse_read:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self,
                revs,
                targetsize=targetsize,
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self.get_segment_for_revs(firstrev, lastrev)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l
900
902
901
903
902 class revlog:
904 class revlog:
903 """
905 """
904 the underlying revision storage object
906 the underlying revision storage object
905
907
906 A revlog consists of two parts, an index and the revision data.
908 A revlog consists of two parts, an index and the revision data.
907
909
908 The index is a file with a fixed record size containing
910 The index is a file with a fixed record size containing
909 information on each revision, including its nodeid (hash), the
911 information on each revision, including its nodeid (hash), the
910 nodeids of its parents, the position and offset of its data within
912 nodeids of its parents, the position and offset of its data within
911 the data file, and the revision it's based on. Finally, each entry
913 the data file, and the revision it's based on. Finally, each entry
912 contains a linkrev entry that can serve as a pointer to external
914 contains a linkrev entry that can serve as a pointer to external
913 data.
915 data.
914
916
915 The revision data itself is a linear collection of data chunks.
917 The revision data itself is a linear collection of data chunks.
916 Each chunk represents a revision and is usually represented as a
918 Each chunk represents a revision and is usually represented as a
917 delta against the previous chunk. To bound lookup time, runs of
919 delta against the previous chunk. To bound lookup time, runs of
918 deltas are limited to about 2 times the length of the original
920 deltas are limited to about 2 times the length of the original
919 version data. This makes retrieval of a version proportional to
921 version data. This makes retrieval of a version proportional to
920 its size, or O(1) relative to the number of revisions.
922 its size, or O(1) relative to the number of revisions.
921
923
922 Both pieces of the revlog are written to in an append-only
924 Both pieces of the revlog are written to in an append-only
923 fashion, which means we never need to rewrite a file to insert or
925 fashion, which means we never need to rewrite a file to insert or
924 remove data, and can use some simple techniques to avoid the need
926 remove data, and can use some simple techniques to avoid the need
925 for locking while reading.
927 for locking while reading.
926
928
927 If checkambig, indexfile is opened with checkambig=True at
929 If checkambig, indexfile is opened with checkambig=True at
928 writing, to avoid file stat ambiguity.
930 writing, to avoid file stat ambiguity.
929
931
930 If mmaplargeindex is True, and an mmapindexthreshold is set, the
932 If mmaplargeindex is True, and an mmapindexthreshold is set, the
931 index will be mmapped rather than read if it is larger than the
933 index will be mmapped rather than read if it is larger than the
932 configured threshold.
934 configured threshold.
933
935
934 If censorable is True, the revlog can have censored revisions.
936 If censorable is True, the revlog can have censored revisions.
935
937
936 If `upperboundcomp` is not None, this is the expected maximal gain from
938 If `upperboundcomp` is not None, this is the expected maximal gain from
937 compression for the data content.
939 compression for the data content.
938
940
939 `concurrencychecker` is an optional function that receives 3 arguments: a
941 `concurrencychecker` is an optional function that receives 3 arguments: a
940 file handle, a filename, and an expected position. It should check whether
942 file handle, a filename, and an expected position. It should check whether
941 the current position in the file handle is valid, and log/warn/fail (by
943 the current position in the file handle is valid, and log/warn/fail (by
942 raising).
944 raising).
943
945
944 See mercurial/revlogutils/contants.py for details about the content of an
946 See mercurial/revlogutils/contants.py for details about the content of an
945 index entry.
947 index entry.
946 """
948 """
947
949
948 _flagserrorclass = error.RevlogError
950 _flagserrorclass = error.RevlogError
949
951
950 @staticmethod
952 @staticmethod
951 def is_inline_index(header_bytes):
953 def is_inline_index(header_bytes):
952 """Determine if a revlog is inline from the initial bytes of the index"""
954 """Determine if a revlog is inline from the initial bytes of the index"""
953 header = INDEX_HEADER.unpack(header_bytes)[0]
955 header = INDEX_HEADER.unpack(header_bytes)[0]
954
956
955 _format_flags = header & ~0xFFFF
957 _format_flags = header & ~0xFFFF
956 _format_version = header & 0xFFFF
958 _format_version = header & 0xFFFF
957
959
958 features = FEATURES_BY_VERSION[_format_version]
960 features = FEATURES_BY_VERSION[_format_version]
959 return features[b'inline'](_format_flags)
961 return features[b'inline'](_format_flags)
960
962
961 def __init__(
963 def __init__(
962 self,
964 self,
963 opener,
965 opener,
964 target,
966 target,
965 radix,
967 radix,
966 postfix=None, # only exist for `tmpcensored` now
968 postfix=None, # only exist for `tmpcensored` now
967 checkambig=False,
969 checkambig=False,
968 mmaplargeindex=False,
970 mmaplargeindex=False,
969 censorable=False,
971 censorable=False,
970 upperboundcomp=None,
972 upperboundcomp=None,
971 persistentnodemap=False,
973 persistentnodemap=False,
972 concurrencychecker=None,
974 concurrencychecker=None,
973 trypending=False,
975 trypending=False,
974 try_split=False,
976 try_split=False,
975 canonical_parent_order=True,
977 canonical_parent_order=True,
976 ):
978 ):
977 """
979 """
978 create a revlog object
980 create a revlog object
979
981
980 opener is a function that abstracts the file opening operation
982 opener is a function that abstracts the file opening operation
981 and can be used to implement COW semantics or the like.
983 and can be used to implement COW semantics or the like.
982
984
983 `target`: a (KIND, ID) tuple that identify the content stored in
985 `target`: a (KIND, ID) tuple that identify the content stored in
984 this revlog. It help the rest of the code to understand what the revlog
986 this revlog. It help the rest of the code to understand what the revlog
985 is about without having to resort to heuristic and index filename
987 is about without having to resort to heuristic and index filename
986 analysis. Note: that this must be reliably be set by normal code, but
988 analysis. Note: that this must be reliably be set by normal code, but
987 that test, debug, or performance measurement code might not set this to
989 that test, debug, or performance measurement code might not set this to
988 accurate value.
990 accurate value.
989 """
991 """
990
992
991 self.radix = radix
993 self.radix = radix
992
994
993 self._docket_file = None
995 self._docket_file = None
994 self._indexfile = None
996 self._indexfile = None
995 self._datafile = None
997 self._datafile = None
996 self._sidedatafile = None
998 self._sidedatafile = None
997 self._nodemap_file = None
999 self._nodemap_file = None
998 self.postfix = postfix
1000 self.postfix = postfix
999 self._trypending = trypending
1001 self._trypending = trypending
1000 self._try_split = try_split
1002 self._try_split = try_split
1001 self.opener = opener
1003 self.opener = opener
1002 if persistentnodemap:
1004 if persistentnodemap:
1003 self._nodemap_file = nodemaputil.get_nodemap_file(self)
1005 self._nodemap_file = nodemaputil.get_nodemap_file(self)
1004
1006
1005 assert target[0] in ALL_KINDS
1007 assert target[0] in ALL_KINDS
1006 assert len(target) == 2
1008 assert len(target) == 2
1007 self.target = target
1009 self.target = target
1008 if b'feature-config' in self.opener.options:
1010 if b'feature-config' in self.opener.options:
1009 self.feature_config = self.opener.options[b'feature-config'].copy()
1011 self.feature_config = self.opener.options[b'feature-config'].copy()
1010 else:
1012 else:
1011 self.feature_config = FeatureConfig()
1013 self.feature_config = FeatureConfig()
1012 self.feature_config.censorable = censorable
1014 self.feature_config.censorable = censorable
1013 self.feature_config.canonical_parent_order = canonical_parent_order
1015 self.feature_config.canonical_parent_order = canonical_parent_order
1014 if b'data-config' in self.opener.options:
1016 if b'data-config' in self.opener.options:
1015 self.data_config = self.opener.options[b'data-config'].copy()
1017 self.data_config = self.opener.options[b'data-config'].copy()
1016 else:
1018 else:
1017 self.data_config = DataConfig()
1019 self.data_config = DataConfig()
1018 self.data_config.check_ambig = checkambig
1020 self.data_config.check_ambig = checkambig
1019 self.data_config.mmap_large_index = mmaplargeindex
1021 self.data_config.mmap_large_index = mmaplargeindex
1020 if b'delta-config' in self.opener.options:
1022 if b'delta-config' in self.opener.options:
1021 self.delta_config = self.opener.options[b'delta-config'].copy()
1023 self.delta_config = self.opener.options[b'delta-config'].copy()
1022 else:
1024 else:
1023 self.delta_config = DeltaConfig()
1025 self.delta_config = DeltaConfig()
1024 self.delta_config.upper_bound_comp = upperboundcomp
1026 self.delta_config.upper_bound_comp = upperboundcomp
1025
1027
1026 # 3-tuple of (node, rev, text) for a raw revision.
1027 self._revisioncache = None
1028 # Maps rev to chain base rev.
1028 # Maps rev to chain base rev.
1029 self._chainbasecache = util.lrucachedict(100)
1029 self._chainbasecache = util.lrucachedict(100)
1030
1030
1031 self.index = None
1031 self.index = None
1032 self._docket = None
1032 self._docket = None
1033 self._nodemap_docket = None
1033 self._nodemap_docket = None
1034 # Mapping of partial identifiers to full nodes.
1034 # Mapping of partial identifiers to full nodes.
1035 self._pcache = {}
1035 self._pcache = {}
1036
1036
1037 # other optionnals features
1037 # other optionnals features
1038
1038
1039 # Make copy of flag processors so each revlog instance can support
1039 # Make copy of flag processors so each revlog instance can support
1040 # custom flags.
1040 # custom flags.
1041 self._flagprocessors = dict(flagutil.flagprocessors)
1041 self._flagprocessors = dict(flagutil.flagprocessors)
1042 # prevent nesting of addgroup
1042 # prevent nesting of addgroup
1043 self._adding_group = None
1043 self._adding_group = None
1044
1044
1045 chunk_cache = self._loadindex()
1045 chunk_cache = self._loadindex()
1046 self._load_inner(chunk_cache)
1046 self._load_inner(chunk_cache)
1047 self._concurrencychecker = concurrencychecker
1047 self._concurrencychecker = concurrencychecker
1048
1048
1049 @property
1049 @property
1050 def _generaldelta(self):
1050 def _generaldelta(self):
1051 """temporary compatibility proxy"""
1051 """temporary compatibility proxy"""
1052 util.nouideprecwarn(
1052 util.nouideprecwarn(
1053 b"use revlog.delta_config.general_delta", b"6.6", stacklevel=2
1053 b"use revlog.delta_config.general_delta", b"6.6", stacklevel=2
1054 )
1054 )
1055 return self.delta_config.general_delta
1055 return self.delta_config.general_delta
1056
1056
1057 @property
1057 @property
1058 def _checkambig(self):
1058 def _checkambig(self):
1059 """temporary compatibility proxy"""
1059 """temporary compatibility proxy"""
1060 util.nouideprecwarn(
1060 util.nouideprecwarn(
1061 b"use revlog.data_config.checkambig", b"6.6", stacklevel=2
1061 b"use revlog.data_config.checkambig", b"6.6", stacklevel=2
1062 )
1062 )
1063 return self.data_config.check_ambig
1063 return self.data_config.check_ambig
1064
1064
1065 @property
1065 @property
1066 def _mmaplargeindex(self):
1066 def _mmaplargeindex(self):
1067 """temporary compatibility proxy"""
1067 """temporary compatibility proxy"""
1068 util.nouideprecwarn(
1068 util.nouideprecwarn(
1069 b"use revlog.data_config.mmap_large_index", b"6.6", stacklevel=2
1069 b"use revlog.data_config.mmap_large_index", b"6.6", stacklevel=2
1070 )
1070 )
1071 return self.data_config.mmap_large_index
1071 return self.data_config.mmap_large_index
1072
1072
1073 @property
1073 @property
1074 def _censorable(self):
1074 def _censorable(self):
1075 """temporary compatibility proxy"""
1075 """temporary compatibility proxy"""
1076 util.nouideprecwarn(
1076 util.nouideprecwarn(
1077 b"use revlog.feature_config.censorable", b"6.6", stacklevel=2
1077 b"use revlog.feature_config.censorable", b"6.6", stacklevel=2
1078 )
1078 )
1079 return self.feature_config.censorable
1079 return self.feature_config.censorable
1080
1080
1081 @property
1081 @property
1082 def _chunkcachesize(self):
1082 def _chunkcachesize(self):
1083 """temporary compatibility proxy"""
1083 """temporary compatibility proxy"""
1084 util.nouideprecwarn(
1084 util.nouideprecwarn(
1085 b"use revlog.data_config.chunk_cache_size", b"6.6", stacklevel=2
1085 b"use revlog.data_config.chunk_cache_size", b"6.6", stacklevel=2
1086 )
1086 )
1087 return self.data_config.chunk_cache_size
1087 return self.data_config.chunk_cache_size
1088
1088
1089 @property
1089 @property
1090 def _maxchainlen(self):
1090 def _maxchainlen(self):
1091 """temporary compatibility proxy"""
1091 """temporary compatibility proxy"""
1092 util.nouideprecwarn(
1092 util.nouideprecwarn(
1093 b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2
1093 b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2
1094 )
1094 )
1095 return self.delta_config.max_chain_len
1095 return self.delta_config.max_chain_len
1096
1096
1097 @property
1097 @property
1098 def _deltabothparents(self):
1098 def _deltabothparents(self):
1099 """temporary compatibility proxy"""
1099 """temporary compatibility proxy"""
1100 util.nouideprecwarn(
1100 util.nouideprecwarn(
1101 b"use revlog.delta_config.delta_both_parents", b"6.6", stacklevel=2
1101 b"use revlog.delta_config.delta_both_parents", b"6.6", stacklevel=2
1102 )
1102 )
1103 return self.delta_config.delta_both_parents
1103 return self.delta_config.delta_both_parents
1104
1104
1105 @property
1105 @property
1106 def _candidate_group_chunk_size(self):
1106 def _candidate_group_chunk_size(self):
1107 """temporary compatibility proxy"""
1107 """temporary compatibility proxy"""
1108 util.nouideprecwarn(
1108 util.nouideprecwarn(
1109 b"use revlog.delta_config.candidate_group_chunk_size",
1109 b"use revlog.delta_config.candidate_group_chunk_size",
1110 b"6.6",
1110 b"6.6",
1111 stacklevel=2,
1111 stacklevel=2,
1112 )
1112 )
1113 return self.delta_config.candidate_group_chunk_size
1113 return self.delta_config.candidate_group_chunk_size
1114
1114
1115 @property
1115 @property
1116 def _debug_delta(self):
1116 def _debug_delta(self):
1117 """temporary compatibility proxy"""
1117 """temporary compatibility proxy"""
1118 util.nouideprecwarn(
1118 util.nouideprecwarn(
1119 b"use revlog.delta_config.debug_delta", b"6.6", stacklevel=2
1119 b"use revlog.delta_config.debug_delta", b"6.6", stacklevel=2
1120 )
1120 )
1121 return self.delta_config.debug_delta
1121 return self.delta_config.debug_delta
1122
1122
1123 @property
1123 @property
1124 def _compengine(self):
1124 def _compengine(self):
1125 """temporary compatibility proxy"""
1125 """temporary compatibility proxy"""
1126 util.nouideprecwarn(
1126 util.nouideprecwarn(
1127 b"use revlog.feature_config.compression_engine",
1127 b"use revlog.feature_config.compression_engine",
1128 b"6.6",
1128 b"6.6",
1129 stacklevel=2,
1129 stacklevel=2,
1130 )
1130 )
1131 return self.feature_config.compression_engine
1131 return self.feature_config.compression_engine
1132
1132
1133 @property
1133 @property
1134 def upperboundcomp(self):
1134 def upperboundcomp(self):
1135 """temporary compatibility proxy"""
1135 """temporary compatibility proxy"""
1136 util.nouideprecwarn(
1136 util.nouideprecwarn(
1137 b"use revlog.delta_config.upper_bound_comp",
1137 b"use revlog.delta_config.upper_bound_comp",
1138 b"6.6",
1138 b"6.6",
1139 stacklevel=2,
1139 stacklevel=2,
1140 )
1140 )
1141 return self.delta_config.upper_bound_comp
1141 return self.delta_config.upper_bound_comp
1142
1142
1143 @property
1143 @property
1144 def _compengineopts(self):
1144 def _compengineopts(self):
1145 """temporary compatibility proxy"""
1145 """temporary compatibility proxy"""
1146 util.nouideprecwarn(
1146 util.nouideprecwarn(
1147 b"use revlog.feature_config.compression_engine_options",
1147 b"use revlog.feature_config.compression_engine_options",
1148 b"6.6",
1148 b"6.6",
1149 stacklevel=2,
1149 stacklevel=2,
1150 )
1150 )
1151 return self.feature_config.compression_engine_options
1151 return self.feature_config.compression_engine_options
1152
1152
1153 @property
1153 @property
1154 def _maxdeltachainspan(self):
1154 def _maxdeltachainspan(self):
1155 """temporary compatibility proxy"""
1155 """temporary compatibility proxy"""
1156 util.nouideprecwarn(
1156 util.nouideprecwarn(
1157 b"use revlog.delta_config.max_deltachain_span", b"6.6", stacklevel=2
1157 b"use revlog.delta_config.max_deltachain_span", b"6.6", stacklevel=2
1158 )
1158 )
1159 return self.delta_config.max_deltachain_span
1159 return self.delta_config.max_deltachain_span
1160
1160
1161 @property
1161 @property
1162 def _withsparseread(self):
1162 def _withsparseread(self):
1163 """temporary compatibility proxy"""
1163 """temporary compatibility proxy"""
1164 util.nouideprecwarn(
1164 util.nouideprecwarn(
1165 b"use revlog.data_config.with_sparse_read", b"6.6", stacklevel=2
1165 b"use revlog.data_config.with_sparse_read", b"6.6", stacklevel=2
1166 )
1166 )
1167 return self.data_config.with_sparse_read
1167 return self.data_config.with_sparse_read
1168
1168
1169 @property
1169 @property
1170 def _sparserevlog(self):
1170 def _sparserevlog(self):
1171 """temporary compatibility proxy"""
1171 """temporary compatibility proxy"""
1172 util.nouideprecwarn(
1172 util.nouideprecwarn(
1173 b"use revlog.delta_config.sparse_revlog", b"6.6", stacklevel=2
1173 b"use revlog.delta_config.sparse_revlog", b"6.6", stacklevel=2
1174 )
1174 )
1175 return self.delta_config.sparse_revlog
1175 return self.delta_config.sparse_revlog
1176
1176
1177 @property
1177 @property
1178 def hassidedata(self):
1178 def hassidedata(self):
1179 """temporary compatibility proxy"""
1179 """temporary compatibility proxy"""
1180 util.nouideprecwarn(
1180 util.nouideprecwarn(
1181 b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2
1181 b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2
1182 )
1182 )
1183 return self.feature_config.has_side_data
1183 return self.feature_config.has_side_data
1184
1184
1185 @property
1185 @property
1186 def _srdensitythreshold(self):
1186 def _srdensitythreshold(self):
1187 """temporary compatibility proxy"""
1187 """temporary compatibility proxy"""
1188 util.nouideprecwarn(
1188 util.nouideprecwarn(
1189 b"use revlog.data_config.sr_density_threshold",
1189 b"use revlog.data_config.sr_density_threshold",
1190 b"6.6",
1190 b"6.6",
1191 stacklevel=2,
1191 stacklevel=2,
1192 )
1192 )
1193 return self.data_config.sr_density_threshold
1193 return self.data_config.sr_density_threshold
1194
1194
1195 @property
1195 @property
1196 def _srmingapsize(self):
1196 def _srmingapsize(self):
1197 """temporary compatibility proxy"""
1197 """temporary compatibility proxy"""
1198 util.nouideprecwarn(
1198 util.nouideprecwarn(
1199 b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2
1199 b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2
1200 )
1200 )
1201 return self.data_config.sr_min_gap_size
1201 return self.data_config.sr_min_gap_size
1202
1202
1203 @property
1203 @property
1204 def _compute_rank(self):
1204 def _compute_rank(self):
1205 """temporary compatibility proxy"""
1205 """temporary compatibility proxy"""
1206 util.nouideprecwarn(
1206 util.nouideprecwarn(
1207 b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2
1207 b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2
1208 )
1208 )
1209 return self.feature_config.compute_rank
1209 return self.feature_config.compute_rank
1210
1210
1211 @property
1211 @property
1212 def canonical_parent_order(self):
1212 def canonical_parent_order(self):
1213 """temporary compatibility proxy"""
1213 """temporary compatibility proxy"""
1214 util.nouideprecwarn(
1214 util.nouideprecwarn(
1215 b"use revlog.feature_config.canonical_parent_order",
1215 b"use revlog.feature_config.canonical_parent_order",
1216 b"6.6",
1216 b"6.6",
1217 stacklevel=2,
1217 stacklevel=2,
1218 )
1218 )
1219 return self.feature_config.canonical_parent_order
1219 return self.feature_config.canonical_parent_order
1220
1220
1221 @property
1221 @property
1222 def _lazydelta(self):
1222 def _lazydelta(self):
1223 """temporary compatibility proxy"""
1223 """temporary compatibility proxy"""
1224 util.nouideprecwarn(
1224 util.nouideprecwarn(
1225 b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2
1225 b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2
1226 )
1226 )
1227 return self.delta_config.lazy_delta
1227 return self.delta_config.lazy_delta
1228
1228
1229 @property
1229 @property
1230 def _lazydeltabase(self):
1230 def _lazydeltabase(self):
1231 """temporary compatibility proxy"""
1231 """temporary compatibility proxy"""
1232 util.nouideprecwarn(
1232 util.nouideprecwarn(
1233 b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2
1233 b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2
1234 )
1234 )
1235 return self.delta_config.lazy_delta_base
1235 return self.delta_config.lazy_delta_base
1236
1236
1237 def _init_opts(self):
1237 def _init_opts(self):
1238 """process options (from above/config) to setup associated default revlog mode
1238 """process options (from above/config) to setup associated default revlog mode
1239
1239
1240 These values might be affected when actually reading on disk information.
1240 These values might be affected when actually reading on disk information.
1241
1241
1242 The relevant values are returned for use in _loadindex().
1242 The relevant values are returned for use in _loadindex().
1243
1243
1244 * newversionflags:
1244 * newversionflags:
1245 version header to use if we need to create a new revlog
1245 version header to use if we need to create a new revlog
1246
1246
1247 * mmapindexthreshold:
1247 * mmapindexthreshold:
1248 minimal index size for start to use mmap
1248 minimal index size for start to use mmap
1249
1249
1250 * force_nodemap:
1250 * force_nodemap:
1251 force the usage of a "development" version of the nodemap code
1251 force the usage of a "development" version of the nodemap code
1252 """
1252 """
1253 opts = self.opener.options
1253 opts = self.opener.options
1254
1254
1255 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
1255 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
1256 new_header = CHANGELOGV2
1256 new_header = CHANGELOGV2
1257 compute_rank = opts.get(b'changelogv2.compute-rank', True)
1257 compute_rank = opts.get(b'changelogv2.compute-rank', True)
1258 self.feature_config.compute_rank = compute_rank
1258 self.feature_config.compute_rank = compute_rank
1259 elif b'revlogv2' in opts:
1259 elif b'revlogv2' in opts:
1260 new_header = REVLOGV2
1260 new_header = REVLOGV2
1261 elif b'revlogv1' in opts:
1261 elif b'revlogv1' in opts:
1262 new_header = REVLOGV1 | FLAG_INLINE_DATA
1262 new_header = REVLOGV1 | FLAG_INLINE_DATA
1263 if b'generaldelta' in opts:
1263 if b'generaldelta' in opts:
1264 new_header |= FLAG_GENERALDELTA
1264 new_header |= FLAG_GENERALDELTA
1265 elif b'revlogv0' in self.opener.options:
1265 elif b'revlogv0' in self.opener.options:
1266 new_header = REVLOGV0
1266 new_header = REVLOGV0
1267 else:
1267 else:
1268 new_header = REVLOG_DEFAULT_VERSION
1268 new_header = REVLOG_DEFAULT_VERSION
1269
1269
1270 mmapindexthreshold = None
1270 mmapindexthreshold = None
1271 if self.data_config.mmap_large_index:
1271 if self.data_config.mmap_large_index:
1272 mmapindexthreshold = self.data_config.mmap_index_threshold
1272 mmapindexthreshold = self.data_config.mmap_index_threshold
1273 if self.feature_config.enable_ellipsis:
1273 if self.feature_config.enable_ellipsis:
1274 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
1274 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
1275
1275
1276 # revlog v0 doesn't have flag processors
1276 # revlog v0 doesn't have flag processors
1277 for flag, processor in opts.get(b'flagprocessors', {}).items():
1277 for flag, processor in opts.get(b'flagprocessors', {}).items():
1278 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
1278 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
1279
1279
1280 chunk_cache_size = self.data_config.chunk_cache_size
1280 chunk_cache_size = self.data_config.chunk_cache_size
1281 if chunk_cache_size <= 0:
1281 if chunk_cache_size <= 0:
1282 raise error.RevlogError(
1282 raise error.RevlogError(
1283 _(b'revlog chunk cache size %r is not greater than 0')
1283 _(b'revlog chunk cache size %r is not greater than 0')
1284 % chunk_cache_size
1284 % chunk_cache_size
1285 )
1285 )
1286 elif chunk_cache_size & (chunk_cache_size - 1):
1286 elif chunk_cache_size & (chunk_cache_size - 1):
1287 raise error.RevlogError(
1287 raise error.RevlogError(
1288 _(b'revlog chunk cache size %r is not a power of 2')
1288 _(b'revlog chunk cache size %r is not a power of 2')
1289 % chunk_cache_size
1289 % chunk_cache_size
1290 )
1290 )
1291 force_nodemap = opts.get(b'devel-force-nodemap', False)
1291 force_nodemap = opts.get(b'devel-force-nodemap', False)
1292 return new_header, mmapindexthreshold, force_nodemap
1292 return new_header, mmapindexthreshold, force_nodemap
1293
1293
1294 def _get_data(self, filepath, mmap_threshold, size=None):
1294 def _get_data(self, filepath, mmap_threshold, size=None):
1295 """return a file content with or without mmap
1295 """return a file content with or without mmap
1296
1296
1297 If the file is missing return the empty string"""
1297 If the file is missing return the empty string"""
1298 try:
1298 try:
1299 with self.opener(filepath) as fp:
1299 with self.opener(filepath) as fp:
1300 if mmap_threshold is not None:
1300 if mmap_threshold is not None:
1301 file_size = self.opener.fstat(fp).st_size
1301 file_size = self.opener.fstat(fp).st_size
1302 if file_size >= mmap_threshold:
1302 if file_size >= mmap_threshold:
1303 if size is not None:
1303 if size is not None:
1304 # avoid potentiel mmap crash
1304 # avoid potentiel mmap crash
1305 size = min(file_size, size)
1305 size = min(file_size, size)
1306 # TODO: should .close() to release resources without
1306 # TODO: should .close() to release resources without
1307 # relying on Python GC
1307 # relying on Python GC
1308 if size is None:
1308 if size is None:
1309 return util.buffer(util.mmapread(fp))
1309 return util.buffer(util.mmapread(fp))
1310 else:
1310 else:
1311 return util.buffer(util.mmapread(fp, size))
1311 return util.buffer(util.mmapread(fp, size))
1312 if size is None:
1312 if size is None:
1313 return fp.read()
1313 return fp.read()
1314 else:
1314 else:
1315 return fp.read(size)
1315 return fp.read(size)
1316 except FileNotFoundError:
1316 except FileNotFoundError:
1317 return b''
1317 return b''
1318
1318
1319 def get_streams(self, max_linkrev, force_inline=False):
1319 def get_streams(self, max_linkrev, force_inline=False):
1320 """return a list of streams that represent this revlog
1320 """return a list of streams that represent this revlog
1321
1321
1322 This is used by stream-clone to do bytes to bytes copies of a repository.
1322 This is used by stream-clone to do bytes to bytes copies of a repository.
1323
1323
1324 This streams data for all revisions that refer to a changelog revision up
1324 This streams data for all revisions that refer to a changelog revision up
1325 to `max_linkrev`.
1325 to `max_linkrev`.
1326
1326
1327 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
1327 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
1328
1328
1329 It returns is a list of three-tuple:
1329 It returns is a list of three-tuple:
1330
1330
1331 [
1331 [
1332 (filename, bytes_stream, stream_size),
1332 (filename, bytes_stream, stream_size),
1333 …
1333 …
1334 ]
1334 ]
1335 """
1335 """
1336 n = len(self)
1336 n = len(self)
1337 index = self.index
1337 index = self.index
1338 while n > 0:
1338 while n > 0:
1339 linkrev = index[n - 1][4]
1339 linkrev = index[n - 1][4]
1340 if linkrev < max_linkrev:
1340 if linkrev < max_linkrev:
1341 break
1341 break
1342 # note: this loop will rarely go through multiple iterations, since
1342 # note: this loop will rarely go through multiple iterations, since
1343 # it only traverses commits created during the current streaming
1343 # it only traverses commits created during the current streaming
1344 # pull operation.
1344 # pull operation.
1345 #
1345 #
1346 # If this become a problem, using a binary search should cap the
1346 # If this become a problem, using a binary search should cap the
1347 # runtime of this.
1347 # runtime of this.
1348 n = n - 1
1348 n = n - 1
1349 if n == 0:
1349 if n == 0:
1350 # no data to send
1350 # no data to send
1351 return []
1351 return []
1352 index_size = n * index.entry_size
1352 index_size = n * index.entry_size
1353 data_size = self.end(n - 1)
1353 data_size = self.end(n - 1)
1354
1354
1355 # XXX we might have been split (or stripped) since the object
1355 # XXX we might have been split (or stripped) since the object
1356 # initialization, We need to close this race too, but having a way to
1356 # initialization, We need to close this race too, but having a way to
1357 # pre-open the file we feed to the revlog and never closing them before
1357 # pre-open the file we feed to the revlog and never closing them before
1358 # we are done streaming.
1358 # we are done streaming.
1359
1359
1360 if self._inline:
1360 if self._inline:
1361
1361
1362 def get_stream():
1362 def get_stream():
1363 with self.opener(self._indexfile, mode=b"r") as fp:
1363 with self.opener(self._indexfile, mode=b"r") as fp:
1364 yield None
1364 yield None
1365 size = index_size + data_size
1365 size = index_size + data_size
1366 if size <= 65536:
1366 if size <= 65536:
1367 yield fp.read(size)
1367 yield fp.read(size)
1368 else:
1368 else:
1369 yield from util.filechunkiter(fp, limit=size)
1369 yield from util.filechunkiter(fp, limit=size)
1370
1370
1371 inline_stream = get_stream()
1371 inline_stream = get_stream()
1372 next(inline_stream)
1372 next(inline_stream)
1373 return [
1373 return [
1374 (self._indexfile, inline_stream, index_size + data_size),
1374 (self._indexfile, inline_stream, index_size + data_size),
1375 ]
1375 ]
1376 elif force_inline:
1376 elif force_inline:
1377
1377
1378 def get_stream():
1378 def get_stream():
1379 with self.reading():
1379 with self.reading():
1380 yield None
1380 yield None
1381
1381
1382 for rev in range(n):
1382 for rev in range(n):
1383 idx = self.index.entry_binary(rev)
1383 idx = self.index.entry_binary(rev)
1384 if rev == 0 and self._docket is None:
1384 if rev == 0 and self._docket is None:
1385 # re-inject the inline flag
1385 # re-inject the inline flag
1386 header = self._format_flags
1386 header = self._format_flags
1387 header |= self._format_version
1387 header |= self._format_version
1388 header |= FLAG_INLINE_DATA
1388 header |= FLAG_INLINE_DATA
1389 header = self.index.pack_header(header)
1389 header = self.index.pack_header(header)
1390 idx = header + idx
1390 idx = header + idx
1391 yield idx
1391 yield idx
1392 yield self._inner.get_segment_for_revs(rev, rev)[1]
1392 yield self._inner.get_segment_for_revs(rev, rev)[1]
1393
1393
1394 inline_stream = get_stream()
1394 inline_stream = get_stream()
1395 next(inline_stream)
1395 next(inline_stream)
1396 return [
1396 return [
1397 (self._indexfile, inline_stream, index_size + data_size),
1397 (self._indexfile, inline_stream, index_size + data_size),
1398 ]
1398 ]
1399 else:
1399 else:
1400
1400
1401 def get_index_stream():
1401 def get_index_stream():
1402 with self.opener(self._indexfile, mode=b"r") as fp:
1402 with self.opener(self._indexfile, mode=b"r") as fp:
1403 yield None
1403 yield None
1404 if index_size <= 65536:
1404 if index_size <= 65536:
1405 yield fp.read(index_size)
1405 yield fp.read(index_size)
1406 else:
1406 else:
1407 yield from util.filechunkiter(fp, limit=index_size)
1407 yield from util.filechunkiter(fp, limit=index_size)
1408
1408
1409 def get_data_stream():
1409 def get_data_stream():
1410 with self._datafp() as fp:
1410 with self._datafp() as fp:
1411 yield None
1411 yield None
1412 if data_size <= 65536:
1412 if data_size <= 65536:
1413 yield fp.read(data_size)
1413 yield fp.read(data_size)
1414 else:
1414 else:
1415 yield from util.filechunkiter(fp, limit=data_size)
1415 yield from util.filechunkiter(fp, limit=data_size)
1416
1416
1417 index_stream = get_index_stream()
1417 index_stream = get_index_stream()
1418 next(index_stream)
1418 next(index_stream)
1419 data_stream = get_data_stream()
1419 data_stream = get_data_stream()
1420 next(data_stream)
1420 next(data_stream)
1421 return [
1421 return [
1422 (self._datafile, data_stream, data_size),
1422 (self._datafile, data_stream, data_size),
1423 (self._indexfile, index_stream, index_size),
1423 (self._indexfile, index_stream, index_size),
1424 ]
1424 ]
1425
1425
    def _loadindex(self, docket=None):
        """Load (or reload) the revlog index from disk.

        Returns the chunk cache produced by the index parser.  Side
        effects: sets ``self.index``, ``self._docket``, ``self._indexfile``,
        ``self._datafile``, ``self._inline`` and related attributes.
        """

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        # Resolve which on-disk file is the entry point for this revlog:
        # explicit postfix, pending data (``.i.a``), a split index, or the
        # plain ``.i`` file.
        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        elif self._try_split and self.opener.exists(self._split_index_file):
            entry_point = self._split_index_file
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            # caller already parsed the docket for us
            self._docket = docket
            self._docket_file = entry_point
        else:
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                # first 4 bytes hold the combined format version and flags
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

            # high 16 bits: feature flags; low 16 bits: format version
            self._format_flags = header & ~0xFFFF
            self._format_version = header & 0xFFFF

            supported_flags = SUPPORTED_FLAGS.get(self._format_version)
            if supported_flags is None:
                msg = _(b'unknown version (%d) in revlog %s')
                msg %= (self._format_version, self.display_id)
                raise error.RevlogError(msg)
            elif self._format_flags & ~supported_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            features = FEATURES_BY_VERSION[self._format_version]
            self._inline = features[b'inline'](self._format_flags)
            self.delta_config.general_delta = features[b'generaldelta'](
                self._format_flags
            )
            self.feature_config.has_side_data = features[b'sidedata']

            if not features[b'docket']:
                # non-docket formats: the entry point *is* the index file
                self._indexfile = entry_point
                index_data = entry_data
            else:
                self._docket_file = entry_point
                if self._initempty:
                    self._docket = docketutil.default_docket(self, header)
                else:
                    self._docket = docketutil.parse_docket(
                        self, entry_data, use_pending=self._trypending
                    )

        if self._docket is not None:
            # docket-based revlog: index location and size come from the
            # docket, never inline
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self.delta_config.general_delta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self.delta_config.general_delta:
            self.delta_config.sparse_revlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        # pick the parser matching the detected on-disk format
        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and hasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)

        return chunkcache
1573
1573
    def _load_inner(self, chunk_cache):
        """Instantiate the inner revlog object backing this revlog.

        ``chunk_cache`` is the initial chunk cache as returned by
        ``_loadindex``.  Must be called after the index is loaded, since
        the inner object captures the index and file names.
        """
        # Only docket-based revlogs carry a default compression header.
        if self._docket is None:
            default_compression_header = None
        else:
            default_compression_header = self._docket.default_compression_header

        self._inner = _InnerRevlog(
            opener=self.opener,
            index=self.index,
            index_file=self._indexfile,
            data_file=self._datafile,
            sidedata_file=self._sidedatafile,
            inline=self._inline,
            data_config=self.data_config,
            delta_config=self.delta_config,
            feature_config=self.feature_config,
            chunk_cache=chunk_cache,
            default_compression_header=default_compression_header,
        )
1593
1593
1594 def get_revlog(self):
1594 def get_revlog(self):
1595 """simple function to mirror API of other not-really-revlog API"""
1595 """simple function to mirror API of other not-really-revlog API"""
1596 return self
1596 return self
1597
1597
1598 @util.propertycache
1598 @util.propertycache
1599 def revlog_kind(self):
1599 def revlog_kind(self):
1600 return self.target[0]
1600 return self.target[0]
1601
1601
1602 @util.propertycache
1602 @util.propertycache
1603 def display_id(self):
1603 def display_id(self):
1604 """The public facing "ID" of the revlog that we use in message"""
1604 """The public facing "ID" of the revlog that we use in message"""
1605 if self.revlog_kind == KIND_FILELOG:
1605 if self.revlog_kind == KIND_FILELOG:
1606 # Reference the file without the "data/" prefix, so it is familiar
1606 # Reference the file without the "data/" prefix, so it is familiar
1607 # to the user.
1607 # to the user.
1608 return self.target[1]
1608 return self.target[1]
1609 else:
1609 else:
1610 return self.radix
1610 return self.radix
1611
1611
1612 def _datafp(self, mode=b'r'):
1612 def _datafp(self, mode=b'r'):
1613 """file object for the revlog's data file"""
1613 """file object for the revlog's data file"""
1614 return self.opener(self._datafile, mode=mode)
1614 return self.opener(self._datafile, mode=mode)
1615
1615
1616 def tiprev(self):
1616 def tiprev(self):
1617 return len(self.index) - 1
1617 return len(self.index) - 1
1618
1618
1619 def tip(self):
1619 def tip(self):
1620 return self.node(self.tiprev())
1620 return self.node(self.tiprev())
1621
1621
1622 def __contains__(self, rev):
1622 def __contains__(self, rev):
1623 return 0 <= rev < len(self)
1623 return 0 <= rev < len(self)
1624
1624
1625 def __len__(self):
1625 def __len__(self):
1626 return len(self.index)
1626 return len(self.index)
1627
1627
1628 def __iter__(self):
1628 def __iter__(self):
1629 return iter(range(len(self)))
1629 return iter(range(len(self)))
1630
1630
1631 def revs(self, start=0, stop=None):
1631 def revs(self, start=0, stop=None):
1632 """iterate over all rev in this revlog (from start to stop)"""
1632 """iterate over all rev in this revlog (from start to stop)"""
1633 return storageutil.iterrevs(len(self), start=start, stop=stop)
1633 return storageutil.iterrevs(len(self), start=start, stop=stop)
1634
1634
1635 def hasnode(self, node):
1635 def hasnode(self, node):
1636 try:
1636 try:
1637 self.rev(node)
1637 self.rev(node)
1638 return True
1638 return True
1639 except KeyError:
1639 except KeyError:
1640 return False
1640 return False
1641
1641
1642 def _candelta(self, baserev, rev):
1642 def _candelta(self, baserev, rev):
1643 """whether two revisions (baserev, rev) can be delta-ed or not"""
1643 """whether two revisions (baserev, rev) can be delta-ed or not"""
1644 # Disable delta if either rev requires a content-changing flag
1644 # Disable delta if either rev requires a content-changing flag
1645 # processor (ex. LFS). This is because such flag processor can alter
1645 # processor (ex. LFS). This is because such flag processor can alter
1646 # the rawtext content that the delta will be based on, and two clients
1646 # the rawtext content that the delta will be based on, and two clients
1647 # could have a same revlog node with different flags (i.e. different
1647 # could have a same revlog node with different flags (i.e. different
1648 # rawtext contents) and the delta could be incompatible.
1648 # rawtext contents) and the delta could be incompatible.
1649 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1649 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1650 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1650 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1651 ):
1651 ):
1652 return False
1652 return False
1653 return True
1653 return True
1654
1654
1655 def update_caches(self, transaction):
1655 def update_caches(self, transaction):
1656 """update on disk cache
1656 """update on disk cache
1657
1657
1658 If a transaction is passed, the update may be delayed to transaction
1658 If a transaction is passed, the update may be delayed to transaction
1659 commit."""
1659 commit."""
1660 if self._nodemap_file is not None:
1660 if self._nodemap_file is not None:
1661 if transaction is None:
1661 if transaction is None:
1662 nodemaputil.update_persistent_nodemap(self)
1662 nodemaputil.update_persistent_nodemap(self)
1663 else:
1663 else:
1664 nodemaputil.setup_persistent_nodemap(transaction, self)
1664 nodemaputil.setup_persistent_nodemap(transaction, self)
1665
1665
    def clearcaches(self):
        """Clear in-memory caches"""
        # the revision cache lives on the inner revlog object
        self._inner._revisioncache = None
        self._chainbasecache.clear()
        self._inner._segmentfile.clear_cache()
        self._inner._segmentfile_sidedata.clear_cache()
        # partial-lookup cache (prefix -> node)
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket, we
        # end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and hasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)
1687
1687
1688 def rev(self, node):
1688 def rev(self, node):
1689 """return the revision number associated with a <nodeid>"""
1689 """return the revision number associated with a <nodeid>"""
1690 try:
1690 try:
1691 return self.index.rev(node)
1691 return self.index.rev(node)
1692 except TypeError:
1692 except TypeError:
1693 raise
1693 raise
1694 except error.RevlogError:
1694 except error.RevlogError:
1695 # parsers.c radix tree lookup failed
1695 # parsers.c radix tree lookup failed
1696 if (
1696 if (
1697 node == self.nodeconstants.wdirid
1697 node == self.nodeconstants.wdirid
1698 or node in self.nodeconstants.wdirfilenodeids
1698 or node in self.nodeconstants.wdirfilenodeids
1699 ):
1699 ):
1700 raise error.WdirUnsupported
1700 raise error.WdirUnsupported
1701 raise error.LookupError(node, self.display_id, _(b'no node'))
1701 raise error.LookupError(node, self.display_id, _(b'no node'))
1702
1702
1703 # Accessors for index entries.
1703 # Accessors for index entries.
1704
1704
1705 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1705 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1706 # are flags.
1706 # are flags.
1707 def start(self, rev):
1707 def start(self, rev):
1708 return int(self.index[rev][0] >> 16)
1708 return int(self.index[rev][0] >> 16)
1709
1709
1710 def sidedata_cut_off(self, rev):
1710 def sidedata_cut_off(self, rev):
1711 sd_cut_off = self.index[rev][8]
1711 sd_cut_off = self.index[rev][8]
1712 if sd_cut_off != 0:
1712 if sd_cut_off != 0:
1713 return sd_cut_off
1713 return sd_cut_off
1714 # This is some annoying dance, because entries without sidedata
1714 # This is some annoying dance, because entries without sidedata
1715 # currently use 0 as their ofsset. (instead of previous-offset +
1715 # currently use 0 as their ofsset. (instead of previous-offset +
1716 # previous-size)
1716 # previous-size)
1717 #
1717 #
1718 # We should reconsider this sidedata β†’ 0 sidata_offset policy.
1718 # We should reconsider this sidedata β†’ 0 sidata_offset policy.
1719 # In the meantime, we need this.
1719 # In the meantime, we need this.
1720 while 0 <= rev:
1720 while 0 <= rev:
1721 e = self.index[rev]
1721 e = self.index[rev]
1722 if e[9] != 0:
1722 if e[9] != 0:
1723 return e[8] + e[9]
1723 return e[8] + e[9]
1724 rev -= 1
1724 rev -= 1
1725 return 0
1725 return 0
1726
1726
1727 def flags(self, rev):
1727 def flags(self, rev):
1728 return self.index[rev][0] & 0xFFFF
1728 return self.index[rev][0] & 0xFFFF
1729
1729
1730 def length(self, rev):
1730 def length(self, rev):
1731 return self.index[rev][1]
1731 return self.index[rev][1]
1732
1732
1733 def sidedata_length(self, rev):
1733 def sidedata_length(self, rev):
1734 if not self.feature_config.has_side_data:
1734 if not self.feature_config.has_side_data:
1735 return 0
1735 return 0
1736 return self.index[rev][9]
1736 return self.index[rev][9]
1737
1737
1738 def rawsize(self, rev):
1738 def rawsize(self, rev):
1739 """return the length of the uncompressed text for a given revision"""
1739 """return the length of the uncompressed text for a given revision"""
1740 l = self.index[rev][2]
1740 l = self.index[rev][2]
1741 if l >= 0:
1741 if l >= 0:
1742 return l
1742 return l
1743
1743
1744 t = self.rawdata(rev)
1744 t = self.rawdata(rev)
1745 return len(t)
1745 return len(t)
1746
1746
1747 def size(self, rev):
1747 def size(self, rev):
1748 """length of non-raw text (processed by a "read" flag processor)"""
1748 """length of non-raw text (processed by a "read" flag processor)"""
1749 # fast path: if no "read" flag processor could change the content,
1749 # fast path: if no "read" flag processor could change the content,
1750 # size is rawsize. note: ELLIPSIS is known to not change the content.
1750 # size is rawsize. note: ELLIPSIS is known to not change the content.
1751 flags = self.flags(rev)
1751 flags = self.flags(rev)
1752 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1752 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1753 return self.rawsize(rev)
1753 return self.rawsize(rev)
1754
1754
1755 return len(self.revision(rev))
1755 return len(self.revision(rev))
1756
1756
1757 def fast_rank(self, rev):
1757 def fast_rank(self, rev):
1758 """Return the rank of a revision if already known, or None otherwise.
1758 """Return the rank of a revision if already known, or None otherwise.
1759
1759
1760 The rank of a revision is the size of the sub-graph it defines as a
1760 The rank of a revision is the size of the sub-graph it defines as a
1761 head. Equivalently, the rank of a revision `r` is the size of the set
1761 head. Equivalently, the rank of a revision `r` is the size of the set
1762 `ancestors(r)`, `r` included.
1762 `ancestors(r)`, `r` included.
1763
1763
1764 This method returns the rank retrieved from the revlog in constant
1764 This method returns the rank retrieved from the revlog in constant
1765 time. It makes no attempt at computing unknown values for versions of
1765 time. It makes no attempt at computing unknown values for versions of
1766 the revlog which do not persist the rank.
1766 the revlog which do not persist the rank.
1767 """
1767 """
1768 rank = self.index[rev][ENTRY_RANK]
1768 rank = self.index[rev][ENTRY_RANK]
1769 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1769 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1770 return None
1770 return None
1771 if rev == nullrev:
1771 if rev == nullrev:
1772 return 0 # convention
1772 return 0 # convention
1773 return rank
1773 return rank
1774
1774
1775 def chainbase(self, rev):
1775 def chainbase(self, rev):
1776 base = self._chainbasecache.get(rev)
1776 base = self._chainbasecache.get(rev)
1777 if base is not None:
1777 if base is not None:
1778 return base
1778 return base
1779
1779
1780 index = self.index
1780 index = self.index
1781 iterrev = rev
1781 iterrev = rev
1782 base = index[iterrev][3]
1782 base = index[iterrev][3]
1783 while base != iterrev:
1783 while base != iterrev:
1784 iterrev = base
1784 iterrev = base
1785 base = index[iterrev][3]
1785 base = index[iterrev][3]
1786
1786
1787 self._chainbasecache[rev] = base
1787 self._chainbasecache[rev] = base
1788 return base
1788 return base
1789
1789
1790 def linkrev(self, rev):
1790 def linkrev(self, rev):
1791 return self.index[rev][4]
1791 return self.index[rev][4]
1792
1792
1793 def parentrevs(self, rev):
1793 def parentrevs(self, rev):
1794 try:
1794 try:
1795 entry = self.index[rev]
1795 entry = self.index[rev]
1796 except IndexError:
1796 except IndexError:
1797 if rev == wdirrev:
1797 if rev == wdirrev:
1798 raise error.WdirUnsupported
1798 raise error.WdirUnsupported
1799 raise
1799 raise
1800
1800
1801 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
1801 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
1802 return entry[6], entry[5]
1802 return entry[6], entry[5]
1803 else:
1803 else:
1804 return entry[5], entry[6]
1804 return entry[5], entry[6]
1805
1805
1806 # fast parentrevs(rev) where rev isn't filtered
1806 # fast parentrevs(rev) where rev isn't filtered
1807 _uncheckedparentrevs = parentrevs
1807 _uncheckedparentrevs = parentrevs
1808
1808
1809 def node(self, rev):
1809 def node(self, rev):
1810 try:
1810 try:
1811 return self.index[rev][7]
1811 return self.index[rev][7]
1812 except IndexError:
1812 except IndexError:
1813 if rev == wdirrev:
1813 if rev == wdirrev:
1814 raise error.WdirUnsupported
1814 raise error.WdirUnsupported
1815 raise
1815 raise
1816
1816
1817 # Derived from index values.
1817 # Derived from index values.
1818
1818
1819 def end(self, rev):
1819 def end(self, rev):
1820 return self.start(rev) + self.length(rev)
1820 return self.start(rev) + self.length(rev)
1821
1821
1822 def parents(self, node):
1822 def parents(self, node):
1823 i = self.index
1823 i = self.index
1824 d = i[self.rev(node)]
1824 d = i[self.rev(node)]
1825 # inline node() to avoid function call overhead
1825 # inline node() to avoid function call overhead
1826 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
1826 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
1827 return i[d[6]][7], i[d[5]][7]
1827 return i[d[6]][7], i[d[5]][7]
1828 else:
1828 else:
1829 return i[d[5]][7], i[d[6]][7]
1829 return i[d[5]][7], i[d[6]][7]
1830
1830
1831 def chainlen(self, rev):
1831 def chainlen(self, rev):
1832 return self._chaininfo(rev)[0]
1832 return self._chaininfo(rev)[0]
1833
1833
    def _chaininfo(self, rev):
        """Return ``(chain-length, sum-delta-length)`` for ``rev``.

        Walks the delta chain from ``rev`` down to its base, counting links
        and accumulating compressed delta sizes.  Results (including partial
        chains already cached for intermediate revisions) are served from
        and stored into ``self._chaininfocache``.
        """
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self.delta_config.general_delta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        # index field 3 is the delta base; field 1 the compressed length.
        # The chain base is reached when an entry is its own base.
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                # delta base may be any earlier revision
                iterrev = e[3]
            else:
                # without general-delta, deltas always target rev - 1
                iterrev -= 1
            if iterrev in chaininfocache:
                # reuse the already-computed tail of the chain
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r
1864
1864
    def _deltachain(self, rev, stoprev=None):
        # Thin delegation: the delta-chain computation lives on the inner
        # revlog object.
        return self._inner._deltachain(rev, stoprev=stoprev)
1867
1867
1868 def ancestors(self, revs, stoprev=0, inclusive=False):
1868 def ancestors(self, revs, stoprev=0, inclusive=False):
1869 """Generate the ancestors of 'revs' in reverse revision order.
1869 """Generate the ancestors of 'revs' in reverse revision order.
1870 Does not generate revs lower than stoprev.
1870 Does not generate revs lower than stoprev.
1871
1871
1872 See the documentation for ancestor.lazyancestors for more details."""
1872 See the documentation for ancestor.lazyancestors for more details."""
1873
1873
1874 # first, make sure start revisions aren't filtered
1874 # first, make sure start revisions aren't filtered
1875 revs = list(revs)
1875 revs = list(revs)
1876 checkrev = self.node
1876 checkrev = self.node
1877 for r in revs:
1877 for r in revs:
1878 checkrev(r)
1878 checkrev(r)
1879 # and we're sure ancestors aren't filtered as well
1879 # and we're sure ancestors aren't filtered as well
1880
1880
1881 if rustancestor is not None and self.index.rust_ext_compat:
1881 if rustancestor is not None and self.index.rust_ext_compat:
1882 lazyancestors = rustancestor.LazyAncestors
1882 lazyancestors = rustancestor.LazyAncestors
1883 arg = self.index
1883 arg = self.index
1884 else:
1884 else:
1885 lazyancestors = ancestor.lazyancestors
1885 lazyancestors = ancestor.lazyancestors
1886 arg = self._uncheckedparentrevs
1886 arg = self._uncheckedparentrevs
1887 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1887 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1888
1888
1889 def descendants(self, revs):
1889 def descendants(self, revs):
1890 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1890 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1891
1891
1892 def findcommonmissing(self, common=None, heads=None):
1892 def findcommonmissing(self, common=None, heads=None):
1893 """Return a tuple of the ancestors of common and the ancestors of heads
1893 """Return a tuple of the ancestors of common and the ancestors of heads
1894 that are not ancestors of common. In revset terminology, we return the
1894 that are not ancestors of common. In revset terminology, we return the
1895 tuple:
1895 tuple:
1896
1896
1897 ::common, (::heads) - (::common)
1897 ::common, (::heads) - (::common)
1898
1898
1899 The list is sorted by revision number, meaning it is
1899 The list is sorted by revision number, meaning it is
1900 topologically sorted.
1900 topologically sorted.
1901
1901
1902 'heads' and 'common' are both lists of node IDs. If heads is
1902 'heads' and 'common' are both lists of node IDs. If heads is
1903 not supplied, uses all of the revlog's heads. If common is not
1903 not supplied, uses all of the revlog's heads. If common is not
1904 supplied, uses nullid."""
1904 supplied, uses nullid."""
1905 if common is None:
1905 if common is None:
1906 common = [self.nullid]
1906 common = [self.nullid]
1907 if heads is None:
1907 if heads is None:
1908 heads = self.heads()
1908 heads = self.heads()
1909
1909
1910 common = [self.rev(n) for n in common]
1910 common = [self.rev(n) for n in common]
1911 heads = [self.rev(n) for n in heads]
1911 heads = [self.rev(n) for n in heads]
1912
1912
1913 # we want the ancestors, but inclusive
1913 # we want the ancestors, but inclusive
1914 class lazyset:
1914 class lazyset:
1915 def __init__(self, lazyvalues):
1915 def __init__(self, lazyvalues):
1916 self.addedvalues = set()
1916 self.addedvalues = set()
1917 self.lazyvalues = lazyvalues
1917 self.lazyvalues = lazyvalues
1918
1918
1919 def __contains__(self, value):
1919 def __contains__(self, value):
1920 return value in self.addedvalues or value in self.lazyvalues
1920 return value in self.addedvalues or value in self.lazyvalues
1921
1921
1922 def __iter__(self):
1922 def __iter__(self):
1923 added = self.addedvalues
1923 added = self.addedvalues
1924 for r in added:
1924 for r in added:
1925 yield r
1925 yield r
1926 for r in self.lazyvalues:
1926 for r in self.lazyvalues:
1927 if not r in added:
1927 if not r in added:
1928 yield r
1928 yield r
1929
1929
1930 def add(self, value):
1930 def add(self, value):
1931 self.addedvalues.add(value)
1931 self.addedvalues.add(value)
1932
1932
1933 def update(self, values):
1933 def update(self, values):
1934 self.addedvalues.update(values)
1934 self.addedvalues.update(values)
1935
1935
1936 has = lazyset(self.ancestors(common))
1936 has = lazyset(self.ancestors(common))
1937 has.add(nullrev)
1937 has.add(nullrev)
1938 has.update(common)
1938 has.update(common)
1939
1939
1940 # take all ancestors from heads that aren't in has
1940 # take all ancestors from heads that aren't in has
1941 missing = set()
1941 missing = set()
1942 visit = collections.deque(r for r in heads if r not in has)
1942 visit = collections.deque(r for r in heads if r not in has)
1943 while visit:
1943 while visit:
1944 r = visit.popleft()
1944 r = visit.popleft()
1945 if r in missing:
1945 if r in missing:
1946 continue
1946 continue
1947 else:
1947 else:
1948 missing.add(r)
1948 missing.add(r)
1949 for p in self.parentrevs(r):
1949 for p in self.parentrevs(r):
1950 if p not in has:
1950 if p not in has:
1951 visit.append(p)
1951 visit.append(p)
1952 missing = list(missing)
1952 missing = list(missing)
1953 missing.sort()
1953 missing.sort()
1954 return has, [self.node(miss) for miss in missing]
1954 return has, [self.node(miss) for miss in missing]
1955
1955
1956 def incrementalmissingrevs(self, common=None):
1956 def incrementalmissingrevs(self, common=None):
1957 """Return an object that can be used to incrementally compute the
1957 """Return an object that can be used to incrementally compute the
1958 revision numbers of the ancestors of arbitrary sets that are not
1958 revision numbers of the ancestors of arbitrary sets that are not
1959 ancestors of common. This is an ancestor.incrementalmissingancestors
1959 ancestors of common. This is an ancestor.incrementalmissingancestors
1960 object.
1960 object.
1961
1961
1962 'common' is a list of revision numbers. If common is not supplied, uses
1962 'common' is a list of revision numbers. If common is not supplied, uses
1963 nullrev.
1963 nullrev.
1964 """
1964 """
1965 if common is None:
1965 if common is None:
1966 common = [nullrev]
1966 common = [nullrev]
1967
1967
1968 if rustancestor is not None and self.index.rust_ext_compat:
1968 if rustancestor is not None and self.index.rust_ext_compat:
1969 return rustancestor.MissingAncestors(self.index, common)
1969 return rustancestor.MissingAncestors(self.index, common)
1970 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1970 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1971
1971
1972 def findmissingrevs(self, common=None, heads=None):
1972 def findmissingrevs(self, common=None, heads=None):
1973 """Return the revision numbers of the ancestors of heads that
1973 """Return the revision numbers of the ancestors of heads that
1974 are not ancestors of common.
1974 are not ancestors of common.
1975
1975
1976 More specifically, return a list of revision numbers corresponding to
1976 More specifically, return a list of revision numbers corresponding to
1977 nodes N such that every N satisfies the following constraints:
1977 nodes N such that every N satisfies the following constraints:
1978
1978
1979 1. N is an ancestor of some node in 'heads'
1979 1. N is an ancestor of some node in 'heads'
1980 2. N is not an ancestor of any node in 'common'
1980 2. N is not an ancestor of any node in 'common'
1981
1981
1982 The list is sorted by revision number, meaning it is
1982 The list is sorted by revision number, meaning it is
1983 topologically sorted.
1983 topologically sorted.
1984
1984
1985 'heads' and 'common' are both lists of revision numbers. If heads is
1985 'heads' and 'common' are both lists of revision numbers. If heads is
1986 not supplied, uses all of the revlog's heads. If common is not
1986 not supplied, uses all of the revlog's heads. If common is not
1987 supplied, uses nullid."""
1987 supplied, uses nullid."""
1988 if common is None:
1988 if common is None:
1989 common = [nullrev]
1989 common = [nullrev]
1990 if heads is None:
1990 if heads is None:
1991 heads = self.headrevs()
1991 heads = self.headrevs()
1992
1992
1993 inc = self.incrementalmissingrevs(common=common)
1993 inc = self.incrementalmissingrevs(common=common)
1994 return inc.missingancestors(heads)
1994 return inc.missingancestors(heads)
1995
1995
1996 def findmissing(self, common=None, heads=None):
1996 def findmissing(self, common=None, heads=None):
1997 """Return the ancestors of heads that are not ancestors of common.
1997 """Return the ancestors of heads that are not ancestors of common.
1998
1998
1999 More specifically, return a list of nodes N such that every N
1999 More specifically, return a list of nodes N such that every N
2000 satisfies the following constraints:
2000 satisfies the following constraints:
2001
2001
2002 1. N is an ancestor of some node in 'heads'
2002 1. N is an ancestor of some node in 'heads'
2003 2. N is not an ancestor of any node in 'common'
2003 2. N is not an ancestor of any node in 'common'
2004
2004
2005 The list is sorted by revision number, meaning it is
2005 The list is sorted by revision number, meaning it is
2006 topologically sorted.
2006 topologically sorted.
2007
2007
2008 'heads' and 'common' are both lists of node IDs. If heads is
2008 'heads' and 'common' are both lists of node IDs. If heads is
2009 not supplied, uses all of the revlog's heads. If common is not
2009 not supplied, uses all of the revlog's heads. If common is not
2010 supplied, uses nullid."""
2010 supplied, uses nullid."""
2011 if common is None:
2011 if common is None:
2012 common = [self.nullid]
2012 common = [self.nullid]
2013 if heads is None:
2013 if heads is None:
2014 heads = self.heads()
2014 heads = self.heads()
2015
2015
2016 common = [self.rev(n) for n in common]
2016 common = [self.rev(n) for n in common]
2017 heads = [self.rev(n) for n in heads]
2017 heads = [self.rev(n) for n in heads]
2018
2018
2019 inc = self.incrementalmissingrevs(common=common)
2019 inc = self.incrementalmissingrevs(common=common)
2020 return [self.node(r) for r in inc.missingancestors(heads)]
2020 return [self.node(r) for r in inc.missingancestors(heads)]
2021
2021
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

        1. N is a descendant of some node in 'roots'
        2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        # canonical "nothing matched" return value
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        # Single forward sweep over the candidate rev range: revision numbers
        # are a topological order, so parents are always visited before
        # children.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked is descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        # keep only the heads that were actually reached during the sweep
        heads = [head for head, flag in heads.items() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
2181
2181
2182 def headrevs(self, revs=None):
2182 def headrevs(self, revs=None):
2183 if revs is None:
2183 if revs is None:
2184 try:
2184 try:
2185 return self.index.headrevs()
2185 return self.index.headrevs()
2186 except AttributeError:
2186 except AttributeError:
2187 return self._headrevs()
2187 return self._headrevs()
2188 if rustdagop is not None and self.index.rust_ext_compat:
2188 if rustdagop is not None and self.index.rust_ext_compat:
2189 return rustdagop.headrevs(self.index, revs)
2189 return rustdagop.headrevs(self.index, revs)
2190 return dagop.headrevs(revs, self._uncheckedparentrevs)
2190 return dagop.headrevs(revs, self._uncheckedparentrevs)
2191
2191
2192 def computephases(self, roots):
2192 def computephases(self, roots):
2193 return self.index.computephasesmapsets(roots)
2193 return self.index.computephasesmapsets(roots)
2194
2194
2195 def _headrevs(self):
2195 def _headrevs(self):
2196 count = len(self)
2196 count = len(self)
2197 if not count:
2197 if not count:
2198 return [nullrev]
2198 return [nullrev]
2199 # we won't iter over filtered rev so nobody is a head at start
2199 # we won't iter over filtered rev so nobody is a head at start
2200 ishead = [0] * (count + 1)
2200 ishead = [0] * (count + 1)
2201 index = self.index
2201 index = self.index
2202 for r in self:
2202 for r in self:
2203 ishead[r] = 1 # I may be an head
2203 ishead[r] = 1 # I may be an head
2204 e = index[r]
2204 e = index[r]
2205 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
2205 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
2206 return [r for r, val in enumerate(ishead) if val]
2206 return [r for r, val in enumerate(ishead) if val]
2207
2207
2208 def heads(self, start=None, stop=None):
2208 def heads(self, start=None, stop=None):
2209 """return the list of all nodes that have no children
2209 """return the list of all nodes that have no children
2210
2210
2211 if start is specified, only heads that are descendants of
2211 if start is specified, only heads that are descendants of
2212 start will be returned
2212 start will be returned
2213 if stop is specified, it will consider all the revs from stop
2213 if stop is specified, it will consider all the revs from stop
2214 as if they had no children
2214 as if they had no children
2215 """
2215 """
2216 if start is None and stop is None:
2216 if start is None and stop is None:
2217 if not len(self):
2217 if not len(self):
2218 return [self.nullid]
2218 return [self.nullid]
2219 return [self.node(r) for r in self.headrevs()]
2219 return [self.node(r) for r in self.headrevs()]
2220
2220
2221 if start is None:
2221 if start is None:
2222 start = nullrev
2222 start = nullrev
2223 else:
2223 else:
2224 start = self.rev(start)
2224 start = self.rev(start)
2225
2225
2226 stoprevs = {self.rev(n) for n in stop or []}
2226 stoprevs = {self.rev(n) for n in stop or []}
2227
2227
2228 revs = dagop.headrevssubset(
2228 revs = dagop.headrevssubset(
2229 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
2229 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
2230 )
2230 )
2231
2231
2232 return [self.node(rev) for rev in revs]
2232 return [self.node(rev) for rev in revs]
2233
2233
2234 def children(self, node):
2234 def children(self, node):
2235 """find the children of a given node"""
2235 """find the children of a given node"""
2236 c = []
2236 c = []
2237 p = self.rev(node)
2237 p = self.rev(node)
2238 for r in self.revs(start=p + 1):
2238 for r in self.revs(start=p + 1):
2239 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
2239 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
2240 if prevs:
2240 if prevs:
2241 for pr in prevs:
2241 for pr in prevs:
2242 if pr == p:
2242 if pr == p:
2243 c.append(self.node(r))
2243 c.append(self.node(r))
2244 elif p == nullrev:
2244 elif p == nullrev:
2245 c.append(self.node(r))
2245 c.append(self.node(r))
2246 return c
2246 return c
2247
2247
2248 def commonancestorsheads(self, a, b):
2248 def commonancestorsheads(self, a, b):
2249 """calculate all the heads of the common ancestors of nodes a and b"""
2249 """calculate all the heads of the common ancestors of nodes a and b"""
2250 a, b = self.rev(a), self.rev(b)
2250 a, b = self.rev(a), self.rev(b)
2251 ancs = self._commonancestorsheads(a, b)
2251 ancs = self._commonancestorsheads(a, b)
2252 return pycompat.maplist(self.node, ancs)
2252 return pycompat.maplist(self.node, ancs)
2253
2253
2254 def _commonancestorsheads(self, *revs):
2254 def _commonancestorsheads(self, *revs):
2255 """calculate all the heads of the common ancestors of revs"""
2255 """calculate all the heads of the common ancestors of revs"""
2256 try:
2256 try:
2257 ancs = self.index.commonancestorsheads(*revs)
2257 ancs = self.index.commonancestorsheads(*revs)
2258 except (AttributeError, OverflowError): # C implementation failed
2258 except (AttributeError, OverflowError): # C implementation failed
2259 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
2259 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
2260 return ancs
2260 return ancs
2261
2261
2262 def isancestor(self, a, b):
2262 def isancestor(self, a, b):
2263 """return True if node a is an ancestor of node b
2263 """return True if node a is an ancestor of node b
2264
2264
2265 A revision is considered an ancestor of itself."""
2265 A revision is considered an ancestor of itself."""
2266 a, b = self.rev(a), self.rev(b)
2266 a, b = self.rev(a), self.rev(b)
2267 return self.isancestorrev(a, b)
2267 return self.isancestorrev(a, b)
2268
2268
2269 def isancestorrev(self, a, b):
2269 def isancestorrev(self, a, b):
2270 """return True if revision a is an ancestor of revision b
2270 """return True if revision a is an ancestor of revision b
2271
2271
2272 A revision is considered an ancestor of itself.
2272 A revision is considered an ancestor of itself.
2273
2273
2274 The implementation of this is trivial but the use of
2274 The implementation of this is trivial but the use of
2275 reachableroots is not."""
2275 reachableroots is not."""
2276 if a == nullrev:
2276 if a == nullrev:
2277 return True
2277 return True
2278 elif a == b:
2278 elif a == b:
2279 return True
2279 return True
2280 elif a > b:
2280 elif a > b:
2281 return False
2281 return False
2282 return bool(self.reachableroots(a, [b], [a], includepath=False))
2282 return bool(self.reachableroots(a, [b], [a], includepath=False))
2283
2283
2284 def reachableroots(self, minroot, heads, roots, includepath=False):
2284 def reachableroots(self, minroot, heads, roots, includepath=False):
2285 """return (heads(::(<roots> and <roots>::<heads>)))
2285 """return (heads(::(<roots> and <roots>::<heads>)))
2286
2286
2287 If includepath is True, return (<roots>::<heads>)."""
2287 If includepath is True, return (<roots>::<heads>)."""
2288 try:
2288 try:
2289 return self.index.reachableroots2(
2289 return self.index.reachableroots2(
2290 minroot, heads, roots, includepath
2290 minroot, heads, roots, includepath
2291 )
2291 )
2292 except AttributeError:
2292 except AttributeError:
2293 return dagop._reachablerootspure(
2293 return dagop._reachablerootspure(
2294 self.parentrevs, minroot, roots, heads, includepath
2294 self.parentrevs, minroot, roots, heads, includepath
2295 )
2295 )
2296
2296
2297 def ancestor(self, a, b):
2297 def ancestor(self, a, b):
2298 """calculate the "best" common ancestor of nodes a and b"""
2298 """calculate the "best" common ancestor of nodes a and b"""
2299
2299
2300 a, b = self.rev(a), self.rev(b)
2300 a, b = self.rev(a), self.rev(b)
2301 try:
2301 try:
2302 ancs = self.index.ancestors(a, b)
2302 ancs = self.index.ancestors(a, b)
2303 except (AttributeError, OverflowError):
2303 except (AttributeError, OverflowError):
2304 ancs = ancestor.ancestors(self.parentrevs, a, b)
2304 ancs = ancestor.ancestors(self.parentrevs, a, b)
2305 if ancs:
2305 if ancs:
2306 # choose a consistent winner when there's a tie
2306 # choose a consistent winner when there's a tie
2307 return min(map(self.node, ancs))
2307 return min(map(self.node, ancs))
2308 return self.nullid
2308 return self.nullid
2309
2309
    def _match(self, id):
        """Try to resolve ``id`` to a node, in decreasing order of strictness.

        ``id`` may be an integer revision, a binary node, a decimal revision
        string, or a full hex node string. Falls through (returning None
        implicitly) when nothing matches.
        """
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                # reject strings like b'01' or b'+1' that int() accepts
                raise ValueError
            if rev < 0:
                # negative revs count from the end, python-slice style
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (binascii.Error, error.LookupError):
                pass
2343
2343
2344 def _partialmatch(self, id):
2344 def _partialmatch(self, id):
2345 # we don't care wdirfilenodeids as they should be always full hash
2345 # we don't care wdirfilenodeids as they should be always full hash
2346 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2346 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2347 ambiguous = False
2347 ambiguous = False
2348 try:
2348 try:
2349 partial = self.index.partialmatch(id)
2349 partial = self.index.partialmatch(id)
2350 if partial and self.hasnode(partial):
2350 if partial and self.hasnode(partial):
2351 if maybewdir:
2351 if maybewdir:
2352 # single 'ff...' match in radix tree, ambiguous with wdir
2352 # single 'ff...' match in radix tree, ambiguous with wdir
2353 ambiguous = True
2353 ambiguous = True
2354 else:
2354 else:
2355 return partial
2355 return partial
2356 elif maybewdir:
2356 elif maybewdir:
2357 # no 'ff...' match in radix tree, wdir identified
2357 # no 'ff...' match in radix tree, wdir identified
2358 raise error.WdirUnsupported
2358 raise error.WdirUnsupported
2359 else:
2359 else:
2360 return None
2360 return None
2361 except error.RevlogError:
2361 except error.RevlogError:
2362 # parsers.c radix tree lookup gave multiple matches
2362 # parsers.c radix tree lookup gave multiple matches
2363 # fast path: for unfiltered changelog, radix tree is accurate
2363 # fast path: for unfiltered changelog, radix tree is accurate
2364 if not getattr(self, 'filteredrevs', None):
2364 if not getattr(self, 'filteredrevs', None):
2365 ambiguous = True
2365 ambiguous = True
2366 # fall through to slow path that filters hidden revisions
2366 # fall through to slow path that filters hidden revisions
2367 except (AttributeError, ValueError):
2367 except (AttributeError, ValueError):
2368 # we are pure python, or key is not hex
2368 # we are pure python, or key is not hex
2369 pass
2369 pass
2370 if ambiguous:
2370 if ambiguous:
2371 raise error.AmbiguousPrefixLookupError(
2371 raise error.AmbiguousPrefixLookupError(
2372 id, self.display_id, _(b'ambiguous identifier')
2372 id, self.display_id, _(b'ambiguous identifier')
2373 )
2373 )
2374
2374
2375 if id in self._pcache:
2375 if id in self._pcache:
2376 return self._pcache[id]
2376 return self._pcache[id]
2377
2377
2378 if len(id) <= 40:
2378 if len(id) <= 40:
2379 # hex(node)[:...]
2379 # hex(node)[:...]
2380 l = len(id) // 2 * 2 # grab an even number of digits
2380 l = len(id) // 2 * 2 # grab an even number of digits
2381 try:
2381 try:
2382 # we're dropping the last digit, so let's check that it's hex,
2382 # we're dropping the last digit, so let's check that it's hex,
2383 # to avoid the expensive computation below if it's not
2383 # to avoid the expensive computation below if it's not
2384 if len(id) % 2 > 0:
2384 if len(id) % 2 > 0:
2385 if not (id[-1] in hexdigits):
2385 if not (id[-1] in hexdigits):
2386 return None
2386 return None
2387 prefix = bin(id[:l])
2387 prefix = bin(id[:l])
2388 except binascii.Error:
2388 except binascii.Error:
2389 pass
2389 pass
2390 else:
2390 else:
2391 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2391 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2392 nl = [
2392 nl = [
2393 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2393 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2394 ]
2394 ]
2395 if self.nodeconstants.nullhex.startswith(id):
2395 if self.nodeconstants.nullhex.startswith(id):
2396 nl.append(self.nullid)
2396 nl.append(self.nullid)
2397 if len(nl) > 0:
2397 if len(nl) > 0:
2398 if len(nl) == 1 and not maybewdir:
2398 if len(nl) == 1 and not maybewdir:
2399 self._pcache[id] = nl[0]
2399 self._pcache[id] = nl[0]
2400 return nl[0]
2400 return nl[0]
2401 raise error.AmbiguousPrefixLookupError(
2401 raise error.AmbiguousPrefixLookupError(
2402 id, self.display_id, _(b'ambiguous identifier')
2402 id, self.display_id, _(b'ambiguous identifier')
2403 )
2403 )
2404 if maybewdir:
2404 if maybewdir:
2405 raise error.WdirUnsupported
2405 raise error.WdirUnsupported
2406 return None
2406 return None
2407
2407
2408 def lookup(self, id):
2408 def lookup(self, id):
2409 """locate a node based on:
2409 """locate a node based on:
2410 - revision number or str(revision number)
2410 - revision number or str(revision number)
2411 - nodeid or subset of hex nodeid
2411 - nodeid or subset of hex nodeid
2412 """
2412 """
2413 n = self._match(id)
2413 n = self._match(id)
2414 if n is not None:
2414 if n is not None:
2415 return n
2415 return n
2416 n = self._partialmatch(id)
2416 n = self._partialmatch(id)
2417 if n:
2417 if n:
2418 return n
2418 return n
2419
2419
2420 raise error.LookupError(id, self.display_id, _(b'no match found'))
2420 raise error.LookupError(id, self.display_id, _(b'no match found'))
2421
2421
2422 def shortest(self, node, minlength=1):
2422 def shortest(self, node, minlength=1):
2423 """Find the shortest unambiguous prefix that matches node."""
2423 """Find the shortest unambiguous prefix that matches node."""
2424
2424
2425 def isvalid(prefix):
2425 def isvalid(prefix):
2426 try:
2426 try:
2427 matchednode = self._partialmatch(prefix)
2427 matchednode = self._partialmatch(prefix)
2428 except error.AmbiguousPrefixLookupError:
2428 except error.AmbiguousPrefixLookupError:
2429 return False
2429 return False
2430 except error.WdirUnsupported:
2430 except error.WdirUnsupported:
2431 # single 'ff...' match
2431 # single 'ff...' match
2432 return True
2432 return True
2433 if matchednode is None:
2433 if matchednode is None:
2434 raise error.LookupError(node, self.display_id, _(b'no node'))
2434 raise error.LookupError(node, self.display_id, _(b'no node'))
2435 return True
2435 return True
2436
2436
2437 def maybewdir(prefix):
2437 def maybewdir(prefix):
2438 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2438 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2439
2439
2440 hexnode = hex(node)
2440 hexnode = hex(node)
2441
2441
2442 def disambiguate(hexnode, minlength):
2442 def disambiguate(hexnode, minlength):
2443 """Disambiguate against wdirid."""
2443 """Disambiguate against wdirid."""
2444 for length in range(minlength, len(hexnode) + 1):
2444 for length in range(minlength, len(hexnode) + 1):
2445 prefix = hexnode[:length]
2445 prefix = hexnode[:length]
2446 if not maybewdir(prefix):
2446 if not maybewdir(prefix):
2447 return prefix
2447 return prefix
2448
2448
2449 if not getattr(self, 'filteredrevs', None):
2449 if not getattr(self, 'filteredrevs', None):
2450 try:
2450 try:
2451 length = max(self.index.shortest(node), minlength)
2451 length = max(self.index.shortest(node), minlength)
2452 return disambiguate(hexnode, length)
2452 return disambiguate(hexnode, length)
2453 except error.RevlogError:
2453 except error.RevlogError:
2454 if node != self.nodeconstants.wdirid:
2454 if node != self.nodeconstants.wdirid:
2455 raise error.LookupError(
2455 raise error.LookupError(
2456 node, self.display_id, _(b'no node')
2456 node, self.display_id, _(b'no node')
2457 )
2457 )
2458 except AttributeError:
2458 except AttributeError:
2459 # Fall through to pure code
2459 # Fall through to pure code
2460 pass
2460 pass
2461
2461
2462 if node == self.nodeconstants.wdirid:
2462 if node == self.nodeconstants.wdirid:
2463 for length in range(minlength, len(hexnode) + 1):
2463 for length in range(minlength, len(hexnode) + 1):
2464 prefix = hexnode[:length]
2464 prefix = hexnode[:length]
2465 if isvalid(prefix):
2465 if isvalid(prefix):
2466 return prefix
2466 return prefix
2467
2467
2468 for length in range(minlength, len(hexnode) + 1):
2468 for length in range(minlength, len(hexnode) + 1):
2469 prefix = hexnode[:length]
2469 prefix = hexnode[:length]
2470 if isvalid(prefix):
2470 if isvalid(prefix):
2471 return disambiguate(hexnode, length)
2471 return disambiguate(hexnode, length)
2472
2472
2473 def cmp(self, node, text):
2473 def cmp(self, node, text):
2474 """compare text with a given file revision
2474 """compare text with a given file revision
2475
2475
2476 returns True if text is different than what is stored.
2476 returns True if text is different than what is stored.
2477 """
2477 """
2478 p1, p2 = self.parents(node)
2478 p1, p2 = self.parents(node)
2479 return storageutil.hashrevisionsha1(text, p1, p2) != node
2479 return storageutil.hashrevisionsha1(text, p1, p2) != node
2480
2480
2481 def deltaparent(self, rev):
2481 def deltaparent(self, rev):
2482 """return deltaparent of the given revision"""
2482 """return deltaparent of the given revision"""
2483 base = self.index[rev][3]
2483 base = self.index[rev][3]
2484 if base == rev:
2484 if base == rev:
2485 return nullrev
2485 return nullrev
2486 elif self.delta_config.general_delta:
2486 elif self.delta_config.general_delta:
2487 return base
2487 return base
2488 else:
2488 else:
2489 return rev - 1
2489 return rev - 1
2490
2490
2491 def issnapshot(self, rev):
2491 def issnapshot(self, rev):
2492 """tells whether rev is a snapshot"""
2492 """tells whether rev is a snapshot"""
2493 ret = self._inner.issnapshot(rev)
2493 ret = self._inner.issnapshot(rev)
2494 self.issnapshot = self._inner.issnapshot
2494 self.issnapshot = self._inner.issnapshot
2495 return ret
2495 return ret
2496
2496
2497 def snapshotdepth(self, rev):
2497 def snapshotdepth(self, rev):
2498 """number of snapshot in the chain before this one"""
2498 """number of snapshot in the chain before this one"""
2499 if not self.issnapshot(rev):
2499 if not self.issnapshot(rev):
2500 raise error.ProgrammingError(b'revision %d not a snapshot')
2500 raise error.ProgrammingError(b'revision %d not a snapshot')
2501 return len(self._inner._deltachain(rev)[0]) - 1
2501 return len(self._inner._deltachain(rev)[0]) - 1
2502
2502
2503 def revdiff(self, rev1, rev2):
2503 def revdiff(self, rev1, rev2):
2504 """return or calculate a delta between two revisions
2504 """return or calculate a delta between two revisions
2505
2505
2506 The delta calculated is in binary form and is intended to be written to
2506 The delta calculated is in binary form and is intended to be written to
2507 revlog data directly. So this function needs raw revision data.
2507 revlog data directly. So this function needs raw revision data.
2508 """
2508 """
2509 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2509 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2510 return bytes(self._inner._chunk(rev2))
2510 return bytes(self._inner._chunk(rev2))
2511
2511
2512 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2512 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2513
2513
2514 def revision(self, nodeorrev):
2514 def revision(self, nodeorrev):
2515 """return an uncompressed revision of a given node or revision
2515 """return an uncompressed revision of a given node or revision
2516 number.
2516 number.
2517 """
2517 """
2518 return self._revisiondata(nodeorrev)
2518 return self._revisiondata(nodeorrev)
2519
2519
2520 def sidedata(self, nodeorrev):
2520 def sidedata(self, nodeorrev):
2521 """a map of extra data related to the changeset but not part of the hash
2521 """a map of extra data related to the changeset but not part of the hash
2522
2522
2523 This function currently return a dictionary. However, more advanced
2523 This function currently return a dictionary. However, more advanced
2524 mapping object will likely be used in the future for a more
2524 mapping object will likely be used in the future for a more
2525 efficient/lazy code.
2525 efficient/lazy code.
2526 """
2526 """
2527 # deal with <nodeorrev> argument type
2527 # deal with <nodeorrev> argument type
2528 if isinstance(nodeorrev, int):
2528 if isinstance(nodeorrev, int):
2529 rev = nodeorrev
2529 rev = nodeorrev
2530 else:
2530 else:
2531 rev = self.rev(nodeorrev)
2531 rev = self.rev(nodeorrev)
2532 return self._sidedata(rev)
2532 return self._sidedata(rev)
2533
2533
2534 def _revisiondata(self, nodeorrev, raw=False):
2534 def _revisiondata(self, nodeorrev, raw=False):
2535 # deal with <nodeorrev> argument type
2535 # deal with <nodeorrev> argument type
2536 if isinstance(nodeorrev, int):
2536 if isinstance(nodeorrev, int):
2537 rev = nodeorrev
2537 rev = nodeorrev
2538 node = self.node(rev)
2538 node = self.node(rev)
2539 else:
2539 else:
2540 node = nodeorrev
2540 node = nodeorrev
2541 rev = None
2541 rev = None
2542
2542
2543 # fast path the special `nullid` rev
2543 # fast path the special `nullid` rev
2544 if node == self.nullid:
2544 if node == self.nullid:
2545 return b""
2545 return b""
2546
2546
2547 # ``rawtext`` is the text as stored inside the revlog. Might be the
2547 # ``rawtext`` is the text as stored inside the revlog. Might be the
2548 # revision or might need to be processed to retrieve the revision.
2548 # revision or might need to be processed to retrieve the revision.
2549 rev, rawtext, validated = self._rawtext(node, rev)
2549 rev, rawtext, validated = self._rawtext(node, rev)
2550
2550
2551 if raw and validated:
2551 if raw and validated:
2552 # if we don't want to process the raw text and that raw
2552 # if we don't want to process the raw text and that raw
2553 # text is cached, we can exit early.
2553 # text is cached, we can exit early.
2554 return rawtext
2554 return rawtext
2555 if rev is None:
2555 if rev is None:
2556 rev = self.rev(node)
2556 rev = self.rev(node)
2557 # the revlog's flag for this revision
2557 # the revlog's flag for this revision
2558 # (usually alter its state or content)
2558 # (usually alter its state or content)
2559 flags = self.flags(rev)
2559 flags = self.flags(rev)
2560
2560
2561 if validated and flags == REVIDX_DEFAULT_FLAGS:
2561 if validated and flags == REVIDX_DEFAULT_FLAGS:
2562 # no extra flags set, no flag processor runs, text = rawtext
2562 # no extra flags set, no flag processor runs, text = rawtext
2563 return rawtext
2563 return rawtext
2564
2564
2565 if raw:
2565 if raw:
2566 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2566 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2567 text = rawtext
2567 text = rawtext
2568 else:
2568 else:
2569 r = flagutil.processflagsread(self, rawtext, flags)
2569 r = flagutil.processflagsread(self, rawtext, flags)
2570 text, validatehash = r
2570 text, validatehash = r
2571 if validatehash:
2571 if validatehash:
2572 self.checkhash(text, node, rev=rev)
2572 self.checkhash(text, node, rev=rev)
2573 if not validated:
2573 if not validated:
2574 self._revisioncache = (node, rev, rawtext)
2574 self._inner._revisioncache = (node, rev, rawtext)
2575
2575
2576 return text
2576 return text
2577
2577
2578 def _rawtext(self, node, rev):
2578 def _rawtext(self, node, rev):
2579 """return the possibly unvalidated rawtext for a revision
2579 """return the possibly unvalidated rawtext for a revision
2580
2580
2581 returns (rev, rawtext, validated)
2581 returns (rev, rawtext, validated)
2582 """
2582 """
2583
2583
2584 # revision in the cache (could be useful to apply delta)
2584 # revision in the cache (could be useful to apply delta)
2585 cachedrev = None
2585 cachedrev = None
2586 # An intermediate text to apply deltas to
2586 # An intermediate text to apply deltas to
2587 basetext = None
2587 basetext = None
2588
2588
2589 # Check if we have the entry in cache
2589 # Check if we have the entry in cache
2590 # The cache entry looks like (node, rev, rawtext)
2590 # The cache entry looks like (node, rev, rawtext)
2591 if self._revisioncache:
2591 if self._inner._revisioncache:
2592 if self._revisioncache[0] == node:
2592 if self._inner._revisioncache[0] == node:
2593 return (rev, self._revisioncache[2], True)
2593 return (rev, self._inner._revisioncache[2], True)
2594 cachedrev = self._revisioncache[1]
2594 cachedrev = self._inner._revisioncache[1]
2595
2595
2596 if rev is None:
2596 if rev is None:
2597 rev = self.rev(node)
2597 rev = self.rev(node)
2598
2598
2599 chain, stopped = self._inner._deltachain(rev, stoprev=cachedrev)
2599 chain, stopped = self._inner._deltachain(rev, stoprev=cachedrev)
2600 if stopped:
2600 if stopped:
2601 basetext = self._revisioncache[2]
2601 basetext = self._inner._revisioncache[2]
2602
2602
2603 # drop cache to save memory, the caller is expected to
2603 # drop cache to save memory, the caller is expected to
2604 # update self._revisioncache after validating the text
2604 # update self._inner._revisioncache after validating the text
2605 self._revisioncache = None
2605 self._inner._revisioncache = None
2606
2606
2607 targetsize = None
2607 targetsize = None
2608 rawsize = self.index[rev][2]
2608 rawsize = self.index[rev][2]
2609 if 0 <= rawsize:
2609 if 0 <= rawsize:
2610 targetsize = 4 * rawsize
2610 targetsize = 4 * rawsize
2611
2611
2612 bins = self._inner._chunks(chain, targetsize=targetsize)
2612 bins = self._inner._chunks(chain, targetsize=targetsize)
2613 if basetext is None:
2613 if basetext is None:
2614 basetext = bytes(bins[0])
2614 basetext = bytes(bins[0])
2615 bins = bins[1:]
2615 bins = bins[1:]
2616
2616
2617 rawtext = mdiff.patches(basetext, bins)
2617 rawtext = mdiff.patches(basetext, bins)
2618 del basetext # let us have a chance to free memory early
2618 del basetext # let us have a chance to free memory early
2619 return (rev, rawtext, False)
2619 return (rev, rawtext, False)
2620
2620
2621 def _sidedata(self, rev):
2621 def _sidedata(self, rev):
2622 """Return the sidedata for a given revision number."""
2622 """Return the sidedata for a given revision number."""
2623 index_entry = self.index[rev]
2623 index_entry = self.index[rev]
2624 sidedata_offset = index_entry[8]
2624 sidedata_offset = index_entry[8]
2625 sidedata_size = index_entry[9]
2625 sidedata_size = index_entry[9]
2626
2626
2627 if self._inline:
2627 if self._inline:
2628 sidedata_offset += self.index.entry_size * (1 + rev)
2628 sidedata_offset += self.index.entry_size * (1 + rev)
2629 if sidedata_size == 0:
2629 if sidedata_size == 0:
2630 return {}
2630 return {}
2631
2631
2632 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2632 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2633 filename = self._sidedatafile
2633 filename = self._sidedatafile
2634 end = self._docket.sidedata_end
2634 end = self._docket.sidedata_end
2635 offset = sidedata_offset
2635 offset = sidedata_offset
2636 length = sidedata_size
2636 length = sidedata_size
2637 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2637 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2638 raise error.RevlogError(m)
2638 raise error.RevlogError(m)
2639
2639
2640 comp_segment = self._inner._segmentfile_sidedata.read_chunk(
2640 comp_segment = self._inner._segmentfile_sidedata.read_chunk(
2641 sidedata_offset, sidedata_size
2641 sidedata_offset, sidedata_size
2642 )
2642 )
2643
2643
2644 comp = self.index[rev][11]
2644 comp = self.index[rev][11]
2645 if comp == COMP_MODE_PLAIN:
2645 if comp == COMP_MODE_PLAIN:
2646 segment = comp_segment
2646 segment = comp_segment
2647 elif comp == COMP_MODE_DEFAULT:
2647 elif comp == COMP_MODE_DEFAULT:
2648 segment = self._inner._decompressor(comp_segment)
2648 segment = self._inner._decompressor(comp_segment)
2649 elif comp == COMP_MODE_INLINE:
2649 elif comp == COMP_MODE_INLINE:
2650 segment = self._inner.decompress(comp_segment)
2650 segment = self._inner.decompress(comp_segment)
2651 else:
2651 else:
2652 msg = b'unknown compression mode %d'
2652 msg = b'unknown compression mode %d'
2653 msg %= comp
2653 msg %= comp
2654 raise error.RevlogError(msg)
2654 raise error.RevlogError(msg)
2655
2655
2656 sidedata = sidedatautil.deserialize_sidedata(segment)
2656 sidedata = sidedatautil.deserialize_sidedata(segment)
2657 return sidedata
2657 return sidedata
2658
2658
2659 def rawdata(self, nodeorrev):
2659 def rawdata(self, nodeorrev):
2660 """return an uncompressed raw data of a given node or revision number."""
2660 """return an uncompressed raw data of a given node or revision number."""
2661 return self._revisiondata(nodeorrev, raw=True)
2661 return self._revisiondata(nodeorrev, raw=True)
2662
2662
2663 def hash(self, text, p1, p2):
2663 def hash(self, text, p1, p2):
2664 """Compute a node hash.
2664 """Compute a node hash.
2665
2665
2666 Available as a function so that subclasses can replace the hash
2666 Available as a function so that subclasses can replace the hash
2667 as needed.
2667 as needed.
2668 """
2668 """
2669 return storageutil.hashrevisionsha1(text, p1, p2)
2669 return storageutil.hashrevisionsha1(text, p1, p2)
2670
2670
2671 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2671 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2672 """Check node hash integrity.
2672 """Check node hash integrity.
2673
2673
2674 Available as a function so that subclasses can extend hash mismatch
2674 Available as a function so that subclasses can extend hash mismatch
2675 behaviors as needed.
2675 behaviors as needed.
2676 """
2676 """
2677 try:
2677 try:
2678 if p1 is None and p2 is None:
2678 if p1 is None and p2 is None:
2679 p1, p2 = self.parents(node)
2679 p1, p2 = self.parents(node)
2680 if node != self.hash(text, p1, p2):
2680 if node != self.hash(text, p1, p2):
2681 # Clear the revision cache on hash failure. The revision cache
2681 # Clear the revision cache on hash failure. The revision cache
2682 # only stores the raw revision and clearing the cache does have
2682 # only stores the raw revision and clearing the cache does have
2683 # the side-effect that we won't have a cache hit when the raw
2683 # the side-effect that we won't have a cache hit when the raw
2684 # revision data is accessed. But this case should be rare and
2684 # revision data is accessed. But this case should be rare and
2685 # it is extra work to teach the cache about the hash
2685 # it is extra work to teach the cache about the hash
2686 # verification state.
2686 # verification state.
2687 if self._revisioncache and self._revisioncache[0] == node:
2687 if (
2688 self._revisioncache = None
2688 self._inner._revisioncache
2689 and self._inner._revisioncache[0] == node
2690 ):
2691 self._inner._revisioncache = None
2689
2692
2690 revornode = rev
2693 revornode = rev
2691 if revornode is None:
2694 if revornode is None:
2692 revornode = templatefilters.short(hex(node))
2695 revornode = templatefilters.short(hex(node))
2693 raise error.RevlogError(
2696 raise error.RevlogError(
2694 _(b"integrity check failed on %s:%s")
2697 _(b"integrity check failed on %s:%s")
2695 % (self.display_id, pycompat.bytestr(revornode))
2698 % (self.display_id, pycompat.bytestr(revornode))
2696 )
2699 )
2697 except error.RevlogError:
2700 except error.RevlogError:
2698 if self.feature_config.censorable and storageutil.iscensoredtext(
2701 if self.feature_config.censorable and storageutil.iscensoredtext(
2699 text
2702 text
2700 ):
2703 ):
2701 raise error.CensoredNodeError(self.display_id, node, text)
2704 raise error.CensoredNodeError(self.display_id, node, text)
2702 raise
2705 raise
2703
2706
2704 @property
2707 @property
2705 def _split_index_file(self):
2708 def _split_index_file(self):
2706 """the path where to expect the index of an ongoing splitting operation
2709 """the path where to expect the index of an ongoing splitting operation
2707
2710
2708 The file will only exist if a splitting operation is in progress, but
2711 The file will only exist if a splitting operation is in progress, but
2709 it is always expected at the same location."""
2712 it is always expected at the same location."""
2710 parts = self.radix.split(b'/')
2713 parts = self.radix.split(b'/')
2711 if len(parts) > 1:
2714 if len(parts) > 1:
2712 # adds a '-s' prefix to the ``data/` or `meta/` base
2715 # adds a '-s' prefix to the ``data/` or `meta/` base
2713 head = parts[0] + b'-s'
2716 head = parts[0] + b'-s'
2714 mids = parts[1:-1]
2717 mids = parts[1:-1]
2715 tail = parts[-1] + b'.i'
2718 tail = parts[-1] + b'.i'
2716 pieces = [head] + mids + [tail]
2719 pieces = [head] + mids + [tail]
2717 return b'/'.join(pieces)
2720 return b'/'.join(pieces)
2718 else:
2721 else:
2719 # the revlog is stored at the root of the store (changelog or
2722 # the revlog is stored at the root of the store (changelog or
2720 # manifest), no risk of collision.
2723 # manifest), no risk of collision.
2721 return self.radix + b'.i.s'
2724 return self.radix + b'.i.s'
2722
2725
2723 def _enforceinlinesize(self, tr, side_write=True):
2726 def _enforceinlinesize(self, tr, side_write=True):
2724 """Check if the revlog is too big for inline and convert if so.
2727 """Check if the revlog is too big for inline and convert if so.
2725
2728
2726 This should be called after revisions are added to the revlog. If the
2729 This should be called after revisions are added to the revlog. If the
2727 revlog has grown too large to be an inline revlog, it will convert it
2730 revlog has grown too large to be an inline revlog, it will convert it
2728 to use multiple index and data files.
2731 to use multiple index and data files.
2729 """
2732 """
2730 tiprev = len(self) - 1
2733 tiprev = len(self) - 1
2731 total_size = self.start(tiprev) + self.length(tiprev)
2734 total_size = self.start(tiprev) + self.length(tiprev)
2732 if not self._inline or total_size < _maxinline:
2735 if not self._inline or total_size < _maxinline:
2733 return
2736 return
2734
2737
2735 if self._docket is not None:
2738 if self._docket is not None:
2736 msg = b"inline revlog should not have a docket"
2739 msg = b"inline revlog should not have a docket"
2737 raise error.ProgrammingError(msg)
2740 raise error.ProgrammingError(msg)
2738
2741
2739 troffset = tr.findoffset(self._indexfile)
2742 troffset = tr.findoffset(self._indexfile)
2740 if troffset is None:
2743 if troffset is None:
2741 raise error.RevlogError(
2744 raise error.RevlogError(
2742 _(b"%s not found in the transaction") % self._indexfile
2745 _(b"%s not found in the transaction") % self._indexfile
2743 )
2746 )
2744 if troffset:
2747 if troffset:
2745 tr.addbackup(self._indexfile, for_offset=True)
2748 tr.addbackup(self._indexfile, for_offset=True)
2746 tr.add(self._datafile, 0)
2749 tr.add(self._datafile, 0)
2747
2750
2748 new_index_file_path = None
2751 new_index_file_path = None
2749 if side_write:
2752 if side_write:
2750 old_index_file_path = self._indexfile
2753 old_index_file_path = self._indexfile
2751 new_index_file_path = self._split_index_file
2754 new_index_file_path = self._split_index_file
2752 opener = self.opener
2755 opener = self.opener
2753 weak_self = weakref.ref(self)
2756 weak_self = weakref.ref(self)
2754
2757
2755 # the "split" index replace the real index when the transaction is
2758 # the "split" index replace the real index when the transaction is
2756 # finalized
2759 # finalized
2757 def finalize_callback(tr):
2760 def finalize_callback(tr):
2758 opener.rename(
2761 opener.rename(
2759 new_index_file_path,
2762 new_index_file_path,
2760 old_index_file_path,
2763 old_index_file_path,
2761 checkambig=True,
2764 checkambig=True,
2762 )
2765 )
2763 maybe_self = weak_self()
2766 maybe_self = weak_self()
2764 if maybe_self is not None:
2767 if maybe_self is not None:
2765 maybe_self._indexfile = old_index_file_path
2768 maybe_self._indexfile = old_index_file_path
2766 maybe_self._inner.index_file = maybe_self._indexfile
2769 maybe_self._inner.index_file = maybe_self._indexfile
2767
2770
2768 def abort_callback(tr):
2771 def abort_callback(tr):
2769 maybe_self = weak_self()
2772 maybe_self = weak_self()
2770 if maybe_self is not None:
2773 if maybe_self is not None:
2771 maybe_self._indexfile = old_index_file_path
2774 maybe_self._indexfile = old_index_file_path
2772 maybe_self._inner.inline = True
2775 maybe_self._inner.inline = True
2773 maybe_self._inner.index_file = old_index_file_path
2776 maybe_self._inner.index_file = old_index_file_path
2774
2777
2775 tr.registertmp(new_index_file_path)
2778 tr.registertmp(new_index_file_path)
2776 if self.target[1] is not None:
2779 if self.target[1] is not None:
2777 callback_id = b'000-revlog-split-%d-%s' % self.target
2780 callback_id = b'000-revlog-split-%d-%s' % self.target
2778 else:
2781 else:
2779 callback_id = b'000-revlog-split-%d' % self.target[0]
2782 callback_id = b'000-revlog-split-%d' % self.target[0]
2780 tr.addfinalize(callback_id, finalize_callback)
2783 tr.addfinalize(callback_id, finalize_callback)
2781 tr.addabort(callback_id, abort_callback)
2784 tr.addabort(callback_id, abort_callback)
2782
2785
2783 self._format_flags &= ~FLAG_INLINE_DATA
2786 self._format_flags &= ~FLAG_INLINE_DATA
2784 self._inner.split_inline(
2787 self._inner.split_inline(
2785 tr,
2788 tr,
2786 self._format_flags | self._format_version,
2789 self._format_flags | self._format_version,
2787 new_index_file_path=new_index_file_path,
2790 new_index_file_path=new_index_file_path,
2788 )
2791 )
2789
2792
2790 self._inline = False
2793 self._inline = False
2791 if new_index_file_path is not None:
2794 if new_index_file_path is not None:
2792 self._indexfile = new_index_file_path
2795 self._indexfile = new_index_file_path
2793
2796
2794 nodemaputil.setup_persistent_nodemap(tr, self)
2797 nodemaputil.setup_persistent_nodemap(tr, self)
2795
2798
2796 def _nodeduplicatecallback(self, transaction, node):
2799 def _nodeduplicatecallback(self, transaction, node):
2797 """called when trying to add a node already stored."""
2800 """called when trying to add a node already stored."""
2798
2801
2799 @contextlib.contextmanager
2802 @contextlib.contextmanager
2800 def reading(self):
2803 def reading(self):
2801 with self._inner.reading():
2804 with self._inner.reading():
2802 yield
2805 yield
2803
2806
2804 @contextlib.contextmanager
2807 @contextlib.contextmanager
2805 def _writing(self, transaction):
2808 def _writing(self, transaction):
2806 if self._trypending:
2809 if self._trypending:
2807 msg = b'try to write in a `trypending` revlog: %s'
2810 msg = b'try to write in a `trypending` revlog: %s'
2808 msg %= self.display_id
2811 msg %= self.display_id
2809 raise error.ProgrammingError(msg)
2812 raise error.ProgrammingError(msg)
2810 if self._inner.is_writing:
2813 if self._inner.is_writing:
2811 yield
2814 yield
2812 else:
2815 else:
2813 data_end = None
2816 data_end = None
2814 sidedata_end = None
2817 sidedata_end = None
2815 if self._docket is not None:
2818 if self._docket is not None:
2816 data_end = self._docket.data_end
2819 data_end = self._docket.data_end
2817 sidedata_end = self._docket.sidedata_end
2820 sidedata_end = self._docket.sidedata_end
2818 with self._inner.writing(
2821 with self._inner.writing(
2819 transaction,
2822 transaction,
2820 data_end=data_end,
2823 data_end=data_end,
2821 sidedata_end=sidedata_end,
2824 sidedata_end=sidedata_end,
2822 ):
2825 ):
2823 yield
2826 yield
2824 if self._docket is not None:
2827 if self._docket is not None:
2825 self._write_docket(transaction)
2828 self._write_docket(transaction)
2826
2829
2827 def _write_docket(self, transaction):
2830 def _write_docket(self, transaction):
2828 """write the current docket on disk
2831 """write the current docket on disk
2829
2832
2830 Exist as a method to help changelog to implement transaction logic
2833 Exist as a method to help changelog to implement transaction logic
2831
2834
2832 We could also imagine using the same transaction logic for all revlog
2835 We could also imagine using the same transaction logic for all revlog
2833 since docket are cheap."""
2836 since docket are cheap."""
2834 self._docket.write(transaction)
2837 self._docket.write(transaction)
2835
2838
2836 def addrevision(
2839 def addrevision(
2837 self,
2840 self,
2838 text,
2841 text,
2839 transaction,
2842 transaction,
2840 link,
2843 link,
2841 p1,
2844 p1,
2842 p2,
2845 p2,
2843 cachedelta=None,
2846 cachedelta=None,
2844 node=None,
2847 node=None,
2845 flags=REVIDX_DEFAULT_FLAGS,
2848 flags=REVIDX_DEFAULT_FLAGS,
2846 deltacomputer=None,
2849 deltacomputer=None,
2847 sidedata=None,
2850 sidedata=None,
2848 ):
2851 ):
2849 """add a revision to the log
2852 """add a revision to the log
2850
2853
2851 text - the revision data to add
2854 text - the revision data to add
2852 transaction - the transaction object used for rollback
2855 transaction - the transaction object used for rollback
2853 link - the linkrev data to add
2856 link - the linkrev data to add
2854 p1, p2 - the parent nodeids of the revision
2857 p1, p2 - the parent nodeids of the revision
2855 cachedelta - an optional precomputed delta
2858 cachedelta - an optional precomputed delta
2856 node - nodeid of revision; typically node is not specified, and it is
2859 node - nodeid of revision; typically node is not specified, and it is
2857 computed by default as hash(text, p1, p2), however subclasses might
2860 computed by default as hash(text, p1, p2), however subclasses might
2858 use different hashing method (and override checkhash() in such case)
2861 use different hashing method (and override checkhash() in such case)
2859 flags - the known flags to set on the revision
2862 flags - the known flags to set on the revision
2860 deltacomputer - an optional deltacomputer instance shared between
2863 deltacomputer - an optional deltacomputer instance shared between
2861 multiple calls
2864 multiple calls
2862 """
2865 """
2863 if link == nullrev:
2866 if link == nullrev:
2864 raise error.RevlogError(
2867 raise error.RevlogError(
2865 _(b"attempted to add linkrev -1 to %s") % self.display_id
2868 _(b"attempted to add linkrev -1 to %s") % self.display_id
2866 )
2869 )
2867
2870
2868 if sidedata is None:
2871 if sidedata is None:
2869 sidedata = {}
2872 sidedata = {}
2870 elif sidedata and not self.feature_config.has_side_data:
2873 elif sidedata and not self.feature_config.has_side_data:
2871 raise error.ProgrammingError(
2874 raise error.ProgrammingError(
2872 _(b"trying to add sidedata to a revlog who don't support them")
2875 _(b"trying to add sidedata to a revlog who don't support them")
2873 )
2876 )
2874
2877
2875 if flags:
2878 if flags:
2876 node = node or self.hash(text, p1, p2)
2879 node = node or self.hash(text, p1, p2)
2877
2880
2878 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2881 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2879
2882
2880 # If the flag processor modifies the revision data, ignore any provided
2883 # If the flag processor modifies the revision data, ignore any provided
2881 # cachedelta.
2884 # cachedelta.
2882 if rawtext != text:
2885 if rawtext != text:
2883 cachedelta = None
2886 cachedelta = None
2884
2887
2885 if len(rawtext) > _maxentrysize:
2888 if len(rawtext) > _maxentrysize:
2886 raise error.RevlogError(
2889 raise error.RevlogError(
2887 _(
2890 _(
2888 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2891 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2889 )
2892 )
2890 % (self.display_id, len(rawtext))
2893 % (self.display_id, len(rawtext))
2891 )
2894 )
2892
2895
2893 node = node or self.hash(rawtext, p1, p2)
2896 node = node or self.hash(rawtext, p1, p2)
2894 rev = self.index.get_rev(node)
2897 rev = self.index.get_rev(node)
2895 if rev is not None:
2898 if rev is not None:
2896 return rev
2899 return rev
2897
2900
2898 if validatehash:
2901 if validatehash:
2899 self.checkhash(rawtext, node, p1=p1, p2=p2)
2902 self.checkhash(rawtext, node, p1=p1, p2=p2)
2900
2903
2901 return self.addrawrevision(
2904 return self.addrawrevision(
2902 rawtext,
2905 rawtext,
2903 transaction,
2906 transaction,
2904 link,
2907 link,
2905 p1,
2908 p1,
2906 p2,
2909 p2,
2907 node,
2910 node,
2908 flags,
2911 flags,
2909 cachedelta=cachedelta,
2912 cachedelta=cachedelta,
2910 deltacomputer=deltacomputer,
2913 deltacomputer=deltacomputer,
2911 sidedata=sidedata,
2914 sidedata=sidedata,
2912 )
2915 )
2913
2916
2914 def addrawrevision(
2917 def addrawrevision(
2915 self,
2918 self,
2916 rawtext,
2919 rawtext,
2917 transaction,
2920 transaction,
2918 link,
2921 link,
2919 p1,
2922 p1,
2920 p2,
2923 p2,
2921 node,
2924 node,
2922 flags,
2925 flags,
2923 cachedelta=None,
2926 cachedelta=None,
2924 deltacomputer=None,
2927 deltacomputer=None,
2925 sidedata=None,
2928 sidedata=None,
2926 ):
2929 ):
2927 """add a raw revision with known flags, node and parents
2930 """add a raw revision with known flags, node and parents
2928 useful when reusing a revision not stored in this revlog (ex: received
2931 useful when reusing a revision not stored in this revlog (ex: received
2929 over wire, or read from an external bundle).
2932 over wire, or read from an external bundle).
2930 """
2933 """
2931 with self._writing(transaction):
2934 with self._writing(transaction):
2932 return self._addrevision(
2935 return self._addrevision(
2933 node,
2936 node,
2934 rawtext,
2937 rawtext,
2935 transaction,
2938 transaction,
2936 link,
2939 link,
2937 p1,
2940 p1,
2938 p2,
2941 p2,
2939 flags,
2942 flags,
2940 cachedelta,
2943 cachedelta,
2941 deltacomputer=deltacomputer,
2944 deltacomputer=deltacomputer,
2942 sidedata=sidedata,
2945 sidedata=sidedata,
2943 )
2946 )
2944
2947
2945 def compress(self, data):
2948 def compress(self, data):
2946 return self._inner.compress(data)
2949 return self._inner.compress(data)
2947
2950
2948 def decompress(self, data):
2951 def decompress(self, data):
2949 return self._inner.decompress(data)
2952 return self._inner.decompress(data)
2950
2953
2951 def _addrevision(
2954 def _addrevision(
2952 self,
2955 self,
2953 node,
2956 node,
2954 rawtext,
2957 rawtext,
2955 transaction,
2958 transaction,
2956 link,
2959 link,
2957 p1,
2960 p1,
2958 p2,
2961 p2,
2959 flags,
2962 flags,
2960 cachedelta,
2963 cachedelta,
2961 alwayscache=False,
2964 alwayscache=False,
2962 deltacomputer=None,
2965 deltacomputer=None,
2963 sidedata=None,
2966 sidedata=None,
2964 ):
2967 ):
2965 """internal function to add revisions to the log
2968 """internal function to add revisions to the log
2966
2969
2967 see addrevision for argument descriptions.
2970 see addrevision for argument descriptions.
2968
2971
2969 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2972 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2970
2973
2971 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2974 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2972 be used.
2975 be used.
2973
2976
2974 invariants:
2977 invariants:
2975 - rawtext is optional (can be None); if not set, cachedelta must be set.
2978 - rawtext is optional (can be None); if not set, cachedelta must be set.
2976 if both are set, they must correspond to each other.
2979 if both are set, they must correspond to each other.
2977 """
2980 """
2978 if node == self.nullid:
2981 if node == self.nullid:
2979 raise error.RevlogError(
2982 raise error.RevlogError(
2980 _(b"%s: attempt to add null revision") % self.display_id
2983 _(b"%s: attempt to add null revision") % self.display_id
2981 )
2984 )
2982 if (
2985 if (
2983 node == self.nodeconstants.wdirid
2986 node == self.nodeconstants.wdirid
2984 or node in self.nodeconstants.wdirfilenodeids
2987 or node in self.nodeconstants.wdirfilenodeids
2985 ):
2988 ):
2986 raise error.RevlogError(
2989 raise error.RevlogError(
2987 _(b"%s: attempt to add wdir revision") % self.display_id
2990 _(b"%s: attempt to add wdir revision") % self.display_id
2988 )
2991 )
2989 if self._inner._writinghandles is None:
2992 if self._inner._writinghandles is None:
2990 msg = b'adding revision outside `revlog._writing` context'
2993 msg = b'adding revision outside `revlog._writing` context'
2991 raise error.ProgrammingError(msg)
2994 raise error.ProgrammingError(msg)
2992
2995
2993 btext = [rawtext]
2996 btext = [rawtext]
2994
2997
2995 curr = len(self)
2998 curr = len(self)
2996 prev = curr - 1
2999 prev = curr - 1
2997
3000
2998 offset = self._get_data_offset(prev)
3001 offset = self._get_data_offset(prev)
2999
3002
3000 if self._concurrencychecker:
3003 if self._concurrencychecker:
3001 ifh, dfh, sdfh = self._inner._writinghandles
3004 ifh, dfh, sdfh = self._inner._writinghandles
3002 # XXX no checking for the sidedata file
3005 # XXX no checking for the sidedata file
3003 if self._inline:
3006 if self._inline:
3004 # offset is "as if" it were in the .d file, so we need to add on
3007 # offset is "as if" it were in the .d file, so we need to add on
3005 # the size of the entry metadata.
3008 # the size of the entry metadata.
3006 self._concurrencychecker(
3009 self._concurrencychecker(
3007 ifh, self._indexfile, offset + curr * self.index.entry_size
3010 ifh, self._indexfile, offset + curr * self.index.entry_size
3008 )
3011 )
3009 else:
3012 else:
3010 # Entries in the .i are a consistent size.
3013 # Entries in the .i are a consistent size.
3011 self._concurrencychecker(
3014 self._concurrencychecker(
3012 ifh, self._indexfile, curr * self.index.entry_size
3015 ifh, self._indexfile, curr * self.index.entry_size
3013 )
3016 )
3014 self._concurrencychecker(dfh, self._datafile, offset)
3017 self._concurrencychecker(dfh, self._datafile, offset)
3015
3018
3016 p1r, p2r = self.rev(p1), self.rev(p2)
3019 p1r, p2r = self.rev(p1), self.rev(p2)
3017
3020
3018 # full versions are inserted when the needed deltas
3021 # full versions are inserted when the needed deltas
3019 # become comparable to the uncompressed text
3022 # become comparable to the uncompressed text
3020 if rawtext is None:
3023 if rawtext is None:
3021 # need rawtext size, before changed by flag processors, which is
3024 # need rawtext size, before changed by flag processors, which is
3022 # the non-raw size. use revlog explicitly to avoid filelog's extra
3025 # the non-raw size. use revlog explicitly to avoid filelog's extra
3023 # logic that might remove metadata size.
3026 # logic that might remove metadata size.
3024 textlen = mdiff.patchedsize(
3027 textlen = mdiff.patchedsize(
3025 revlog.size(self, cachedelta[0]), cachedelta[1]
3028 revlog.size(self, cachedelta[0]), cachedelta[1]
3026 )
3029 )
3027 else:
3030 else:
3028 textlen = len(rawtext)
3031 textlen = len(rawtext)
3029
3032
3030 if deltacomputer is None:
3033 if deltacomputer is None:
3031 write_debug = None
3034 write_debug = None
3032 if self.delta_config.debug_delta:
3035 if self.delta_config.debug_delta:
3033 write_debug = transaction._report
3036 write_debug = transaction._report
3034 deltacomputer = deltautil.deltacomputer(
3037 deltacomputer = deltautil.deltacomputer(
3035 self, write_debug=write_debug
3038 self, write_debug=write_debug
3036 )
3039 )
3037
3040
3038 if cachedelta is not None and len(cachedelta) == 2:
3041 if cachedelta is not None and len(cachedelta) == 2:
3039 # If the cached delta has no information about how it should be
3042 # If the cached delta has no information about how it should be
3040 # reused, add the default reuse instruction according to the
3043 # reused, add the default reuse instruction according to the
3041 # revlog's configuration.
3044 # revlog's configuration.
3042 if (
3045 if (
3043 self.delta_config.general_delta
3046 self.delta_config.general_delta
3044 and self.delta_config.lazy_delta_base
3047 and self.delta_config.lazy_delta_base
3045 ):
3048 ):
3046 delta_base_reuse = DELTA_BASE_REUSE_TRY
3049 delta_base_reuse = DELTA_BASE_REUSE_TRY
3047 else:
3050 else:
3048 delta_base_reuse = DELTA_BASE_REUSE_NO
3051 delta_base_reuse = DELTA_BASE_REUSE_NO
3049 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
3052 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
3050
3053
3051 revinfo = revlogutils.revisioninfo(
3054 revinfo = revlogutils.revisioninfo(
3052 node,
3055 node,
3053 p1,
3056 p1,
3054 p2,
3057 p2,
3055 btext,
3058 btext,
3056 textlen,
3059 textlen,
3057 cachedelta,
3060 cachedelta,
3058 flags,
3061 flags,
3059 )
3062 )
3060
3063
3061 deltainfo = deltacomputer.finddeltainfo(revinfo)
3064 deltainfo = deltacomputer.finddeltainfo(revinfo)
3062
3065
3063 compression_mode = COMP_MODE_INLINE
3066 compression_mode = COMP_MODE_INLINE
3064 if self._docket is not None:
3067 if self._docket is not None:
3065 default_comp = self._docket.default_compression_header
3068 default_comp = self._docket.default_compression_header
3066 r = deltautil.delta_compression(default_comp, deltainfo)
3069 r = deltautil.delta_compression(default_comp, deltainfo)
3067 compression_mode, deltainfo = r
3070 compression_mode, deltainfo = r
3068
3071
3069 sidedata_compression_mode = COMP_MODE_INLINE
3072 sidedata_compression_mode = COMP_MODE_INLINE
3070 if sidedata and self.feature_config.has_side_data:
3073 if sidedata and self.feature_config.has_side_data:
3071 sidedata_compression_mode = COMP_MODE_PLAIN
3074 sidedata_compression_mode = COMP_MODE_PLAIN
3072 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3075 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3073 sidedata_offset = self._docket.sidedata_end
3076 sidedata_offset = self._docket.sidedata_end
3074 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3077 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3075 if (
3078 if (
3076 h != b'u'
3079 h != b'u'
3077 and comp_sidedata[0:1] != b'\0'
3080 and comp_sidedata[0:1] != b'\0'
3078 and len(comp_sidedata) < len(serialized_sidedata)
3081 and len(comp_sidedata) < len(serialized_sidedata)
3079 ):
3082 ):
3080 assert not h
3083 assert not h
3081 if (
3084 if (
3082 comp_sidedata[0:1]
3085 comp_sidedata[0:1]
3083 == self._docket.default_compression_header
3086 == self._docket.default_compression_header
3084 ):
3087 ):
3085 sidedata_compression_mode = COMP_MODE_DEFAULT
3088 sidedata_compression_mode = COMP_MODE_DEFAULT
3086 serialized_sidedata = comp_sidedata
3089 serialized_sidedata = comp_sidedata
3087 else:
3090 else:
3088 sidedata_compression_mode = COMP_MODE_INLINE
3091 sidedata_compression_mode = COMP_MODE_INLINE
3089 serialized_sidedata = comp_sidedata
3092 serialized_sidedata = comp_sidedata
3090 else:
3093 else:
3091 serialized_sidedata = b""
3094 serialized_sidedata = b""
3092 # Don't store the offset if the sidedata is empty, that way
3095 # Don't store the offset if the sidedata is empty, that way
3093 # we can easily detect empty sidedata and they will be no different
3096 # we can easily detect empty sidedata and they will be no different
3094 # than ones we manually add.
3097 # than ones we manually add.
3095 sidedata_offset = 0
3098 sidedata_offset = 0
3096
3099
3097 rank = RANK_UNKNOWN
3100 rank = RANK_UNKNOWN
3098 if self.feature_config.compute_rank:
3101 if self.feature_config.compute_rank:
3099 if (p1r, p2r) == (nullrev, nullrev):
3102 if (p1r, p2r) == (nullrev, nullrev):
3100 rank = 1
3103 rank = 1
3101 elif p1r != nullrev and p2r == nullrev:
3104 elif p1r != nullrev and p2r == nullrev:
3102 rank = 1 + self.fast_rank(p1r)
3105 rank = 1 + self.fast_rank(p1r)
3103 elif p1r == nullrev and p2r != nullrev:
3106 elif p1r == nullrev and p2r != nullrev:
3104 rank = 1 + self.fast_rank(p2r)
3107 rank = 1 + self.fast_rank(p2r)
3105 else: # merge node
3108 else: # merge node
3106 if rustdagop is not None and self.index.rust_ext_compat:
3109 if rustdagop is not None and self.index.rust_ext_compat:
3107 rank = rustdagop.rank(self.index, p1r, p2r)
3110 rank = rustdagop.rank(self.index, p1r, p2r)
3108 else:
3111 else:
3109 pmin, pmax = sorted((p1r, p2r))
3112 pmin, pmax = sorted((p1r, p2r))
3110 rank = 1 + self.fast_rank(pmax)
3113 rank = 1 + self.fast_rank(pmax)
3111 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
3114 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
3112
3115
3113 e = revlogutils.entry(
3116 e = revlogutils.entry(
3114 flags=flags,
3117 flags=flags,
3115 data_offset=offset,
3118 data_offset=offset,
3116 data_compressed_length=deltainfo.deltalen,
3119 data_compressed_length=deltainfo.deltalen,
3117 data_uncompressed_length=textlen,
3120 data_uncompressed_length=textlen,
3118 data_compression_mode=compression_mode,
3121 data_compression_mode=compression_mode,
3119 data_delta_base=deltainfo.base,
3122 data_delta_base=deltainfo.base,
3120 link_rev=link,
3123 link_rev=link,
3121 parent_rev_1=p1r,
3124 parent_rev_1=p1r,
3122 parent_rev_2=p2r,
3125 parent_rev_2=p2r,
3123 node_id=node,
3126 node_id=node,
3124 sidedata_offset=sidedata_offset,
3127 sidedata_offset=sidedata_offset,
3125 sidedata_compressed_length=len(serialized_sidedata),
3128 sidedata_compressed_length=len(serialized_sidedata),
3126 sidedata_compression_mode=sidedata_compression_mode,
3129 sidedata_compression_mode=sidedata_compression_mode,
3127 rank=rank,
3130 rank=rank,
3128 )
3131 )
3129
3132
3130 self.index.append(e)
3133 self.index.append(e)
3131 entry = self.index.entry_binary(curr)
3134 entry = self.index.entry_binary(curr)
3132 if curr == 0 and self._docket is None:
3135 if curr == 0 and self._docket is None:
3133 header = self._format_flags | self._format_version
3136 header = self._format_flags | self._format_version
3134 header = self.index.pack_header(header)
3137 header = self.index.pack_header(header)
3135 entry = header + entry
3138 entry = header + entry
3136 self._writeentry(
3139 self._writeentry(
3137 transaction,
3140 transaction,
3138 entry,
3141 entry,
3139 deltainfo.data,
3142 deltainfo.data,
3140 link,
3143 link,
3141 offset,
3144 offset,
3142 serialized_sidedata,
3145 serialized_sidedata,
3143 sidedata_offset,
3146 sidedata_offset,
3144 )
3147 )
3145
3148
3146 rawtext = btext[0]
3149 rawtext = btext[0]
3147
3150
3148 if alwayscache and rawtext is None:
3151 if alwayscache and rawtext is None:
3149 rawtext = deltacomputer.buildtext(revinfo)
3152 rawtext = deltacomputer.buildtext(revinfo)
3150
3153
3151 if type(rawtext) == bytes: # only accept immutable objects
3154 if type(rawtext) == bytes: # only accept immutable objects
3152 self._revisioncache = (node, curr, rawtext)
3155 self._inner._revisioncache = (node, curr, rawtext)
3153 self._chainbasecache[curr] = deltainfo.chainbase
3156 self._chainbasecache[curr] = deltainfo.chainbase
3154 return curr
3157 return curr
3155
3158
3156 def _get_data_offset(self, prev):
3159 def _get_data_offset(self, prev):
3157 """Returns the current offset in the (in-transaction) data file.
3160 """Returns the current offset in the (in-transaction) data file.
3158 Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
3161 Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
3159 file to store that information: since sidedata can be rewritten to the
3162 file to store that information: since sidedata can be rewritten to the
3160 end of the data file within a transaction, you can have cases where, for
3163 end of the data file within a transaction, you can have cases where, for
3161 example, rev `n` does not have sidedata while rev `n - 1` does, leading
3164 example, rev `n` does not have sidedata while rev `n - 1` does, leading
3162 to `n - 1`'s sidedata being written after `n`'s data.
3165 to `n - 1`'s sidedata being written after `n`'s data.
3163
3166
3164 TODO cache this in a docket file before getting out of experimental."""
3167 TODO cache this in a docket file before getting out of experimental."""
3165 if self._docket is None:
3168 if self._docket is None:
3166 return self.end(prev)
3169 return self.end(prev)
3167 else:
3170 else:
3168 return self._docket.data_end
3171 return self._docket.data_end
3169
3172
3170 def _writeentry(
3173 def _writeentry(
3171 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
3174 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
3172 ):
3175 ):
3173 # Files opened in a+ mode have inconsistent behavior on various
3176 # Files opened in a+ mode have inconsistent behavior on various
3174 # platforms. Windows requires that a file positioning call be made
3177 # platforms. Windows requires that a file positioning call be made
3175 # when the file handle transitions between reads and writes. See
3178 # when the file handle transitions between reads and writes. See
3176 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
3179 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
3177 # platforms, Python or the platform itself can be buggy. Some versions
3180 # platforms, Python or the platform itself can be buggy. Some versions
3178 # of Solaris have been observed to not append at the end of the file
3181 # of Solaris have been observed to not append at the end of the file
3179 # if the file was seeked to before the end. See issue4943 for more.
3182 # if the file was seeked to before the end. See issue4943 for more.
3180 #
3183 #
3181 # We work around this issue by inserting a seek() before writing.
3184 # We work around this issue by inserting a seek() before writing.
3182 # Note: This is likely not necessary on Python 3. However, because
3185 # Note: This is likely not necessary on Python 3. However, because
3183 # the file handle is reused for reads and may be seeked there, we need
3186 # the file handle is reused for reads and may be seeked there, we need
3184 # to be careful before changing this.
3187 # to be careful before changing this.
3185 if self._inner._writinghandles is None:
3188 if self._inner._writinghandles is None:
3186 msg = b'adding revision outside `revlog._writing` context'
3189 msg = b'adding revision outside `revlog._writing` context'
3187 raise error.ProgrammingError(msg)
3190 raise error.ProgrammingError(msg)
3188 ifh, dfh, sdfh = self._inner._writinghandles
3191 ifh, dfh, sdfh = self._inner._writinghandles
3189 if self._docket is None:
3192 if self._docket is None:
3190 ifh.seek(0, os.SEEK_END)
3193 ifh.seek(0, os.SEEK_END)
3191 else:
3194 else:
3192 ifh.seek(self._docket.index_end, os.SEEK_SET)
3195 ifh.seek(self._docket.index_end, os.SEEK_SET)
3193 if dfh:
3196 if dfh:
3194 if self._docket is None:
3197 if self._docket is None:
3195 dfh.seek(0, os.SEEK_END)
3198 dfh.seek(0, os.SEEK_END)
3196 else:
3199 else:
3197 dfh.seek(self._docket.data_end, os.SEEK_SET)
3200 dfh.seek(self._docket.data_end, os.SEEK_SET)
3198 if sdfh:
3201 if sdfh:
3199 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3202 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3200
3203
3201 curr = len(self) - 1
3204 curr = len(self) - 1
3202 if not self._inline:
3205 if not self._inline:
3203 transaction.add(self._datafile, offset)
3206 transaction.add(self._datafile, offset)
3204 if self._sidedatafile:
3207 if self._sidedatafile:
3205 transaction.add(self._sidedatafile, sidedata_offset)
3208 transaction.add(self._sidedatafile, sidedata_offset)
3206 transaction.add(self._indexfile, curr * len(entry))
3209 transaction.add(self._indexfile, curr * len(entry))
3207 if data[0]:
3210 if data[0]:
3208 dfh.write(data[0])
3211 dfh.write(data[0])
3209 dfh.write(data[1])
3212 dfh.write(data[1])
3210 if sidedata:
3213 if sidedata:
3211 sdfh.write(sidedata)
3214 sdfh.write(sidedata)
3212 ifh.write(entry)
3215 ifh.write(entry)
3213 else:
3216 else:
3214 offset += curr * self.index.entry_size
3217 offset += curr * self.index.entry_size
3215 transaction.add(self._indexfile, offset)
3218 transaction.add(self._indexfile, offset)
3216 ifh.write(entry)
3219 ifh.write(entry)
3217 ifh.write(data[0])
3220 ifh.write(data[0])
3218 ifh.write(data[1])
3221 ifh.write(data[1])
3219 assert not sidedata
3222 assert not sidedata
3220 self._enforceinlinesize(transaction)
3223 self._enforceinlinesize(transaction)
3221 if self._docket is not None:
3224 if self._docket is not None:
3222 # revlog-v2 always has 3 writing handles, help Pytype
3225 # revlog-v2 always has 3 writing handles, help Pytype
3223 wh1 = self._inner._writinghandles[0]
3226 wh1 = self._inner._writinghandles[0]
3224 wh2 = self._inner._writinghandles[1]
3227 wh2 = self._inner._writinghandles[1]
3225 wh3 = self._inner._writinghandles[2]
3228 wh3 = self._inner._writinghandles[2]
3226 assert wh1 is not None
3229 assert wh1 is not None
3227 assert wh2 is not None
3230 assert wh2 is not None
3228 assert wh3 is not None
3231 assert wh3 is not None
3229 self._docket.index_end = wh1.tell()
3232 self._docket.index_end = wh1.tell()
3230 self._docket.data_end = wh2.tell()
3233 self._docket.data_end = wh2.tell()
3231 self._docket.sidedata_end = wh3.tell()
3234 self._docket.sidedata_end = wh3.tell()
3232
3235
3233 nodemaputil.setup_persistent_nodemap(transaction, self)
3236 nodemaputil.setup_persistent_nodemap(transaction, self)
3234
3237
3235 def addgroup(
3238 def addgroup(
3236 self,
3239 self,
3237 deltas,
3240 deltas,
3238 linkmapper,
3241 linkmapper,
3239 transaction,
3242 transaction,
3240 alwayscache=False,
3243 alwayscache=False,
3241 addrevisioncb=None,
3244 addrevisioncb=None,
3242 duplicaterevisioncb=None,
3245 duplicaterevisioncb=None,
3243 debug_info=None,
3246 debug_info=None,
3244 delta_base_reuse_policy=None,
3247 delta_base_reuse_policy=None,
3245 ):
3248 ):
3246 """
3249 """
3247 add a delta group
3250 add a delta group
3248
3251
3249 given a set of deltas, add them to the revision log. the
3252 given a set of deltas, add them to the revision log. the
3250 first delta is against its parent, which should be in our
3253 first delta is against its parent, which should be in our
3251 log, the rest are against the previous delta.
3254 log, the rest are against the previous delta.
3252
3255
3253 If ``addrevisioncb`` is defined, it will be called with arguments of
3256 If ``addrevisioncb`` is defined, it will be called with arguments of
3254 this revlog and the node that was added.
3257 this revlog and the node that was added.
3255 """
3258 """
3256
3259
3257 if self._adding_group:
3260 if self._adding_group:
3258 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3261 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3259
3262
3260 # read the default delta-base reuse policy from revlog config if the
3263 # read the default delta-base reuse policy from revlog config if the
3261 # group did not specify one.
3264 # group did not specify one.
3262 if delta_base_reuse_policy is None:
3265 if delta_base_reuse_policy is None:
3263 if (
3266 if (
3264 self.delta_config.general_delta
3267 self.delta_config.general_delta
3265 and self.delta_config.lazy_delta_base
3268 and self.delta_config.lazy_delta_base
3266 ):
3269 ):
3267 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3270 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3268 else:
3271 else:
3269 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3272 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3270
3273
3271 self._adding_group = True
3274 self._adding_group = True
3272 empty = True
3275 empty = True
3273 try:
3276 try:
3274 with self._writing(transaction):
3277 with self._writing(transaction):
3275 write_debug = None
3278 write_debug = None
3276 if self.delta_config.debug_delta:
3279 if self.delta_config.debug_delta:
3277 write_debug = transaction._report
3280 write_debug = transaction._report
3278 deltacomputer = deltautil.deltacomputer(
3281 deltacomputer = deltautil.deltacomputer(
3279 self,
3282 self,
3280 write_debug=write_debug,
3283 write_debug=write_debug,
3281 debug_info=debug_info,
3284 debug_info=debug_info,
3282 )
3285 )
3283 # loop through our set of deltas
3286 # loop through our set of deltas
3284 for data in deltas:
3287 for data in deltas:
3285 (
3288 (
3286 node,
3289 node,
3287 p1,
3290 p1,
3288 p2,
3291 p2,
3289 linknode,
3292 linknode,
3290 deltabase,
3293 deltabase,
3291 delta,
3294 delta,
3292 flags,
3295 flags,
3293 sidedata,
3296 sidedata,
3294 ) = data
3297 ) = data
3295 link = linkmapper(linknode)
3298 link = linkmapper(linknode)
3296 flags = flags or REVIDX_DEFAULT_FLAGS
3299 flags = flags or REVIDX_DEFAULT_FLAGS
3297
3300
3298 rev = self.index.get_rev(node)
3301 rev = self.index.get_rev(node)
3299 if rev is not None:
3302 if rev is not None:
3300 # this can happen if two branches make the same change
3303 # this can happen if two branches make the same change
3301 self._nodeduplicatecallback(transaction, rev)
3304 self._nodeduplicatecallback(transaction, rev)
3302 if duplicaterevisioncb:
3305 if duplicaterevisioncb:
3303 duplicaterevisioncb(self, rev)
3306 duplicaterevisioncb(self, rev)
3304 empty = False
3307 empty = False
3305 continue
3308 continue
3306
3309
3307 for p in (p1, p2):
3310 for p in (p1, p2):
3308 if not self.index.has_node(p):
3311 if not self.index.has_node(p):
3309 raise error.LookupError(
3312 raise error.LookupError(
3310 p, self.radix, _(b'unknown parent')
3313 p, self.radix, _(b'unknown parent')
3311 )
3314 )
3312
3315
3313 if not self.index.has_node(deltabase):
3316 if not self.index.has_node(deltabase):
3314 raise error.LookupError(
3317 raise error.LookupError(
3315 deltabase, self.display_id, _(b'unknown delta base')
3318 deltabase, self.display_id, _(b'unknown delta base')
3316 )
3319 )
3317
3320
3318 baserev = self.rev(deltabase)
3321 baserev = self.rev(deltabase)
3319
3322
3320 if baserev != nullrev and self.iscensored(baserev):
3323 if baserev != nullrev and self.iscensored(baserev):
3321 # if base is censored, delta must be full replacement in a
3324 # if base is censored, delta must be full replacement in a
3322 # single patch operation
3325 # single patch operation
3323 hlen = struct.calcsize(b">lll")
3326 hlen = struct.calcsize(b">lll")
3324 oldlen = self.rawsize(baserev)
3327 oldlen = self.rawsize(baserev)
3325 newlen = len(delta) - hlen
3328 newlen = len(delta) - hlen
3326 if delta[:hlen] != mdiff.replacediffheader(
3329 if delta[:hlen] != mdiff.replacediffheader(
3327 oldlen, newlen
3330 oldlen, newlen
3328 ):
3331 ):
3329 raise error.CensoredBaseError(
3332 raise error.CensoredBaseError(
3330 self.display_id, self.node(baserev)
3333 self.display_id, self.node(baserev)
3331 )
3334 )
3332
3335
3333 if not flags and self._peek_iscensored(baserev, delta):
3336 if not flags and self._peek_iscensored(baserev, delta):
3334 flags |= REVIDX_ISCENSORED
3337 flags |= REVIDX_ISCENSORED
3335
3338
3336 # We assume consumers of addrevisioncb will want to retrieve
3339 # We assume consumers of addrevisioncb will want to retrieve
3337 # the added revision, which will require a call to
3340 # the added revision, which will require a call to
3338 # revision(). revision() will fast path if there is a cache
3341 # revision(). revision() will fast path if there is a cache
3339 # hit. So, we tell _addrevision() to always cache in this case.
3342 # hit. So, we tell _addrevision() to always cache in this case.
3340 # We're only using addgroup() in the context of changegroup
3343 # We're only using addgroup() in the context of changegroup
3341 # generation so the revision data can always be handled as raw
3344 # generation so the revision data can always be handled as raw
3342 # by the flagprocessor.
3345 # by the flagprocessor.
3343 rev = self._addrevision(
3346 rev = self._addrevision(
3344 node,
3347 node,
3345 None,
3348 None,
3346 transaction,
3349 transaction,
3347 link,
3350 link,
3348 p1,
3351 p1,
3349 p2,
3352 p2,
3350 flags,
3353 flags,
3351 (baserev, delta, delta_base_reuse_policy),
3354 (baserev, delta, delta_base_reuse_policy),
3352 alwayscache=alwayscache,
3355 alwayscache=alwayscache,
3353 deltacomputer=deltacomputer,
3356 deltacomputer=deltacomputer,
3354 sidedata=sidedata,
3357 sidedata=sidedata,
3355 )
3358 )
3356
3359
3357 if addrevisioncb:
3360 if addrevisioncb:
3358 addrevisioncb(self, rev)
3361 addrevisioncb(self, rev)
3359 empty = False
3362 empty = False
3360 finally:
3363 finally:
3361 self._adding_group = False
3364 self._adding_group = False
3362 return not empty
3365 return not empty
3363
3366
3364 def iscensored(self, rev):
3367 def iscensored(self, rev):
3365 """Check if a file revision is censored."""
3368 """Check if a file revision is censored."""
3366 if not self.feature_config.censorable:
3369 if not self.feature_config.censorable:
3367 return False
3370 return False
3368
3371
3369 return self.flags(rev) & REVIDX_ISCENSORED
3372 return self.flags(rev) & REVIDX_ISCENSORED
3370
3373
3371 def _peek_iscensored(self, baserev, delta):
3374 def _peek_iscensored(self, baserev, delta):
3372 """Quickly check if a delta produces a censored revision."""
3375 """Quickly check if a delta produces a censored revision."""
3373 if not self.feature_config.censorable:
3376 if not self.feature_config.censorable:
3374 return False
3377 return False
3375
3378
3376 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3379 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3377
3380
3378 def getstrippoint(self, minlink):
3381 def getstrippoint(self, minlink):
3379 """find the minimum rev that must be stripped to strip the linkrev
3382 """find the minimum rev that must be stripped to strip the linkrev
3380
3383
3381 Returns a tuple containing the minimum rev and a set of all revs that
3384 Returns a tuple containing the minimum rev and a set of all revs that
3382 have linkrevs that will be broken by this strip.
3385 have linkrevs that will be broken by this strip.
3383 """
3386 """
3384 return storageutil.resolvestripinfo(
3387 return storageutil.resolvestripinfo(
3385 minlink,
3388 minlink,
3386 len(self) - 1,
3389 len(self) - 1,
3387 self.headrevs(),
3390 self.headrevs(),
3388 self.linkrev,
3391 self.linkrev,
3389 self.parentrevs,
3392 self.parentrevs,
3390 )
3393 )
3391
3394
3392 def strip(self, minlink, transaction):
3395 def strip(self, minlink, transaction):
3393 """truncate the revlog on the first revision with a linkrev >= minlink
3396 """truncate the revlog on the first revision with a linkrev >= minlink
3394
3397
3395 This function is called when we're stripping revision minlink and
3398 This function is called when we're stripping revision minlink and
3396 its descendants from the repository.
3399 its descendants from the repository.
3397
3400
3398 We have to remove all revisions with linkrev >= minlink, because
3401 We have to remove all revisions with linkrev >= minlink, because
3399 the equivalent changelog revisions will be renumbered after the
3402 the equivalent changelog revisions will be renumbered after the
3400 strip.
3403 strip.
3401
3404
3402 So we truncate the revlog on the first of these revisions, and
3405 So we truncate the revlog on the first of these revisions, and
3403 trust that the caller has saved the revisions that shouldn't be
3406 trust that the caller has saved the revisions that shouldn't be
3404 removed and that it'll re-add them after this truncation.
3407 removed and that it'll re-add them after this truncation.
3405 """
3408 """
3406 if len(self) == 0:
3409 if len(self) == 0:
3407 return
3410 return
3408
3411
3409 rev, _ = self.getstrippoint(minlink)
3412 rev, _ = self.getstrippoint(minlink)
3410 if rev == len(self):
3413 if rev == len(self):
3411 return
3414 return
3412
3415
3413 # first truncate the files on disk
3416 # first truncate the files on disk
3414 data_end = self.start(rev)
3417 data_end = self.start(rev)
3415 if not self._inline:
3418 if not self._inline:
3416 transaction.add(self._datafile, data_end)
3419 transaction.add(self._datafile, data_end)
3417 end = rev * self.index.entry_size
3420 end = rev * self.index.entry_size
3418 else:
3421 else:
3419 end = data_end + (rev * self.index.entry_size)
3422 end = data_end + (rev * self.index.entry_size)
3420
3423
3421 if self._sidedatafile:
3424 if self._sidedatafile:
3422 sidedata_end = self.sidedata_cut_off(rev)
3425 sidedata_end = self.sidedata_cut_off(rev)
3423 transaction.add(self._sidedatafile, sidedata_end)
3426 transaction.add(self._sidedatafile, sidedata_end)
3424
3427
3425 transaction.add(self._indexfile, end)
3428 transaction.add(self._indexfile, end)
3426 if self._docket is not None:
3429 if self._docket is not None:
3427 # XXX we could, leverage the docket while stripping. However it is
3430 # XXX we could, leverage the docket while stripping. However it is
3428 # not powerfull enough at the time of this comment
3431 # not powerfull enough at the time of this comment
3429 self._docket.index_end = end
3432 self._docket.index_end = end
3430 self._docket.data_end = data_end
3433 self._docket.data_end = data_end
3431 self._docket.sidedata_end = sidedata_end
3434 self._docket.sidedata_end = sidedata_end
3432 self._docket.write(transaction, stripping=True)
3435 self._docket.write(transaction, stripping=True)
3433
3436
3434 # then reset internal state in memory to forget those revisions
3437 # then reset internal state in memory to forget those revisions
3435 self._revisioncache = None
3438 self._inner._revisioncache = None
3436 self._chaininfocache = util.lrucachedict(500)
3439 self._chaininfocache = util.lrucachedict(500)
3437 self._inner._segmentfile.clear_cache()
3440 self._inner._segmentfile.clear_cache()
3438 self._inner._segmentfile_sidedata.clear_cache()
3441 self._inner._segmentfile_sidedata.clear_cache()
3439
3442
3440 del self.index[rev:-1]
3443 del self.index[rev:-1]
3441
3444
3442 def checksize(self):
3445 def checksize(self):
3443 """Check size of index and data files
3446 """Check size of index and data files
3444
3447
3445 return a (dd, di) tuple.
3448 return a (dd, di) tuple.
3446 - dd: extra bytes for the "data" file
3449 - dd: extra bytes for the "data" file
3447 - di: extra bytes for the "index" file
3450 - di: extra bytes for the "index" file
3448
3451
3449 A healthy revlog will return (0, 0).
3452 A healthy revlog will return (0, 0).
3450 """
3453 """
3451 expected = 0
3454 expected = 0
3452 if len(self):
3455 if len(self):
3453 expected = max(0, self.end(len(self) - 1))
3456 expected = max(0, self.end(len(self) - 1))
3454
3457
3455 try:
3458 try:
3456 with self._datafp() as f:
3459 with self._datafp() as f:
3457 f.seek(0, io.SEEK_END)
3460 f.seek(0, io.SEEK_END)
3458 actual = f.tell()
3461 actual = f.tell()
3459 dd = actual - expected
3462 dd = actual - expected
3460 except FileNotFoundError:
3463 except FileNotFoundError:
3461 dd = 0
3464 dd = 0
3462
3465
3463 try:
3466 try:
3464 f = self.opener(self._indexfile)
3467 f = self.opener(self._indexfile)
3465 f.seek(0, io.SEEK_END)
3468 f.seek(0, io.SEEK_END)
3466 actual = f.tell()
3469 actual = f.tell()
3467 f.close()
3470 f.close()
3468 s = self.index.entry_size
3471 s = self.index.entry_size
3469 i = max(0, actual // s)
3472 i = max(0, actual // s)
3470 di = actual - (i * s)
3473 di = actual - (i * s)
3471 if self._inline:
3474 if self._inline:
3472 databytes = 0
3475 databytes = 0
3473 for r in self:
3476 for r in self:
3474 databytes += max(0, self.length(r))
3477 databytes += max(0, self.length(r))
3475 dd = 0
3478 dd = 0
3476 di = actual - len(self) * s - databytes
3479 di = actual - len(self) * s - databytes
3477 except FileNotFoundError:
3480 except FileNotFoundError:
3478 di = 0
3481 di = 0
3479
3482
3480 return (dd, di)
3483 return (dd, di)
3481
3484
3482 def files(self):
3485 def files(self):
3483 """return list of files that compose this revlog"""
3486 """return list of files that compose this revlog"""
3484 res = [self._indexfile]
3487 res = [self._indexfile]
3485 if self._docket_file is None:
3488 if self._docket_file is None:
3486 if not self._inline:
3489 if not self._inline:
3487 res.append(self._datafile)
3490 res.append(self._datafile)
3488 else:
3491 else:
3489 res.append(self._docket_file)
3492 res.append(self._docket_file)
3490 res.extend(self._docket.old_index_filepaths(include_empty=False))
3493 res.extend(self._docket.old_index_filepaths(include_empty=False))
3491 if self._docket.data_end:
3494 if self._docket.data_end:
3492 res.append(self._datafile)
3495 res.append(self._datafile)
3493 res.extend(self._docket.old_data_filepaths(include_empty=False))
3496 res.extend(self._docket.old_data_filepaths(include_empty=False))
3494 if self._docket.sidedata_end:
3497 if self._docket.sidedata_end:
3495 res.append(self._sidedatafile)
3498 res.append(self._sidedatafile)
3496 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3499 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3497 return res
3500 return res
3498
3501
3499 def emitrevisions(
3502 def emitrevisions(
3500 self,
3503 self,
3501 nodes,
3504 nodes,
3502 nodesorder=None,
3505 nodesorder=None,
3503 revisiondata=False,
3506 revisiondata=False,
3504 assumehaveparentrevisions=False,
3507 assumehaveparentrevisions=False,
3505 deltamode=repository.CG_DELTAMODE_STD,
3508 deltamode=repository.CG_DELTAMODE_STD,
3506 sidedata_helpers=None,
3509 sidedata_helpers=None,
3507 debug_info=None,
3510 debug_info=None,
3508 ):
3511 ):
3509 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3512 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3510 raise error.ProgrammingError(
3513 raise error.ProgrammingError(
3511 b'unhandled value for nodesorder: %s' % nodesorder
3514 b'unhandled value for nodesorder: %s' % nodesorder
3512 )
3515 )
3513
3516
3514 if nodesorder is None and not self.delta_config.general_delta:
3517 if nodesorder is None and not self.delta_config.general_delta:
3515 nodesorder = b'storage'
3518 nodesorder = b'storage'
3516
3519
3517 if (
3520 if (
3518 not self._storedeltachains
3521 not self._storedeltachains
3519 and deltamode != repository.CG_DELTAMODE_PREV
3522 and deltamode != repository.CG_DELTAMODE_PREV
3520 ):
3523 ):
3521 deltamode = repository.CG_DELTAMODE_FULL
3524 deltamode = repository.CG_DELTAMODE_FULL
3522
3525
3523 return storageutil.emitrevisions(
3526 return storageutil.emitrevisions(
3524 self,
3527 self,
3525 nodes,
3528 nodes,
3526 nodesorder,
3529 nodesorder,
3527 revlogrevisiondelta,
3530 revlogrevisiondelta,
3528 deltaparentfn=self.deltaparent,
3531 deltaparentfn=self.deltaparent,
3529 candeltafn=self._candelta,
3532 candeltafn=self._candelta,
3530 rawsizefn=self.rawsize,
3533 rawsizefn=self.rawsize,
3531 revdifffn=self.revdiff,
3534 revdifffn=self.revdiff,
3532 flagsfn=self.flags,
3535 flagsfn=self.flags,
3533 deltamode=deltamode,
3536 deltamode=deltamode,
3534 revisiondata=revisiondata,
3537 revisiondata=revisiondata,
3535 assumehaveparentrevisions=assumehaveparentrevisions,
3538 assumehaveparentrevisions=assumehaveparentrevisions,
3536 sidedata_helpers=sidedata_helpers,
3539 sidedata_helpers=sidedata_helpers,
3537 debug_info=debug_info,
3540 debug_info=debug_info,
3538 )
3541 )
3539
3542
3540 DELTAREUSEALWAYS = b'always'
3543 DELTAREUSEALWAYS = b'always'
3541 DELTAREUSESAMEREVS = b'samerevs'
3544 DELTAREUSESAMEREVS = b'samerevs'
3542 DELTAREUSENEVER = b'never'
3545 DELTAREUSENEVER = b'never'
3543
3546
3544 DELTAREUSEFULLADD = b'fulladd'
3547 DELTAREUSEFULLADD = b'fulladd'
3545
3548
3546 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3549 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3547
3550
3548 def clone(
3551 def clone(
3549 self,
3552 self,
3550 tr,
3553 tr,
3551 destrevlog,
3554 destrevlog,
3552 addrevisioncb=None,
3555 addrevisioncb=None,
3553 deltareuse=DELTAREUSESAMEREVS,
3556 deltareuse=DELTAREUSESAMEREVS,
3554 forcedeltabothparents=None,
3557 forcedeltabothparents=None,
3555 sidedata_helpers=None,
3558 sidedata_helpers=None,
3556 ):
3559 ):
3557 """Copy this revlog to another, possibly with format changes.
3560 """Copy this revlog to another, possibly with format changes.
3558
3561
3559 The destination revlog will contain the same revisions and nodes.
3562 The destination revlog will contain the same revisions and nodes.
3560 However, it may not be bit-for-bit identical due to e.g. delta encoding
3563 However, it may not be bit-for-bit identical due to e.g. delta encoding
3561 differences.
3564 differences.
3562
3565
3563 The ``deltareuse`` argument control how deltas from the existing revlog
3566 The ``deltareuse`` argument control how deltas from the existing revlog
3564 are preserved in the destination revlog. The argument can have the
3567 are preserved in the destination revlog. The argument can have the
3565 following values:
3568 following values:
3566
3569
3567 DELTAREUSEALWAYS
3570 DELTAREUSEALWAYS
3568 Deltas will always be reused (if possible), even if the destination
3571 Deltas will always be reused (if possible), even if the destination
3569 revlog would not select the same revisions for the delta. This is the
3572 revlog would not select the same revisions for the delta. This is the
3570 fastest mode of operation.
3573 fastest mode of operation.
3571 DELTAREUSESAMEREVS
3574 DELTAREUSESAMEREVS
3572 Deltas will be reused if the destination revlog would pick the same
3575 Deltas will be reused if the destination revlog would pick the same
3573 revisions for the delta. This mode strikes a balance between speed
3576 revisions for the delta. This mode strikes a balance between speed
3574 and optimization.
3577 and optimization.
3575 DELTAREUSENEVER
3578 DELTAREUSENEVER
3576 Deltas will never be reused. This is the slowest mode of execution.
3579 Deltas will never be reused. This is the slowest mode of execution.
3577 This mode can be used to recompute deltas (e.g. if the diff/delta
3580 This mode can be used to recompute deltas (e.g. if the diff/delta
3578 algorithm changes).
3581 algorithm changes).
3579 DELTAREUSEFULLADD
3582 DELTAREUSEFULLADD
3580 Revision will be re-added as if their were new content. This is
3583 Revision will be re-added as if their were new content. This is
3581 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
3584 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
3582 eg: large file detection and handling.
3585 eg: large file detection and handling.
3583
3586
3584 Delta computation can be slow, so the choice of delta reuse policy can
3587 Delta computation can be slow, so the choice of delta reuse policy can
3585 significantly affect run time.
3588 significantly affect run time.
3586
3589
3587 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3590 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3588 two extremes. Deltas will be reused if they are appropriate. But if the
3591 two extremes. Deltas will be reused if they are appropriate. But if the
3589 delta could choose a better revision, it will do so. This means if you
3592 delta could choose a better revision, it will do so. This means if you
3590 are converting a non-generaldelta revlog to a generaldelta revlog,
3593 are converting a non-generaldelta revlog to a generaldelta revlog,
3591 deltas will be recomputed if the delta's parent isn't a parent of the
3594 deltas will be recomputed if the delta's parent isn't a parent of the
3592 revision.
3595 revision.
3593
3596
3594 In addition to the delta policy, the ``forcedeltabothparents``
3597 In addition to the delta policy, the ``forcedeltabothparents``
3595 argument controls whether to force compute deltas against both parents
3598 argument controls whether to force compute deltas against both parents
3596 for merges. By default, the current default is used.
3599 for merges. By default, the current default is used.
3597
3600
3598 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3601 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3599 `sidedata_helpers`.
3602 `sidedata_helpers`.
3600 """
3603 """
3601 if deltareuse not in self.DELTAREUSEALL:
3604 if deltareuse not in self.DELTAREUSEALL:
3602 raise ValueError(
3605 raise ValueError(
3603 _(b'value for deltareuse invalid: %s') % deltareuse
3606 _(b'value for deltareuse invalid: %s') % deltareuse
3604 )
3607 )
3605
3608
3606 if len(destrevlog):
3609 if len(destrevlog):
3607 raise ValueError(_(b'destination revlog is not empty'))
3610 raise ValueError(_(b'destination revlog is not empty'))
3608
3611
3609 if getattr(self, 'filteredrevs', None):
3612 if getattr(self, 'filteredrevs', None):
3610 raise ValueError(_(b'source revlog has filtered revisions'))
3613 raise ValueError(_(b'source revlog has filtered revisions'))
3611 if getattr(destrevlog, 'filteredrevs', None):
3614 if getattr(destrevlog, 'filteredrevs', None):
3612 raise ValueError(_(b'destination revlog has filtered revisions'))
3615 raise ValueError(_(b'destination revlog has filtered revisions'))
3613
3616
3614 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3617 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3615 # if possible.
3618 # if possible.
3616 old_delta_config = destrevlog.delta_config
3619 old_delta_config = destrevlog.delta_config
3617 destrevlog.delta_config = destrevlog.delta_config.copy()
3620 destrevlog.delta_config = destrevlog.delta_config.copy()
3618
3621
3619 try:
3622 try:
3620 if deltareuse == self.DELTAREUSEALWAYS:
3623 if deltareuse == self.DELTAREUSEALWAYS:
3621 destrevlog.delta_config.lazy_delta_base = True
3624 destrevlog.delta_config.lazy_delta_base = True
3622 destrevlog.delta_config.lazy_delta = True
3625 destrevlog.delta_config.lazy_delta = True
3623 elif deltareuse == self.DELTAREUSESAMEREVS:
3626 elif deltareuse == self.DELTAREUSESAMEREVS:
3624 destrevlog.delta_config.lazy_delta_base = False
3627 destrevlog.delta_config.lazy_delta_base = False
3625 destrevlog.delta_config.lazy_delta = True
3628 destrevlog.delta_config.lazy_delta = True
3626 elif deltareuse == self.DELTAREUSENEVER:
3629 elif deltareuse == self.DELTAREUSENEVER:
3627 destrevlog.delta_config.lazy_delta_base = False
3630 destrevlog.delta_config.lazy_delta_base = False
3628 destrevlog.delta_config.lazy_delta = False
3631 destrevlog.delta_config.lazy_delta = False
3629
3632
3630 delta_both_parents = (
3633 delta_both_parents = (
3631 forcedeltabothparents or old_delta_config.delta_both_parents
3634 forcedeltabothparents or old_delta_config.delta_both_parents
3632 )
3635 )
3633 destrevlog.delta_config.delta_both_parents = delta_both_parents
3636 destrevlog.delta_config.delta_both_parents = delta_both_parents
3634
3637
3635 with self.reading(), destrevlog._writing(tr):
3638 with self.reading(), destrevlog._writing(tr):
3636 self._clone(
3639 self._clone(
3637 tr,
3640 tr,
3638 destrevlog,
3641 destrevlog,
3639 addrevisioncb,
3642 addrevisioncb,
3640 deltareuse,
3643 deltareuse,
3641 forcedeltabothparents,
3644 forcedeltabothparents,
3642 sidedata_helpers,
3645 sidedata_helpers,
3643 )
3646 )
3644
3647
3645 finally:
3648 finally:
3646 destrevlog.delta_config = old_delta_config
3649 destrevlog.delta_config = old_delta_config
3647
3650
3648 def _clone(
3651 def _clone(
3649 self,
3652 self,
3650 tr,
3653 tr,
3651 destrevlog,
3654 destrevlog,
3652 addrevisioncb,
3655 addrevisioncb,
3653 deltareuse,
3656 deltareuse,
3654 forcedeltabothparents,
3657 forcedeltabothparents,
3655 sidedata_helpers,
3658 sidedata_helpers,
3656 ):
3659 ):
3657 """perform the core duty of `revlog.clone` after parameter processing"""
3660 """perform the core duty of `revlog.clone` after parameter processing"""
3658 write_debug = None
3661 write_debug = None
3659 if self.delta_config.debug_delta:
3662 if self.delta_config.debug_delta:
3660 write_debug = tr._report
3663 write_debug = tr._report
3661 deltacomputer = deltautil.deltacomputer(
3664 deltacomputer = deltautil.deltacomputer(
3662 destrevlog,
3665 destrevlog,
3663 write_debug=write_debug,
3666 write_debug=write_debug,
3664 )
3667 )
3665 index = self.index
3668 index = self.index
3666 for rev in self:
3669 for rev in self:
3667 entry = index[rev]
3670 entry = index[rev]
3668
3671
3669 # Some classes override linkrev to take filtered revs into
3672 # Some classes override linkrev to take filtered revs into
3670 # account. Use raw entry from index.
3673 # account. Use raw entry from index.
3671 flags = entry[0] & 0xFFFF
3674 flags = entry[0] & 0xFFFF
3672 linkrev = entry[4]
3675 linkrev = entry[4]
3673 p1 = index[entry[5]][7]
3676 p1 = index[entry[5]][7]
3674 p2 = index[entry[6]][7]
3677 p2 = index[entry[6]][7]
3675 node = entry[7]
3678 node = entry[7]
3676
3679
3677 # (Possibly) reuse the delta from the revlog if allowed and
3680 # (Possibly) reuse the delta from the revlog if allowed and
3678 # the revlog chunk is a delta.
3681 # the revlog chunk is a delta.
3679 cachedelta = None
3682 cachedelta = None
3680 rawtext = None
3683 rawtext = None
3681 if deltareuse == self.DELTAREUSEFULLADD:
3684 if deltareuse == self.DELTAREUSEFULLADD:
3682 text = self._revisiondata(rev)
3685 text = self._revisiondata(rev)
3683 sidedata = self.sidedata(rev)
3686 sidedata = self.sidedata(rev)
3684
3687
3685 if sidedata_helpers is not None:
3688 if sidedata_helpers is not None:
3686 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3689 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3687 self, sidedata_helpers, sidedata, rev
3690 self, sidedata_helpers, sidedata, rev
3688 )
3691 )
3689 flags = flags | new_flags[0] & ~new_flags[1]
3692 flags = flags | new_flags[0] & ~new_flags[1]
3690
3693
3691 destrevlog.addrevision(
3694 destrevlog.addrevision(
3692 text,
3695 text,
3693 tr,
3696 tr,
3694 linkrev,
3697 linkrev,
3695 p1,
3698 p1,
3696 p2,
3699 p2,
3697 cachedelta=cachedelta,
3700 cachedelta=cachedelta,
3698 node=node,
3701 node=node,
3699 flags=flags,
3702 flags=flags,
3700 deltacomputer=deltacomputer,
3703 deltacomputer=deltacomputer,
3701 sidedata=sidedata,
3704 sidedata=sidedata,
3702 )
3705 )
3703 else:
3706 else:
3704 if destrevlog.delta_config.lazy_delta:
3707 if destrevlog.delta_config.lazy_delta:
3705 dp = self.deltaparent(rev)
3708 dp = self.deltaparent(rev)
3706 if dp != nullrev:
3709 if dp != nullrev:
3707 cachedelta = (dp, bytes(self._inner._chunk(rev)))
3710 cachedelta = (dp, bytes(self._inner._chunk(rev)))
3708
3711
3709 sidedata = None
3712 sidedata = None
3710 if not cachedelta:
3713 if not cachedelta:
3711 try:
3714 try:
3712 rawtext = self._revisiondata(rev)
3715 rawtext = self._revisiondata(rev)
3713 except error.CensoredNodeError as censored:
3716 except error.CensoredNodeError as censored:
3714 assert flags & REVIDX_ISCENSORED
3717 assert flags & REVIDX_ISCENSORED
3715 rawtext = censored.tombstone
3718 rawtext = censored.tombstone
3716 sidedata = self.sidedata(rev)
3719 sidedata = self.sidedata(rev)
3717 if sidedata is None:
3720 if sidedata is None:
3718 sidedata = self.sidedata(rev)
3721 sidedata = self.sidedata(rev)
3719
3722
3720 if sidedata_helpers is not None:
3723 if sidedata_helpers is not None:
3721 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3724 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3722 self, sidedata_helpers, sidedata, rev
3725 self, sidedata_helpers, sidedata, rev
3723 )
3726 )
3724 flags = flags | new_flags[0] & ~new_flags[1]
3727 flags = flags | new_flags[0] & ~new_flags[1]
3725
3728
3726 destrevlog._addrevision(
3729 destrevlog._addrevision(
3727 node,
3730 node,
3728 rawtext,
3731 rawtext,
3729 tr,
3732 tr,
3730 linkrev,
3733 linkrev,
3731 p1,
3734 p1,
3732 p2,
3735 p2,
3733 flags,
3736 flags,
3734 cachedelta,
3737 cachedelta,
3735 deltacomputer=deltacomputer,
3738 deltacomputer=deltacomputer,
3736 sidedata=sidedata,
3739 sidedata=sidedata,
3737 )
3740 )
3738
3741
3739 if addrevisioncb:
3742 if addrevisioncb:
3740 addrevisioncb(self, rev, node)
3743 addrevisioncb(self, rev, node)
3741
3744
3742 def censorrevision(self, tr, censornode, tombstone=b''):
3745 def censorrevision(self, tr, censornode, tombstone=b''):
3743 if self._format_version == REVLOGV0:
3746 if self._format_version == REVLOGV0:
3744 raise error.RevlogError(
3747 raise error.RevlogError(
3745 _(b'cannot censor with version %d revlogs')
3748 _(b'cannot censor with version %d revlogs')
3746 % self._format_version
3749 % self._format_version
3747 )
3750 )
3748 elif self._format_version == REVLOGV1:
3751 elif self._format_version == REVLOGV1:
3749 rewrite.v1_censor(self, tr, censornode, tombstone)
3752 rewrite.v1_censor(self, tr, censornode, tombstone)
3750 else:
3753 else:
3751 rewrite.v2_censor(self, tr, censornode, tombstone)
3754 rewrite.v2_censor(self, tr, censornode, tombstone)
3752
3755
3753 def verifyintegrity(self, state):
3756 def verifyintegrity(self, state):
3754 """Verifies the integrity of the revlog.
3757 """Verifies the integrity of the revlog.
3755
3758
3756 Yields ``revlogproblem`` instances describing problems that are
3759 Yields ``revlogproblem`` instances describing problems that are
3757 found.
3760 found.
3758 """
3761 """
3759 dd, di = self.checksize()
3762 dd, di = self.checksize()
3760 if dd:
3763 if dd:
3761 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3764 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3762 if di:
3765 if di:
3763 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3766 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3764
3767
3765 version = self._format_version
3768 version = self._format_version
3766
3769
3767 # The verifier tells us what version revlog we should be.
3770 # The verifier tells us what version revlog we should be.
3768 if version != state[b'expectedversion']:
3771 if version != state[b'expectedversion']:
3769 yield revlogproblem(
3772 yield revlogproblem(
3770 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3773 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3771 % (self.display_id, version, state[b'expectedversion'])
3774 % (self.display_id, version, state[b'expectedversion'])
3772 )
3775 )
3773
3776
3774 state[b'skipread'] = set()
3777 state[b'skipread'] = set()
3775 state[b'safe_renamed'] = set()
3778 state[b'safe_renamed'] = set()
3776
3779
3777 for rev in self:
3780 for rev in self:
3778 node = self.node(rev)
3781 node = self.node(rev)
3779
3782
3780 # Verify contents. 4 cases to care about:
3783 # Verify contents. 4 cases to care about:
3781 #
3784 #
3782 # common: the most common case
3785 # common: the most common case
3783 # rename: with a rename
3786 # rename: with a rename
3784 # meta: file content starts with b'\1\n', the metadata
3787 # meta: file content starts with b'\1\n', the metadata
3785 # header defined in filelog.py, but without a rename
3788 # header defined in filelog.py, but without a rename
3786 # ext: content stored externally
3789 # ext: content stored externally
3787 #
3790 #
3788 # More formally, their differences are shown below:
3791 # More formally, their differences are shown below:
3789 #
3792 #
3790 # | common | rename | meta | ext
3793 # | common | rename | meta | ext
3791 # -------------------------------------------------------
3794 # -------------------------------------------------------
3792 # flags() | 0 | 0 | 0 | not 0
3795 # flags() | 0 | 0 | 0 | not 0
3793 # renamed() | False | True | False | ?
3796 # renamed() | False | True | False | ?
3794 # rawtext[0:2]=='\1\n'| False | True | True | ?
3797 # rawtext[0:2]=='\1\n'| False | True | True | ?
3795 #
3798 #
3796 # "rawtext" means the raw text stored in revlog data, which
3799 # "rawtext" means the raw text stored in revlog data, which
3797 # could be retrieved by "rawdata(rev)". "text"
3800 # could be retrieved by "rawdata(rev)". "text"
3798 # mentioned below is "revision(rev)".
3801 # mentioned below is "revision(rev)".
3799 #
3802 #
3800 # There are 3 different lengths stored physically:
3803 # There are 3 different lengths stored physically:
3801 # 1. L1: rawsize, stored in revlog index
3804 # 1. L1: rawsize, stored in revlog index
3802 # 2. L2: len(rawtext), stored in revlog data
3805 # 2. L2: len(rawtext), stored in revlog data
3803 # 3. L3: len(text), stored in revlog data if flags==0, or
3806 # 3. L3: len(text), stored in revlog data if flags==0, or
3804 # possibly somewhere else if flags!=0
3807 # possibly somewhere else if flags!=0
3805 #
3808 #
3806 # L1 should be equal to L2. L3 could be different from them.
3809 # L1 should be equal to L2. L3 could be different from them.
3807 # "text" may or may not affect commit hash depending on flag
3810 # "text" may or may not affect commit hash depending on flag
3808 # processors (see flagutil.addflagprocessor).
3811 # processors (see flagutil.addflagprocessor).
3809 #
3812 #
3810 # | common | rename | meta | ext
3813 # | common | rename | meta | ext
3811 # -------------------------------------------------
3814 # -------------------------------------------------
3812 # rawsize() | L1 | L1 | L1 | L1
3815 # rawsize() | L1 | L1 | L1 | L1
3813 # size() | L1 | L2-LM | L1(*) | L1 (?)
3816 # size() | L1 | L2-LM | L1(*) | L1 (?)
3814 # len(rawtext) | L2 | L2 | L2 | L2
3817 # len(rawtext) | L2 | L2 | L2 | L2
3815 # len(text) | L2 | L2 | L2 | L3
3818 # len(text) | L2 | L2 | L2 | L3
3816 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3819 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3817 #
3820 #
3818 # LM: length of metadata, depending on rawtext
3821 # LM: length of metadata, depending on rawtext
3819 # (*): not ideal, see comment in filelog.size
3822 # (*): not ideal, see comment in filelog.size
3820 # (?): could be "- len(meta)" if the resolved content has
3823 # (?): could be "- len(meta)" if the resolved content has
3821 # rename metadata
3824 # rename metadata
3822 #
3825 #
3823 # Checks needed to be done:
3826 # Checks needed to be done:
3824 # 1. length check: L1 == L2, in all cases.
3827 # 1. length check: L1 == L2, in all cases.
3825 # 2. hash check: depending on flag processor, we may need to
3828 # 2. hash check: depending on flag processor, we may need to
3826 # use either "text" (external), or "rawtext" (in revlog).
3829 # use either "text" (external), or "rawtext" (in revlog).
3827
3830
3828 try:
3831 try:
3829 skipflags = state.get(b'skipflags', 0)
3832 skipflags = state.get(b'skipflags', 0)
3830 if skipflags:
3833 if skipflags:
3831 skipflags &= self.flags(rev)
3834 skipflags &= self.flags(rev)
3832
3835
3833 _verify_revision(self, skipflags, state, node)
3836 _verify_revision(self, skipflags, state, node)
3834
3837
3835 l1 = self.rawsize(rev)
3838 l1 = self.rawsize(rev)
3836 l2 = len(self.rawdata(node))
3839 l2 = len(self.rawdata(node))
3837
3840
3838 if l1 != l2:
3841 if l1 != l2:
3839 yield revlogproblem(
3842 yield revlogproblem(
3840 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3843 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3841 node=node,
3844 node=node,
3842 )
3845 )
3843
3846
3844 except error.CensoredNodeError:
3847 except error.CensoredNodeError:
3845 if state[b'erroroncensored']:
3848 if state[b'erroroncensored']:
3846 yield revlogproblem(
3849 yield revlogproblem(
3847 error=_(b'censored file data'), node=node
3850 error=_(b'censored file data'), node=node
3848 )
3851 )
3849 state[b'skipread'].add(node)
3852 state[b'skipread'].add(node)
3850 except Exception as e:
3853 except Exception as e:
3851 yield revlogproblem(
3854 yield revlogproblem(
3852 error=_(b'unpacking %s: %s')
3855 error=_(b'unpacking %s: %s')
3853 % (short(node), stringutil.forcebytestr(e)),
3856 % (short(node), stringutil.forcebytestr(e)),
3854 node=node,
3857 node=node,
3855 )
3858 )
3856 state[b'skipread'].add(node)
3859 state[b'skipread'].add(node)
3857
3860
3858 def storageinfo(
3861 def storageinfo(
3859 self,
3862 self,
3860 exclusivefiles=False,
3863 exclusivefiles=False,
3861 sharedfiles=False,
3864 sharedfiles=False,
3862 revisionscount=False,
3865 revisionscount=False,
3863 trackedsize=False,
3866 trackedsize=False,
3864 storedsize=False,
3867 storedsize=False,
3865 ):
3868 ):
3866 d = {}
3869 d = {}
3867
3870
3868 if exclusivefiles:
3871 if exclusivefiles:
3869 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3872 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3870 if not self._inline:
3873 if not self._inline:
3871 d[b'exclusivefiles'].append((self.opener, self._datafile))
3874 d[b'exclusivefiles'].append((self.opener, self._datafile))
3872
3875
3873 if sharedfiles:
3876 if sharedfiles:
3874 d[b'sharedfiles'] = []
3877 d[b'sharedfiles'] = []
3875
3878
3876 if revisionscount:
3879 if revisionscount:
3877 d[b'revisionscount'] = len(self)
3880 d[b'revisionscount'] = len(self)
3878
3881
3879 if trackedsize:
3882 if trackedsize:
3880 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3883 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3881
3884
3882 if storedsize:
3885 if storedsize:
3883 d[b'storedsize'] = sum(
3886 d[b'storedsize'] = sum(
3884 self.opener.stat(path).st_size for path in self.files()
3887 self.opener.stat(path).st_size for path in self.files()
3885 )
3888 )
3886
3889
3887 return d
3890 return d
3888
3891
3889 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3892 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3890 if not self.feature_config.has_side_data:
3893 if not self.feature_config.has_side_data:
3891 return
3894 return
3892 # revlog formats with sidedata support does not support inline
3895 # revlog formats with sidedata support does not support inline
3893 assert not self._inline
3896 assert not self._inline
3894 if not helpers[1] and not helpers[2]:
3897 if not helpers[1] and not helpers[2]:
3895 # Nothing to generate or remove
3898 # Nothing to generate or remove
3896 return
3899 return
3897
3900
3898 new_entries = []
3901 new_entries = []
3899 # append the new sidedata
3902 # append the new sidedata
3900 with self._writing(transaction):
3903 with self._writing(transaction):
3901 ifh, dfh, sdfh = self._inner._writinghandles
3904 ifh, dfh, sdfh = self._inner._writinghandles
3902 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3905 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3903
3906
3904 current_offset = sdfh.tell()
3907 current_offset = sdfh.tell()
3905 for rev in range(startrev, endrev + 1):
3908 for rev in range(startrev, endrev + 1):
3906 entry = self.index[rev]
3909 entry = self.index[rev]
3907 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3910 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3908 store=self,
3911 store=self,
3909 sidedata_helpers=helpers,
3912 sidedata_helpers=helpers,
3910 sidedata={},
3913 sidedata={},
3911 rev=rev,
3914 rev=rev,
3912 )
3915 )
3913
3916
3914 serialized_sidedata = sidedatautil.serialize_sidedata(
3917 serialized_sidedata = sidedatautil.serialize_sidedata(
3915 new_sidedata
3918 new_sidedata
3916 )
3919 )
3917
3920
3918 sidedata_compression_mode = COMP_MODE_INLINE
3921 sidedata_compression_mode = COMP_MODE_INLINE
3919 if serialized_sidedata and self.feature_config.has_side_data:
3922 if serialized_sidedata and self.feature_config.has_side_data:
3920 sidedata_compression_mode = COMP_MODE_PLAIN
3923 sidedata_compression_mode = COMP_MODE_PLAIN
3921 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3924 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3922 if (
3925 if (
3923 h != b'u'
3926 h != b'u'
3924 and comp_sidedata[0] != b'\0'
3927 and comp_sidedata[0] != b'\0'
3925 and len(comp_sidedata) < len(serialized_sidedata)
3928 and len(comp_sidedata) < len(serialized_sidedata)
3926 ):
3929 ):
3927 assert not h
3930 assert not h
3928 if (
3931 if (
3929 comp_sidedata[0]
3932 comp_sidedata[0]
3930 == self._docket.default_compression_header
3933 == self._docket.default_compression_header
3931 ):
3934 ):
3932 sidedata_compression_mode = COMP_MODE_DEFAULT
3935 sidedata_compression_mode = COMP_MODE_DEFAULT
3933 serialized_sidedata = comp_sidedata
3936 serialized_sidedata = comp_sidedata
3934 else:
3937 else:
3935 sidedata_compression_mode = COMP_MODE_INLINE
3938 sidedata_compression_mode = COMP_MODE_INLINE
3936 serialized_sidedata = comp_sidedata
3939 serialized_sidedata = comp_sidedata
3937 if entry[8] != 0 or entry[9] != 0:
3940 if entry[8] != 0 or entry[9] != 0:
3938 # rewriting entries that already have sidedata is not
3941 # rewriting entries that already have sidedata is not
3939 # supported yet, because it introduces garbage data in the
3942 # supported yet, because it introduces garbage data in the
3940 # revlog.
3943 # revlog.
3941 msg = b"rewriting existing sidedata is not supported yet"
3944 msg = b"rewriting existing sidedata is not supported yet"
3942 raise error.Abort(msg)
3945 raise error.Abort(msg)
3943
3946
3944 # Apply (potential) flags to add and to remove after running
3947 # Apply (potential) flags to add and to remove after running
3945 # the sidedata helpers
3948 # the sidedata helpers
3946 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3949 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3947 entry_update = (
3950 entry_update = (
3948 current_offset,
3951 current_offset,
3949 len(serialized_sidedata),
3952 len(serialized_sidedata),
3950 new_offset_flags,
3953 new_offset_flags,
3951 sidedata_compression_mode,
3954 sidedata_compression_mode,
3952 )
3955 )
3953
3956
3954 # the sidedata computation might have move the file cursors around
3957 # the sidedata computation might have move the file cursors around
3955 sdfh.seek(current_offset, os.SEEK_SET)
3958 sdfh.seek(current_offset, os.SEEK_SET)
3956 sdfh.write(serialized_sidedata)
3959 sdfh.write(serialized_sidedata)
3957 new_entries.append(entry_update)
3960 new_entries.append(entry_update)
3958 current_offset += len(serialized_sidedata)
3961 current_offset += len(serialized_sidedata)
3959 self._docket.sidedata_end = sdfh.tell()
3962 self._docket.sidedata_end = sdfh.tell()
3960
3963
3961 # rewrite the new index entries
3964 # rewrite the new index entries
3962 ifh.seek(startrev * self.index.entry_size)
3965 ifh.seek(startrev * self.index.entry_size)
3963 for i, e in enumerate(new_entries):
3966 for i, e in enumerate(new_entries):
3964 rev = startrev + i
3967 rev = startrev + i
3965 self.index.replace_sidedata_info(rev, *e)
3968 self.index.replace_sidedata_info(rev, *e)
3966 packed = self.index.entry_binary(rev)
3969 packed = self.index.entry_binary(rev)
3967 if rev == 0 and self._docket is None:
3970 if rev == 0 and self._docket is None:
3968 header = self._format_flags | self._format_version
3971 header = self._format_flags | self._format_version
3969 header = self.index.pack_header(header)
3972 header = self.index.pack_header(header)
3970 packed = header + packed
3973 packed = header + packed
3971 ifh.write(packed)
3974 ifh.write(packed)
General Comments 0
You need to be logged in to leave comments. Login now