bundlerepo: fix mismatches with repository and revlog classes...
Matt Harbison
r52764:cfd30df0 default

--- a/mercurial/bundlerepo.py
+++ b/mercurial/bundlerepo.py
@@ -1,766 +1,770 @@
 # bundlerepo.py - repository class for viewing uncompressed bundles
 #
 # Copyright 2006, 2007 Benoit Boissinot <bboissin@gmail.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
 """Repository class for viewing uncompressed bundles.
 
 This provides a read-only repository interface to bundles as if they
 were part of the actual repository.
 """
 
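The "as if they were part of the actual repository" behavior is reachable through the standard repository factory: a bundle: URL names a base repo plus a bundle file, and instance() below routes it to this module. A minimal smoke test of that entry point, with made-up paths, assuming the usual mercurial.hg and mercurial.ui APIs:

    from mercurial import hg, ui as uimod

    u = uimod.ui.load()
    # 'bundle:<repopath>+<bundlepath>' is the URL form parsed by instance()
    repo = hg.repository(u, b'bundle:/path/to/repo+/path/to/incoming.hg')
    print(len(repo))  # revision count includes changesets from the bundle
    repo.close()
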
 from __future__ import annotations
 
 import contextlib
 import os
 import shutil
 import typing
 
 from .i18n import _
 from .node import (
     hex,
     nullrev,
 )
 
 from . import (
     bundle2,
     changegroup,
     changelog,
     cmdutil,
     discovery,
     encoding,
     error,
     exchange,
     filelog,
     localrepo,
     manifest,
     mdiff,
     pathutil,
     phases,
     pycompat,
     revlog,
     revlogutils,
     util,
     vfs as vfsmod,
 )
 from .utils import (
     urlutil,
 )
 
 from .revlogutils import (
     constants as revlog_constants,
 )
 
 
 class bundlerevlog(revlog.revlog):
-    def __init__(self, opener, target, radix, cgunpacker, linkmapper):
+    def __init__(
+        self, opener: typing.Any, target, radix, cgunpacker, linkmapper
+    ):
+        # TODO: figure out real type of opener
+        #
         # How it works:
         # To retrieve a revision, we need to know the offset of the revision in
         # the bundle (an unbundle object). We store this offset in the index
         # (start). The base of the delta is stored in the base field.
         #
         # To differentiate a rev in the bundle from a rev in the revlog, we
         # check revision against repotiprev.
         opener = vfsmod.readonlyvfs(opener)
         revlog.revlog.__init__(self, opener, target=target, radix=radix)
         self.bundle = cgunpacker
         n = len(self)
         self.repotiprev = n - 1
         self.bundlerevs = set()  # used by 'bundle()' revset expression
         for deltadata in cgunpacker.deltaiter():
             node, p1, p2, cs, deltabase, delta, flags, sidedata = deltadata
 
             size = len(delta)
             start = cgunpacker.tell() - size
 
             if self.index.has_node(node):
                 # this can happen if two branches make the same change
                 self.bundlerevs.add(self.index.rev(node))
                 continue
             if cs == node:
                 linkrev = nullrev
             else:
                 linkrev = linkmapper(cs)
 
             for p in (p1, p2):
                 if not self.index.has_node(p):
                     raise error.LookupError(
                         p, self.display_id, _(b"unknown parent")
                     )
 
             if not self.index.has_node(deltabase):
                 raise error.LookupError(
                     deltabase, self.display_id, _(b'unknown delta base')
                 )
 
             baserev = self.rev(deltabase)
             # start, size, full unc. size, base (unused), link, p1, p2, node, sidedata_offset (unused), sidedata_size (unused)
             e = revlogutils.entry(
                 flags=flags,
                 data_offset=start,
                 data_compressed_length=size,
                 data_delta_base=baserev,
                 link_rev=linkrev,
                 parent_rev_1=self.rev(p1),
                 parent_rev_2=self.rev(p2),
                 node_id=node,
             )
             self.index.append(e)
             self.bundlerevs.add(n)
             n += 1
 
     @contextlib.contextmanager
     def reading(self):
         if self.repotiprev < 0:
             yield
         else:
             with super().reading() as x:
                 yield x
 
     def _chunk(self, rev):
         # Warning: in case of bundle, the diff is against what we stored as
         # delta base, not against rev - 1
         # XXX: could use some caching
         if rev <= self.repotiprev:
-            return revlog.revlog._chunk(self, rev)
+            return super(bundlerevlog, self)._inner._chunk(rev)
         self.bundle.seek(self.start(rev))
         return self.bundle.read(self.length(rev))
 
     def revdiff(self, rev1, rev2):
         """return or calculate a delta between two revisions"""
         if rev1 > self.repotiprev and rev2 > self.repotiprev:
             # hot path for bundle
             revb = self.index[rev2][3]
             if revb == rev1:
                 return self._chunk(rev2)
         elif rev1 <= self.repotiprev and rev2 <= self.repotiprev:
             return revlog.revlog.revdiff(self, rev1, rev2)
 
         return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
 
     def _rawtext(self, node, rev):
         if rev is None:
             rev = self.rev(node)
         validated = False
         rawtext = None
         chain = []
         iterrev = rev
         # reconstruct the revision if it is from a changegroup
         while iterrev > self.repotiprev:
             if (
                 self._inner._revisioncache
                 and self._inner._revisioncache[1] == iterrev
             ):
                 rawtext = self._inner._revisioncache[2]
                 break
             chain.append(iterrev)
             iterrev = self.index[iterrev][3]
         if iterrev == nullrev:
             rawtext = b''
         elif rawtext is None:
             r = super(bundlerevlog, self)._rawtext(
                 self.node(iterrev),
                 iterrev,
             )
             __, rawtext, validated = r
         if chain:
             validated = False
         while chain:
             delta = self._chunk(chain.pop())
             rawtext = mdiff.patches(rawtext, [delta])
         return rev, rawtext, validated
 
     def addrevision(self, *args, **kwargs):
         raise NotImplementedError
 
     def addgroup(self, *args, **kwargs):
         raise NotImplementedError
 
     def strip(self, *args, **kwargs):
         raise NotImplementedError
 
     def checksize(self):
         raise NotImplementedError
 
 
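To make the repotiprev split above concrete: revisions at or below repotiprev resolve against on-disk storage, anything above resolves against changegroup data. A toy sketch of that two-tier lookup (names invented for illustration, not part of bundlerepo.py):

    class OverlayIndex:
        """Toy stand-in for bundlerevlog's two-tier revision lookup."""

        def __init__(self, disk_revs, bundle_revs):
            self.disk = disk_revs          # payloads already in the revlog
            self.bundle = bundle_revs      # payloads parsed from the bundle
            self.repotiprev = len(disk_revs) - 1

        def get(self, rev):
            if rev <= self.repotiprev:
                return self.disk[rev]      # normal revlog path
            return self.bundle[rev - self.repotiprev - 1]

    idx = OverlayIndex([b'a', b'b'], [b'c'])
    assert idx.get(1) == b'b' and idx.get(2) == b'c'
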
 class bundlechangelog(bundlerevlog, changelog.changelog):
     def __init__(self, opener, cgunpacker):
         changelog.changelog.__init__(self, opener)
         linkmapper = lambda x: x
         bundlerevlog.__init__(
             self,
             opener,
             (revlog_constants.KIND_CHANGELOG, None),
             self.radix,
             cgunpacker,
             linkmapper,
         )
 
 
 class bundlemanifest(bundlerevlog, manifest.manifestrevlog):
     def __init__(
         self,
         nodeconstants,
         opener,
         cgunpacker,
         linkmapper,
         dirlogstarts=None,
         dir=b'',
     ):
         # XXX manifestrevlog is not actually a revlog, so mixing it with
         # bundlerevlog is not a good idea.
         manifest.manifestrevlog.__init__(self, nodeconstants, opener, tree=dir)
         bundlerevlog.__init__(
             self,
             opener,
             (revlog_constants.KIND_MANIFESTLOG, dir),
             self._revlog.radix,
             cgunpacker,
             linkmapper,
         )
         if dirlogstarts is None:
             dirlogstarts = {}
             if self.bundle.version == b"03":
                 dirlogstarts = _getfilestarts(self.bundle)
         self._dirlogstarts = dirlogstarts
         self._linkmapper = linkmapper
 
     def dirlog(self, d):
         if d in self._dirlogstarts:
             self.bundle.seek(self._dirlogstarts[d])
             return bundlemanifest(
                 self.nodeconstants,
                 self.opener,
                 self.bundle,
                 self._linkmapper,
                 self._dirlogstarts,
                 dir=d,
             )
         return super(bundlemanifest, self).dirlog(d)
 
 
 class bundlefilelog(filelog.filelog):
     def __init__(self, opener, path, cgunpacker, linkmapper):
         filelog.filelog.__init__(self, opener, path)
         self._revlog = bundlerevlog(
             opener,
             # XXX should use the unencoded path
             target=(revlog_constants.KIND_FILELOG, path),
             radix=self._revlog.radix,
             cgunpacker=cgunpacker,
             linkmapper=linkmapper,
         )
 
 
 class bundlepeer(localrepo.localpeer):
     def canpush(self):
         return False
 
 
 class bundlephasecache(phases.phasecache):
     def __init__(self, *args, **kwargs):
         super(bundlephasecache, self).__init__(*args, **kwargs)
         if hasattr(self, 'opener'):
             self.opener = vfsmod.readonlyvfs(self.opener)
 
-    def write(self):
+    def write(self, repo):
         raise NotImplementedError
 
-    def _write(self, fp):
+    def _write(self, repo, fp):
         raise NotImplementedError
 
     def _updateroots(self, repo, phase, newroots, tr, invalidate=True):
         self._phaseroots[phase] = newroots
         if invalidate:
             self.invalidate()
         self.dirty = True
 
 
 def _getfilestarts(cgunpacker):
     filespos = {}
     for chunkdata in iter(cgunpacker.filelogheader, {}):
         fname = chunkdata[b'filename']
         filespos[fname] = cgunpacker.tell()
         for chunk in iter(lambda: cgunpacker.deltachunk(None), {}):
             pass
     return filespos
 
 
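_getfilestarts leans on the two-argument iter(callable, sentinel) form twice: filelogheader() and deltachunk(None) are called repeatedly until they return the empty-dict sentinel. The same idiom in isolation:

    import io

    buf = io.BytesIO(b'abcdefgh')
    # call buf.read(4) until it returns the sentinel b''
    chunks = list(iter(lambda: buf.read(4), b''))
    assert chunks == [b'abcd', b'efgh']
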
 _bundle_repo_baseclass = object
 
 if typing.TYPE_CHECKING:
     _bundle_repo_baseclass = localrepo.localrepository
 
 
 class bundlerepository(_bundle_repo_baseclass):
     """A repository instance that is a union of a local repo and a bundle.
 
     Instances represent a read-only repository composed of a local repository
     with the contents of a bundle file applied. The repository instance is
     conceptually similar to the state of a repository after an
     ``hg unbundle`` operation. However, the contents of the bundle are never
     applied to the actual base repository.
 
     Instances constructed directly are not usable as repository objects.
     Use instance() or makebundlerepository() to create instances.
     """
 
     def __init__(self, bundlepath, url, tempparent):
         self._tempparent = tempparent
         self._url = url
 
         self.ui.setconfig(b'phases', b'publish', False, b'bundlerepo')
 
         # dict with the mapping 'filename' -> position in the changegroup.
         self._cgfilespos = {}
         self._bundlefile = None
         self._cgunpacker = None
         self.tempfile = None
         f = util.posixfile(bundlepath, b"rb")
         bundle = exchange.readbundle(self.ui, f, bundlepath)
 
         if isinstance(bundle, bundle2.unbundle20):
             self._bundlefile = bundle
 
             cgpart = None
             for part in bundle.iterparts(seekable=True):
                 if part.type == b'phase-heads':
                     self._handle_bundle2_phase_part(bundle, part)
                 elif part.type == b'changegroup':
                     if cgpart:
                         raise NotImplementedError(
                             b"can't process multiple changegroups"
                         )
                     cgpart = part
                     self._handle_bundle2_cg_part(bundle, part)
 
             if not cgpart:
                 raise error.Abort(_(b"No changegroups found"))
 
             # This is required to placate a later consumer, which expects
             # the payload offset to be at the beginning of the changegroup.
             # We need to do this after the iterparts() generator advances
             # because iterparts() will seek to end of payload after the
             # generator returns control to iterparts().
             cgpart.seek(0, os.SEEK_SET)
 
         elif isinstance(bundle, changegroup.cg1unpacker):
             self._handle_bundle1(bundle, bundlepath)
         else:
             raise error.Abort(
                 _(b'bundle type %r cannot be read') % type(bundle)
             )
 
     def _handle_bundle1(self, bundle, bundlepath):
         if bundle.compressed():
             f = self._writetempbundle(bundle.read, b'.hg10un', header=b'HG10UN')
             bundle = exchange.readbundle(self.ui, f, bundlepath, self.vfs)
 
         self._bundlefile = bundle
         self._cgunpacker = bundle
 
         self.firstnewrev = self.changelog.repotiprev + 1
         phases.retractboundary(
             self,
             None,
             phases.draft,
             [ctx.node() for ctx in self[self.firstnewrev :]],
         )
 
     def _handle_bundle2_cg_part(self, bundle, part):
         assert part.type == b'changegroup'
         cgstream = part
         targetphase = part.params.get(b'targetphase')
         try:
             targetphase = int(targetphase)
         except TypeError:
             pass
         if targetphase is None:
             targetphase = phases.draft
         if targetphase not in phases.allphases:
             m = _(b'unsupported targetphase: %d')
             m %= targetphase
             raise error.Abort(m)
         version = part.params.get(b'version', b'01')
         legalcgvers = changegroup.supportedincomingversions(self)
         if version not in legalcgvers:
             msg = _(b'Unsupported changegroup version: %s')
             raise error.Abort(msg % version)
         if bundle.compressed():
             cgstream = self._writetempbundle(part.read, b'.cg%sun' % version)
 
         self._cgunpacker = changegroup.getunbundler(version, cgstream, b'UN')
 
         self.firstnewrev = self.changelog.repotiprev + 1
         phases.retractboundary(
             self,
             None,
             targetphase,
             [ctx.node() for ctx in self[self.firstnewrev :]],
         )
 
     def _handle_bundle2_phase_part(self, bundle, part):
         assert part.type == b'phase-heads'
 
         unfi = self.unfiltered()
         headsbyphase = phases.binarydecode(part)
         phases.updatephases(unfi, lambda: None, headsbyphase)
 
     def _writetempbundle(self, readfn, suffix, header=b''):
         """Write a temporary file to disk"""
         fdtemp, temp = self.vfs.mkstemp(prefix=b"hg-bundle-", suffix=suffix)
         self.tempfile = temp
 
         with os.fdopen(fdtemp, 'wb') as fptemp:
             fptemp.write(header)
             while True:
                 chunk = readfn(2**18)
                 if not chunk:
                     break
                 fptemp.write(chunk)
 
         return self.vfs.open(self.tempfile, mode=b"rb")
 
     @localrepo.unfilteredpropertycache
     def _phasecache(self):
         return bundlephasecache(self, self._phasedefaults)
 
     @localrepo.unfilteredpropertycache
     def changelog(self):
         # consume the header if it exists
         self._cgunpacker.changelogheader()
         c = bundlechangelog(self.svfs, self._cgunpacker)
         self.manstart = self._cgunpacker.tell()
         return c
 
     def _refreshchangelog(self):
         # The changelog for a bundle repo is not a filecache, so this method
         # is not applicable.
         pass
 
     @localrepo.unfilteredpropertycache
     def manifestlog(self):
         self._cgunpacker.seek(self.manstart)
         # consume the header if it exists
         self._cgunpacker.manifestheader()
         linkmapper = self.unfiltered().changelog.rev
         rootstore = bundlemanifest(
             self.nodeconstants, self.svfs, self._cgunpacker, linkmapper
         )
         self.filestart = self._cgunpacker.tell()
 
         return manifest.manifestlog(
             self.svfs, self, rootstore, self.narrowmatch()
         )
 
     def _consumemanifest(self):
         """Consumes the manifest portion of the bundle, setting filestart so the
         file portion can be read."""
         self._cgunpacker.seek(self.manstart)
         self._cgunpacker.manifestheader()
         for delta in self._cgunpacker.deltaiter():
             pass
         self.filestart = self._cgunpacker.tell()
 
     @localrepo.unfilteredpropertycache
     def manstart(self):
         self.changelog
         return self.manstart
 
     @localrepo.unfilteredpropertycache
     def filestart(self):
         self.manifestlog
 
         # If filestart was not set by self.manifestlog, that means the
         # manifestlog implementation did not consume the manifests from the
         # changegroup (ex: it might be consuming trees from a separate bundle2
         # part instead). So we need to manually consume it.
         if 'filestart' not in self.__dict__:
             self._consumemanifest()
 
         return self.filestart
 
     def url(self):
         return self._url
 
     def file(self, f):
         if not self._cgfilespos:
             self._cgunpacker.seek(self.filestart)
             self._cgfilespos = _getfilestarts(self._cgunpacker)
 
         if f in self._cgfilespos:
             self._cgunpacker.seek(self._cgfilespos[f])
             linkmapper = self.unfiltered().changelog.rev
             return bundlefilelog(self.svfs, f, self._cgunpacker, linkmapper)
         else:
             return super(bundlerepository, self).file(f)
 
     def close(self):
         """Close assigned bundle file immediately."""
         self._bundlefile.close()
         if self.tempfile is not None:
             self.vfs.unlink(self.tempfile)
         if self._tempparent:
             shutil.rmtree(self._tempparent, True)
 
     def cancopy(self):
         return False
 
     def peer(self, path=None, remotehidden=False):
         return bundlepeer(self, path=path, remotehidden=remotehidden)
 
     def getcwd(self):
         return encoding.getcwd()  # always outside the repo
 
     # Check if parents exist in localrepo before setting
     def setparents(self, p1, p2=None):
         if p2 is None:
             p2 = self.nullid
         p1rev = self.changelog.rev(p1)
         p2rev = self.changelog.rev(p2)
         msg = _(b"setting parent to node %s that only exists in the bundle\n")
         if self.changelog.repotiprev < p1rev:
             self.ui.warn(msg % hex(p1))
         if self.changelog.repotiprev < p2rev:
             self.ui.warn(msg % hex(p2))
         return super(bundlerepository, self).setparents(p1, p2)
 
 
 def instance(ui, path, create, intents=None, createopts=None):
     if create:
         raise error.Abort(_(b'cannot create new bundle repository'))
     # internal config: bundle.mainreporoot
     parentpath = ui.config(b"bundle", b"mainreporoot")
     if not parentpath:
         # try to find the correct path to the working directory repo
         parentpath = cmdutil.findrepo(encoding.getcwd())
         if parentpath is None:
             parentpath = b''
     if parentpath:
         # Try to make the full path relative so we get a nice, short URL.
         # In particular, we don't want temp dir names in test outputs.
         cwd = encoding.getcwd()
         if parentpath == cwd:
             parentpath = b''
         else:
             cwd = pathutil.normasprefix(cwd)
             if parentpath.startswith(cwd):
                 parentpath = parentpath[len(cwd) :]
     u = urlutil.url(path)
     path = u.localpath()
     if u.scheme == b'bundle':
         s = path.split(b"+", 1)
         if len(s) == 1:
             repopath, bundlename = parentpath, s[0]
         else:
             repopath, bundlename = s
     else:
         repopath, bundlename = parentpath, path
 
     return makebundlerepository(ui, repopath, bundlename)
 
 
 def makebundlerepository(ui, repopath, bundlepath):
     """Make a bundle repository object based on repo and bundle paths."""
     if repopath:
         url = b'bundle:%s+%s' % (util.expandpath(repopath), bundlepath)
     else:
         url = b'bundle:%s' % bundlepath
 
     # Because we can't make any guarantees about the type of the base
     # repository, we can't have a static class representing the bundle
     # repository. We also can't make any guarantees about how to even
     # call the base repository's constructor!
     #
     # So, our strategy is to go through ``localrepo.instance()`` to construct
     # a repo instance. Then, we dynamically create a new type derived from
     # both it and our ``bundlerepository`` class which overrides some
     # functionality. We then change the type of the constructed repository
     # to this new type and initialize the bundle-specific bits of it.
 
     try:
         repo = localrepo.instance(ui, repopath, create=False)
         tempparent = None
     except error.RequirementError:
         raise  # no fallback if the backing repo is unsupported
     except error.RepoError:
         tempparent = pycompat.mkdtemp()
         try:
             repo = localrepo.instance(ui, tempparent, create=True)
         except Exception:
             shutil.rmtree(tempparent)
             raise
 
     class derivedbundlerepository(bundlerepository, repo.__class__):
         pass
 
     repo.__class__ = derivedbundlerepository
     bundlerepository.__init__(repo, bundlepath, url, tempparent)
 
     return repo
 
 
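The retyping trick in makebundlerepository(), deriving a class on the fly and assigning it to a live instance's __class__, works for any pair of layout-compatible plain classes. A reduced sketch with toy classes standing in for the repository types:

    class Base:
        def greet(self):
            return 'base'

    class Overlay:
        def greet(self):
            return 'overlay over ' + super().greet()

    obj = Base()

    class Derived(Overlay, Base):
        pass

    obj.__class__ = Derived  # retype the already-constructed instance
    assert obj.greet() == 'overlay over base'
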
 class bundletransactionmanager:
     def transaction(self):
         return None
 
     def close(self):
         raise NotImplementedError
 
     def release(self):
         raise NotImplementedError
 
 
 def getremotechanges(
     ui, repo, peer, onlyheads=None, bundlename=None, force=False
 ):
     """obtains a bundle of changes incoming from peer
 
     "onlyheads" restricts the returned changes to those reachable from the
     specified heads.
     "bundlename", if given, stores the bundle to this file path permanently;
     otherwise it's stored to a temp file and gets deleted again when you call
     the returned "cleanupfn".
     "force" indicates whether to proceed on unrelated repos.
 
     Returns a tuple (local, csets, cleanupfn):
 
     "local" is a local repo from which to obtain the actual incoming
     changesets; it is a bundlerepo for the obtained bundle when the
     original "peer" is remote.
     "csets" lists the incoming changeset node ids.
     "cleanupfn" must be called without arguments when you're done processing
     the changes; it closes both the original "peer" and the one returned
     here.
     """
     tmp = discovery.findcommonincoming(repo, peer, heads=onlyheads, force=force)
     common, incoming, rheads = tmp
     if not incoming:
         try:
             if bundlename:
                 os.unlink(bundlename)
         except OSError:
             pass
         return repo, [], peer.close
 
     commonset = set(common)
     rheads = [x for x in rheads if x not in commonset]
 
     bundle = None
     bundlerepo = None
     localrepo = peer.local()
     if bundlename or not localrepo:
         # create a bundle (uncompressed if peer repo is not local)
 
         # developer config: devel.legacy.exchange
         legexc = ui.configlist(b'devel', b'legacy.exchange')
         forcebundle1 = b'bundle2' not in legexc and b'bundle1' in legexc
         canbundle2 = (
             not forcebundle1
             and peer.capable(b'getbundle')
             and peer.capable(b'bundle2')
         )
         if canbundle2:
             with peer.commandexecutor() as e:
                 b2 = e.callcommand(
                     b'getbundle',
                     {
                         b'source': b'incoming',
                         b'common': common,
                         b'heads': rheads,
                         b'bundlecaps': exchange.caps20to10(
                             repo, role=b'client'
                         ),
                         b'cg': True,
                     },
                 ).result()
 
                 fname = bundle = changegroup.writechunks(
                     ui, b2._forwardchunks(), bundlename
                 )
         else:
             if peer.capable(b'getbundle'):
                 with peer.commandexecutor() as e:
                     cg = e.callcommand(
                         b'getbundle',
                         {
                             b'source': b'incoming',
                             b'common': common,
                             b'heads': rheads,
                         },
                     ).result()
             elif onlyheads is None and not peer.capable(b'changegroupsubset'):
                 # compat with older servers when pulling all remote heads
 
                 with peer.commandexecutor() as e:
                     cg = e.callcommand(
                         b'changegroup',
                         {
                             b'nodes': incoming,
                             b'source': b'incoming',
                         },
                     ).result()
 
                 rheads = None
             else:
                 with peer.commandexecutor() as e:
                     cg = e.callcommand(
                         b'changegroupsubset',
                         {
                             b'bases': incoming,
                             b'heads': rheads,
                             b'source': b'incoming',
                         },
                     ).result()
 
             if localrepo:
                 bundletype = b"HG10BZ"
             else:
                 bundletype = b"HG10UN"
             fname = bundle = bundle2.writebundle(ui, cg, bundlename, bundletype)
         # keep written bundle?
         if bundlename:
             bundle = None
         if not localrepo:
             # use the created uncompressed bundlerepo
             localrepo = bundlerepo = makebundlerepository(
                 repo.baseui, repo.root, fname
             )
 
             # this repo contains local and peer now, so filter out local again
             common = repo.heads()
     if localrepo:
         # Part of common may be remotely filtered, so use an unfiltered
         # version. The discovery process probably needs cleanup to avoid that.
         localrepo = localrepo.unfiltered()
 
     csets = localrepo.changelog.findmissing(common, rheads)
 
     if bundlerepo:
         reponodes = [ctx.node() for ctx in bundlerepo[bundlerepo.firstnewrev :]]
 
         with peer.commandexecutor() as e:
             remotephases = e.callcommand(
                 b'listkeys',
                 {
                     b'namespace': b'phases',
                 },
             ).result()
 
         pullop = exchange.pulloperation(
             bundlerepo, peer, path=None, heads=reponodes
         )
         pullop.trmanager = bundletransactionmanager()
         exchange._pullapplyphases(pullop, remotephases)
 
     def cleanup():
         if bundlerepo:
             bundlerepo.close()
         if bundle:
             os.unlink(bundle)
         peer.close()
 
     return (localrepo, csets, cleanup)
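
A hypothetical caller of getremotechanges(), sketched under the assumption that ui, repo, and peer were obtained through the usual hg entry points; the cleanup function must run even if processing fails:

    local, csets, cleanupfn = getremotechanges(ui, repo, peer)
    try:
        for node in csets:  # incoming changeset node ids
            ui.write(local[node].description() + b'\n')
    finally:
        cleanupfn()  # closes the peer, plus the bundlerepo/tempfile if any
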
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -1,4127 +1,4128 @@
 # revlog.py - storage back-end for mercurial
 # coding: utf8
 #
 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
 """Storage back-end for Mercurial.
 
 This provides efficient delta storage with O(1) retrieve and append
 and O(changes) merge between branches.
 """
 
 from __future__ import annotations
 
 import binascii
 import collections
 import contextlib
 import functools
 import io
 import os
 import struct
 import typing
 import weakref
 import zlib
 
 from typing import (
     Iterable,
     Iterator,
     Optional,
     Tuple,
 )
 
 # import stuff from node for others to import from revlog
 from .node import (
     bin,
     hex,
     nullrev,
     sha1nodeconstants,
     short,
     wdirrev,
 )
 from .i18n import _
 from .revlogutils.constants import (
     ALL_KINDS,
     CHANGELOGV2,
     COMP_MODE_DEFAULT,
     COMP_MODE_INLINE,
     COMP_MODE_PLAIN,
     DELTA_BASE_REUSE_NO,
     DELTA_BASE_REUSE_TRY,
     ENTRY_RANK,
     FEATURES_BY_VERSION,
     FLAG_GENERALDELTA,
     FLAG_INLINE_DATA,
     INDEX_HEADER,
     KIND_CHANGELOG,
     KIND_FILELOG,
     RANK_UNKNOWN,
     REVLOGV0,
     REVLOGV1,
     REVLOGV1_FLAGS,
     REVLOGV2,
     REVLOGV2_FLAGS,
     REVLOG_DEFAULT_FLAGS,
     REVLOG_DEFAULT_FORMAT,
     REVLOG_DEFAULT_VERSION,
     SUPPORTED_FLAGS,
 )
 from .revlogutils.flagutil import (
     REVIDX_DEFAULT_FLAGS,
     REVIDX_ELLIPSIS,
     REVIDX_EXTSTORED,
     REVIDX_FLAGS_ORDER,
     REVIDX_HASCOPIESINFO,
     REVIDX_ISCENSORED,
     REVIDX_RAWTEXT_CHANGING_FLAGS,
 )
 from .thirdparty import attr
 
 # Force pytype to use the non-vendored package
 if typing.TYPE_CHECKING:
     # noinspection PyPackageRequirements
     import attr
 
 from . import (
     ancestor,
     dagop,
     error,
     mdiff,
     policy,
     pycompat,
     revlogutils,
     templatefilters,
     util,
     vfs as vfsmod,
 )
 from .interfaces import (
     repository,
     util as interfaceutil,
 )
 from .revlogutils import (
     deltas as deltautil,
     docket as docketutil,
     flagutil,
     nodemap as nodemaputil,
     randomaccessfile,
     revlogv0,
     rewrite,
     sidedata as sidedatautil,
 )
 from .utils import (
     storageutil,
     stringutil,
 )
 
 # blanked usage of all the names to prevent pyflakes constraints
 # We need these names available in the module for extensions.
 
 REVLOGV0
 REVLOGV1
 REVLOGV2
 CHANGELOGV2
 FLAG_INLINE_DATA
 FLAG_GENERALDELTA
 REVLOG_DEFAULT_FLAGS
 REVLOG_DEFAULT_FORMAT
 REVLOG_DEFAULT_VERSION
 REVLOGV1_FLAGS
 REVLOGV2_FLAGS
 REVIDX_ISCENSORED
 REVIDX_ELLIPSIS
 REVIDX_HASCOPIESINFO
 REVIDX_EXTSTORED
 REVIDX_DEFAULT_FLAGS
 REVIDX_FLAGS_ORDER
 REVIDX_RAWTEXT_CHANGING_FLAGS
 
 parsers = policy.importmod('parsers')
 rustancestor = policy.importrust('ancestor')
 rustdagop = policy.importrust('dagop')
 rustrevlog = policy.importrust('revlog')
 
 # Aliased for performance.
 _zlibdecompress = zlib.decompress
 
 # max size of inline data embedded into a revlog
 _maxinline = 131072
 
 
 # Flag processors for REVIDX_ELLIPSIS.
 def ellipsisreadprocessor(rl, text):
     return text, False
 
 
 def ellipsiswriteprocessor(rl, text):
     return text, False
 
 
 def ellipsisrawprocessor(rl, text):
     return False
 
 
 ellipsisprocessor = (
     ellipsisreadprocessor,
     ellipsiswriteprocessor,
     ellipsisrawprocessor,
 )
 
 
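The (read, write, raw) tuple is the flag-processor contract: read and write processors return (text, validatehash), and the raw processor only reports whether rawdata() output can be hash-checked. A toy dispatch loop, illustrative rather than the real flagutil code, with an invented flag value:

    REVIDX_DEMO = 1 << 14  # illustrative flag bit, not a real revlog value

    def demoread(rl, text):
        return text, False  # pass text through, skip hash validation

    demoprocessor = (demoread, demoread, lambda rl, text: False)

    def apply_read(processors, rl, text, flags):
        validatehash = True
        for flag, (readfn, _writefn, _rawfn) in processors.items():
            if flags & flag:
                text, vhash = readfn(rl, text)
                validatehash = validatehash and vhash
        return text, validatehash

    text, validate = apply_read(
        {REVIDX_DEMO: demoprocessor}, None, b'data', REVIDX_DEMO
    )
    assert text == b'data' and validate is False
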
 def _verify_revision(rl, skipflags, state, node):
     """Verify the integrity of the given revlog ``node`` while providing a hook
     point for extensions to influence the operation."""
     if skipflags:
         state[b'skipread'].add(node)
     else:
         # Side-effect: read content and verify hash.
         rl.revision(node)
 
 
 # True if a fast implementation for persistent-nodemap is available
 #
 # We also consider we have a "fast" implementation in "pure" python because
 # people using pure don't really have performance considerations (and a
 # wheelbarrow of other slowness sources)
 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
     parsers, 'BaseIndexObject'
 )
 
 
 @attr.s(slots=True)
 class RevLogRevisionDelta:
     node = attr.ib()
     p1node = attr.ib()
     p2node = attr.ib()
     basenode = attr.ib()
     flags = attr.ib()
     baserevisionsize = attr.ib()
     revision = attr.ib()
     delta = attr.ib()
     sidedata = attr.ib()
     protocol_flags = attr.ib()
     linknode = attr.ib(default=None)
 
 
 revlogrevisiondelta = interfaceutil.implementer(repository.irevisiondelta)(
     RevLogRevisionDelta
 )
 
 if typing.TYPE_CHECKING:
     revlogrevisiondelta = RevLogRevisionDelta
 
 
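The wrap-then-alias pattern above (repeated for revlogproblem below) keeps the runtime interface registration without confusing type checkers, since the implementer() wrapper can obscure the precise class for some checkers. Reduced to a toy:

    import typing

    def implementer(cls):  # stand-in for interfaceutil.implementer(iface)
        cls._implements_iface = True
        return cls

    class ToyDelta:
        pass

    toydelta = implementer(ToyDelta)

    if typing.TYPE_CHECKING:
        toydelta = ToyDelta  # type checkers see the plain class
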
 @attr.s(frozen=True)
 class RevLogProblem:
     warning = attr.ib(default=None, type=Optional[bytes])
     error = attr.ib(default=None, type=Optional[bytes])
     node = attr.ib(default=None, type=Optional[bytes])
 
 
 revlogproblem = interfaceutil.implementer(repository.iverifyproblem)(
     RevLogProblem
 )
 
 if typing.TYPE_CHECKING:
     revlogproblem = RevLogProblem
 
 
 def parse_index_v1(data, inline):
     # call the C implementation to parse the index data
     index, cache = parsers.parse_index2(data, inline)
     return index, cache
 
 
 def parse_index_v2(data, inline):
     # call the C implementation to parse the index data
     index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
     return index, cache
 
 
 def parse_index_cl_v2(data, inline):
     # call the C implementation to parse the index data
     index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
     return index, cache
 
 
 if hasattr(parsers, 'parse_index_devel_nodemap'):
 
     def parse_index_v1_nodemap(data, inline):
         index, cache = parsers.parse_index_devel_nodemap(data, inline)
         return index, cache
 
 else:
     parse_index_v1_nodemap = None
 
 
 def parse_index_v1_rust(data, inline, default_header):
     cache = (0, data) if inline else None
     return rustrevlog.Index(data, default_header), cache
 
 
 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
 # signed integer)
 _maxentrysize = 0x7FFFFFFF
 
 FILE_TOO_SHORT_MSG = _(
     b'cannot read from revlog %s;'
     b' expected %d bytes from offset %d, data size is %d'
 )
 
 hexdigits = b'0123456789abcdefABCDEF'
 
 
 class _Config:
     def copy(self):
         return self.__class__(**self.__dict__)
 
 
280 @attr.s()
280 @attr.s()
281 class FeatureConfig(_Config):
281 class FeatureConfig(_Config):
282 """Hold configuration values about the available revlog features"""
282 """Hold configuration values about the available revlog features"""
283
283
284 # the default compression engine
284 # the default compression engine
285 compression_engine = attr.ib(default=b'zlib')
285 compression_engine = attr.ib(default=b'zlib')
286 # compression engines options
286 # compression engines options
287 compression_engine_options = attr.ib(default=attr.Factory(dict))
287 compression_engine_options = attr.ib(default=attr.Factory(dict))
288
288
289 # can we use censor on this revlog
289 # can we use censor on this revlog
290 censorable = attr.ib(default=False)
290 censorable = attr.ib(default=False)
291 # does this revlog use the "side data" feature
291 # does this revlog use the "side data" feature
292 has_side_data = attr.ib(default=False)
292 has_side_data = attr.ib(default=False)
293 # might remove rank configuration once the computation has no impact
293 # might remove rank configuration once the computation has no impact
294 compute_rank = attr.ib(default=False)
294 compute_rank = attr.ib(default=False)
295 # parent order is supposed to be semantically irrelevant, so we
295 # parent order is supposed to be semantically irrelevant, so we
296 # normally re-sort parents to ensure that the first parent is non-null,
296 # normally re-sort parents to ensure that the first parent is non-null,
297 # if there is a non-null parent at all.
297 # if there is a non-null parent at all.
298 # filelog abuses the parent order as a flag to mark some instances of
298 # filelog abuses the parent order as a flag to mark some instances of
299 # meta-encoded files, so allow it to disable this behavior.
299 # meta-encoded files, so allow it to disable this behavior.
300 canonical_parent_order = attr.ib(default=False)
300 canonical_parent_order = attr.ib(default=False)
301 # can ellipsis commit be used
301 # can ellipsis commit be used
302 enable_ellipsis = attr.ib(default=False)
302 enable_ellipsis = attr.ib(default=False)
303
303
304 def copy(self):
304 def copy(self):
305 new = super().copy()
305 new = super().copy()
306 new.compression_engine_options = self.compression_engine_options.copy()
306 new.compression_engine_options = self.compression_engine_options.copy()
307 return new
307 return new
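# Usage sketch for the copy() override above: the engine-options dict is
# duplicated too, so mutating the copy cannot leak into the original config.
def _demo_feature_config_copy():
    base = FeatureConfig(compression_engine=b'zstd')
    dup = base.copy()
    dup.compression_engine_options[b'level'] = 3
    assert b'level' not in base.compression_engine_options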
308
308
309
309
310 @attr.s()
310 @attr.s()
311 class DataConfig(_Config):
311 class DataConfig(_Config):
312 """Hold configuration value about how the revlog data are read"""
312 """Hold configuration value about how the revlog data are read"""
313
313
314 # should we try to open the "pending" version of the revlog
314 # should we try to open the "pending" version of the revlog
315 try_pending = attr.ib(default=False)
315 try_pending = attr.ib(default=False)
316 # should we try to open the "splitted" version of the revlog
316 # should we try to open the "splitted" version of the revlog
317 try_split = attr.ib(default=False)
317 try_split = attr.ib(default=False)
318 # When True, indexfile should be opened with checkambig=True at writing,
318 # When True, indexfile should be opened with checkambig=True at writing,
319 # to avoid file stat ambiguity.
319 # to avoid file stat ambiguity.
320 check_ambig = attr.ib(default=False)
320 check_ambig = attr.ib(default=False)
321
321
322 # If true, use mmap instead of reading to deal with large index
322 # If true, use mmap instead of reading to deal with large index
323 mmap_large_index = attr.ib(default=False)
323 mmap_large_index = attr.ib(default=False)
324 # how much data counts as "large"
324 # how much data counts as "large"
325 mmap_index_threshold = attr.ib(default=None)
325 mmap_index_threshold = attr.ib(default=None)
326 # How much data to read and cache into the raw revlog data cache.
326 # How much data to read and cache into the raw revlog data cache.
327 chunk_cache_size = attr.ib(default=65536)
327 chunk_cache_size = attr.ib(default=65536)
328
328
329 # The size of the uncompressed cache compared to the largest revision seen.
329 # The size of the uncompressed cache compared to the largest revision seen.
330 uncompressed_cache_factor = attr.ib(default=None)
330 uncompressed_cache_factor = attr.ib(default=None)
331
331
332 # The number of chunks cached
332 # The number of chunks cached
333 uncompressed_cache_count = attr.ib(default=None)
333 uncompressed_cache_count = attr.ib(default=None)
334
334
335 # Allow sparse reading of the revlog data
335 # Allow sparse reading of the revlog data
336 with_sparse_read = attr.ib(default=False)
336 with_sparse_read = attr.ib(default=False)
337 # minimal density of a sparse read chunk
337 # minimal density of a sparse read chunk
338 sr_density_threshold = attr.ib(default=0.50)
338 sr_density_threshold = attr.ib(default=0.50)
339 # minimal size of data we skip when performing sparse read
339 # minimal size of data we skip when performing sparse read
340 sr_min_gap_size = attr.ib(default=262144)
340 sr_min_gap_size = attr.ib(default=262144)
341
341
342 # are deltas encoded against arbitrary bases?
342 # are deltas encoded against arbitrary bases?
343 generaldelta = attr.ib(default=False)
343 generaldelta = attr.ib(default=False)
344
344
345
345
346 @attr.s()
346 @attr.s()
347 class DeltaConfig(_Config):
347 class DeltaConfig(_Config):
348 """Hold configuration value about how new delta are computed
348 """Hold configuration value about how new delta are computed
349
349
350 Some attributes are duplicated from DataConfig to help havign each object
350 Some attributes are duplicated from DataConfig to help havign each object
351 self contained.
351 self contained.
352 """
352 """
353
353
354 # can deltas be encoded against arbitrary bases?
354 # can deltas be encoded against arbitrary bases?
355 general_delta = attr.ib(default=False)
355 general_delta = attr.ib(default=False)
356 # Allow sparse writing of the revlog data
356 # Allow sparse writing of the revlog data
357 sparse_revlog = attr.ib(default=False)
357 sparse_revlog = attr.ib(default=False)
358 # maximum length of a delta chain
358 # maximum length of a delta chain
359 max_chain_len = attr.ib(default=None)
359 max_chain_len = attr.ib(default=None)
360 # Maximum distance between delta chain base start and end
360 # Maximum distance between delta chain base start and end
361 max_deltachain_span = attr.ib(default=-1)
361 max_deltachain_span = attr.ib(default=-1)
362 # If `upper_bound_comp` is not None, this is the expected maximal gain from
362 # If `upper_bound_comp` is not None, this is the expected maximal gain from
363 # compression for the data content.
363 # compression for the data content.
364 upper_bound_comp = attr.ib(default=None)
364 upper_bound_comp = attr.ib(default=None)
365 # Should we try a delta against both parents
365 # Should we try a delta against both parents
366 delta_both_parents = attr.ib(default=True)
366 delta_both_parents = attr.ib(default=True)
367 # Test delta base candidates in groups of at most this size.
367 # Test delta base candidates in groups of at most this size.
368 candidate_group_chunk_size = attr.ib(default=0)
368 candidate_group_chunk_size = attr.ib(default=0)
369 # Should we display debug information about delta computation
369 # Should we display debug information about delta computation
370 debug_delta = attr.ib(default=False)
370 debug_delta = attr.ib(default=False)
371 # trust incoming delta by default
371 # trust incoming delta by default
372 lazy_delta = attr.ib(default=True)
372 lazy_delta = attr.ib(default=True)
373 # trust the base of incoming delta by default
373 # trust the base of incoming delta by default
374 lazy_delta_base = attr.ib(default=False)
374 lazy_delta_base = attr.ib(default=False)
375
375
376
376
377 class _InnerRevlog:
377 class _InnerRevlog:
378 """An inner layer of the revlog object
378 """An inner layer of the revlog object
379
379
380 That layer exists to be able to delegate some operations to Rust; its
380 That layer exists to be able to delegate some operations to Rust; its
381 boundaries are arbitrary and based on what we can delegate to Rust.
381 boundaries are arbitrary and based on what we can delegate to Rust.
382 """
382 """
383
383
384 opener: vfsmod.vfs
384 opener: vfsmod.vfs
385
385
386 def __init__(
386 def __init__(
387 self,
387 self,
388 opener: vfsmod.vfs,
388 opener: vfsmod.vfs,
389 index,
389 index,
390 index_file,
390 index_file,
391 data_file,
391 data_file,
392 sidedata_file,
392 sidedata_file,
393 inline,
393 inline,
394 data_config,
394 data_config,
395 delta_config,
395 delta_config,
396 feature_config,
396 feature_config,
397 chunk_cache,
397 chunk_cache,
398 default_compression_header,
398 default_compression_header,
399 ):
399 ):
400 self.opener = opener
400 self.opener = opener
401 self.index = index
401 self.index = index
402
402
403 self.index_file = index_file
403 self.index_file = index_file
404 self.data_file = data_file
404 self.data_file = data_file
405 self.sidedata_file = sidedata_file
405 self.sidedata_file = sidedata_file
406 self.inline = inline
406 self.inline = inline
407 self.data_config = data_config
407 self.data_config = data_config
408 self.delta_config = delta_config
408 self.delta_config = delta_config
409 self.feature_config = feature_config
409 self.feature_config = feature_config
410
410
411 # used during diverted write.
411 # used during diverted write.
412 self._orig_index_file = None
412 self._orig_index_file = None
413
413
414 self._default_compression_header = default_compression_header
414 self._default_compression_header = default_compression_header
415
415
416 # index
416 # index
417
417
418 # 3-tuple of file handles being used for active writing.
418 # 3-tuple of file handles being used for active writing.
419 self._writinghandles = None
419 self._writinghandles = None
420
420
421 self._segmentfile = randomaccessfile.randomaccessfile(
421 self._segmentfile = randomaccessfile.randomaccessfile(
422 self.opener,
422 self.opener,
423 (self.index_file if self.inline else self.data_file),
423 (self.index_file if self.inline else self.data_file),
424 self.data_config.chunk_cache_size,
424 self.data_config.chunk_cache_size,
425 chunk_cache,
425 chunk_cache,
426 )
426 )
427 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
427 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
428 self.opener,
428 self.opener,
429 self.sidedata_file,
429 self.sidedata_file,
430 self.data_config.chunk_cache_size,
430 self.data_config.chunk_cache_size,
431 )
431 )
432
432
433 # revlog header -> revlog compressor
433 # revlog header -> revlog compressor
434 self._decompressors = {}
434 self._decompressors = {}
435 # 3-tuple of (node, rev, text) for a raw revision.
435 # 3-tuple of (node, rev, text) for a raw revision.
436 self._revisioncache = None
436 self._revisioncache = None
437
437
438 # cache some uncompressed chunks
438 # cache some uncompressed chunks
439 # rev → uncompressed_chunk
439 # rev → uncompressed_chunk
440 #
440 #
441 # the max cost is dynamically updated to be proportional to the
441 # the max cost is dynamically updated to be proportional to the
442 # size of the revisions we actually encounter.
442 # size of the revisions we actually encounter.
443 self._uncompressed_chunk_cache = None
443 self._uncompressed_chunk_cache = None
444 if self.data_config.uncompressed_cache_factor is not None:
444 if self.data_config.uncompressed_cache_factor is not None:
445 self._uncompressed_chunk_cache = util.lrucachedict(
445 self._uncompressed_chunk_cache = util.lrucachedict(
446 self.data_config.uncompressed_cache_count,
446 self.data_config.uncompressed_cache_count,
447 maxcost=65536, # some arbitrary initial value
447 maxcost=65536, # some arbitrary initial value
448 )
448 )
449
449
450 self._delay_buffer = None
450 self._delay_buffer = None
451
451
452 def __len__(self):
452 def __len__(self):
453 return len(self.index)
453 return len(self.index)
454
454
455 def clear_cache(self):
455 def clear_cache(self):
456 assert not self.is_delaying
456 assert not self.is_delaying
457 self._revisioncache = None
457 self._revisioncache = None
458 if self._uncompressed_chunk_cache is not None:
458 if self._uncompressed_chunk_cache is not None:
459 self._uncompressed_chunk_cache.clear()
459 self._uncompressed_chunk_cache.clear()
460 self._segmentfile.clear_cache()
460 self._segmentfile.clear_cache()
461 self._segmentfile_sidedata.clear_cache()
461 self._segmentfile_sidedata.clear_cache()
462
462
463 @property
463 @property
464 def canonical_index_file(self):
464 def canonical_index_file(self):
465 if self._orig_index_file is not None:
465 if self._orig_index_file is not None:
466 return self._orig_index_file
466 return self._orig_index_file
467 return self.index_file
467 return self.index_file
468
468
469 @property
469 @property
470 def is_delaying(self):
470 def is_delaying(self):
471 """is the revlog is currently delaying the visibility of written data?
471 """is the revlog is currently delaying the visibility of written data?
472
472
473 The delaying mechanism can be either in-memory or written on disk in a
473 The delaying mechanism can be either in-memory or written on disk in a
474 side-file."""
474 side-file."""
475 return (self._delay_buffer is not None) or (
475 return (self._delay_buffer is not None) or (
476 self._orig_index_file is not None
476 self._orig_index_file is not None
477 )
477 )
478
478
479 # Derived from index values.
479 # Derived from index values.
480
480
481 def start(self, rev):
481 def start(self, rev):
482 """the offset of the data chunk for this revision"""
482 """the offset of the data chunk for this revision"""
483 return int(self.index[rev][0] >> 16)
483 return int(self.index[rev][0] >> 16)
484
484
485 def length(self, rev):
485 def length(self, rev):
486 """the length of the data chunk for this revision"""
486 """the length of the data chunk for this revision"""
487 return self.index[rev][1]
487 return self.index[rev][1]
488
488
489 def end(self, rev):
489 def end(self, rev):
490 """the end of the data chunk for this revision"""
490 """the end of the data chunk for this revision"""
491 return self.start(rev) + self.length(rev)
491 return self.start(rev) + self.length(rev)
492
492
493 def deltaparent(self, rev):
493 def deltaparent(self, rev):
494 """return deltaparent of the given revision"""
494 """return deltaparent of the given revision"""
495 base = self.index[rev][3]
495 base = self.index[rev][3]
496 if base == rev:
496 if base == rev:
497 return nullrev
497 return nullrev
498 elif self.delta_config.general_delta:
498 elif self.delta_config.general_delta:
499 return base
499 return base
500 else:
500 else:
501 return rev - 1
501 return rev - 1
502
502
503 def issnapshot(self, rev):
503 def issnapshot(self, rev):
504 """tells whether rev is a snapshot"""
504 """tells whether rev is a snapshot"""
505 if not self.delta_config.sparse_revlog:
505 if not self.delta_config.sparse_revlog:
506 return self.deltaparent(rev) == nullrev
506 return self.deltaparent(rev) == nullrev
507 elif hasattr(self.index, 'issnapshot'):
507 elif hasattr(self.index, 'issnapshot'):
508 # directly assign the method to cache the testing and access
508 # directly assign the method to cache the testing and access
509 self.issnapshot = self.index.issnapshot
509 self.issnapshot = self.index.issnapshot
510 return self.issnapshot(rev)
510 return self.issnapshot(rev)
511 if rev == nullrev:
511 if rev == nullrev:
512 return True
512 return True
513 entry = self.index[rev]
513 entry = self.index[rev]
514 base = entry[3]
514 base = entry[3]
515 if base == rev:
515 if base == rev:
516 return True
516 return True
517 if base == nullrev:
517 if base == nullrev:
518 return True
518 return True
519 p1 = entry[5]
519 p1 = entry[5]
520 while self.length(p1) == 0:
520 while self.length(p1) == 0:
521 b = self.deltaparent(p1)
521 b = self.deltaparent(p1)
522 if b == p1:
522 if b == p1:
523 break
523 break
524 p1 = b
524 p1 = b
525 p2 = entry[6]
525 p2 = entry[6]
526 while self.length(p2) == 0:
526 while self.length(p2) == 0:
527 b = self.deltaparent(p2)
527 b = self.deltaparent(p2)
528 if b == p2:
528 if b == p2:
529 break
529 break
530 p2 = b
530 p2 = b
531 if base == p1 or base == p2:
531 if base == p1 or base == p2:
532 return False
532 return False
533 return self.issnapshot(base)
533 return self.issnapshot(base)
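# Note on the hasattr() branch above: binding the index's own `issnapshot`
# onto the instance caches both the capability check and the attribute
# lookup, so subsequent calls skip this pure-Python fallback entirely.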
534
534
535 def _deltachain(self, rev, stoprev=None):
535 def _deltachain(self, rev, stoprev=None):
536 """Obtain the delta chain for a revision.
536 """Obtain the delta chain for a revision.
537
537
538 ``stoprev`` specifies a revision to stop at. If not specified, we
538 ``stoprev`` specifies a revision to stop at. If not specified, we
539 stop at the base of the chain.
539 stop at the base of the chain.
540
540
541 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
541 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
542 revs in ascending order and ``stopped`` is a bool indicating whether
542 revs in ascending order and ``stopped`` is a bool indicating whether
543 ``stoprev`` was hit.
543 ``stoprev`` was hit.
544 """
544 """
545 generaldelta = self.delta_config.general_delta
545 generaldelta = self.delta_config.general_delta
546 # Try C implementation.
546 # Try C implementation.
547 try:
547 try:
548 return self.index.deltachain(rev, stoprev, generaldelta)
548 return self.index.deltachain(rev, stoprev, generaldelta)
549 except AttributeError:
549 except AttributeError:
550 pass
550 pass
551
551
552 chain = []
552 chain = []
553
553
554 # Alias to prevent attribute lookup in tight loop.
554 # Alias to prevent attribute lookup in tight loop.
555 index = self.index
555 index = self.index
556
556
557 iterrev = rev
557 iterrev = rev
558 e = index[iterrev]
558 e = index[iterrev]
559 while iterrev != e[3] and iterrev != stoprev:
559 while iterrev != e[3] and iterrev != stoprev:
560 chain.append(iterrev)
560 chain.append(iterrev)
561 if generaldelta:
561 if generaldelta:
562 iterrev = e[3]
562 iterrev = e[3]
563 else:
563 else:
564 iterrev -= 1
564 iterrev -= 1
565 e = index[iterrev]
565 e = index[iterrev]
566
566
567 if iterrev == stoprev:
567 if iterrev == stoprev:
568 stopped = True
568 stopped = True
569 else:
569 else:
570 chain.append(iterrev)
570 chain.append(iterrev)
571 stopped = False
571 stopped = False
572
572
573 chain.reverse()
573 chain.reverse()
574 return chain, stopped
574 return chain, stopped
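# A self-contained sketch of the pure-Python fallback above, using a plain
# list of delta-base pointers in place of a real index (generaldelta case);
# a base equal to its own revision marks a snapshot.
def _sketch_deltachain(bases, rev, stoprev=None):
    chain = []
    while rev != bases[rev] and rev != stoprev:
        chain.append(rev)
        rev = bases[rev]
    stopped = rev == stoprev
    if not stopped:
        chain.append(rev)
    chain.reverse()
    return chain, stopped

# e.g. _sketch_deltachain([0, 0, 1, 2], 3) == ([0, 1, 2, 3], False)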
575
575
576 @util.propertycache
576 @util.propertycache
577 def _compressor(self):
577 def _compressor(self):
578 engine = util.compengines[self.feature_config.compression_engine]
578 engine = util.compengines[self.feature_config.compression_engine]
579 return engine.revlogcompressor(
579 return engine.revlogcompressor(
580 self.feature_config.compression_engine_options
580 self.feature_config.compression_engine_options
581 )
581 )
582
582
583 @util.propertycache
583 @util.propertycache
584 def _decompressor(self):
584 def _decompressor(self):
585 """the default decompressor"""
585 """the default decompressor"""
586 if self._default_compression_header is None:
586 if self._default_compression_header is None:
587 return None
587 return None
588 t = self._default_compression_header
588 t = self._default_compression_header
589 c = self._get_decompressor(t)
589 c = self._get_decompressor(t)
590 return c.decompress
590 return c.decompress
591
591
592 def _get_decompressor(self, t: bytes):
592 def _get_decompressor(self, t: bytes):
593 try:
593 try:
594 compressor = self._decompressors[t]
594 compressor = self._decompressors[t]
595 except KeyError:
595 except KeyError:
596 try:
596 try:
597 engine = util.compengines.forrevlogheader(t)
597 engine = util.compengines.forrevlogheader(t)
598 compressor = engine.revlogcompressor(
598 compressor = engine.revlogcompressor(
599 self.feature_config.compression_engine_options
599 self.feature_config.compression_engine_options
600 )
600 )
601 self._decompressors[t] = compressor
601 self._decompressors[t] = compressor
602 except KeyError:
602 except KeyError:
603 raise error.RevlogError(
603 raise error.RevlogError(
604 _(b'unknown compression type %s') % binascii.hexlify(t)
604 _(b'unknown compression type %s') % binascii.hexlify(t)
605 )
605 )
606 return compressor
606 return compressor
607
607
608 def compress(self, data: bytes) -> Tuple[bytes, bytes]:
608 def compress(self, data: bytes) -> Tuple[bytes, bytes]:
609 """Generate a possibly-compressed representation of data."""
609 """Generate a possibly-compressed representation of data."""
610 if not data:
610 if not data:
611 return b'', data
611 return b'', data
612
612
613 compressed = self._compressor.compress(data)
613 compressed = self._compressor.compress(data)
614
614
615 if compressed:
615 if compressed:
616 # The revlog compressor added the header in the returned data.
616 # The revlog compressor added the header in the returned data.
617 return b'', compressed
617 return b'', compressed
618
618
619 if data[0:1] == b'\0':
619 if data[0:1] == b'\0':
620 return b'', data
620 return b'', data
621 return b'u', data
621 return b'u', data
622
622
623 def decompress(self, data: bytes):
623 def decompress(self, data: bytes):
624 """Decompress a revlog chunk.
624 """Decompress a revlog chunk.
625
625
626 The chunk is expected to begin with a header identifying the
626 The chunk is expected to begin with a header identifying the
627 format type so it can be routed to an appropriate decompressor.
627 format type so it can be routed to an appropriate decompressor.
628 """
628 """
629 if not data:
629 if not data:
630 return data
630 return data
631
631
632 # Revlogs are read much more frequently than they are written and many
632 # Revlogs are read much more frequently than they are written and many
633 # chunks only take microseconds to decompress, so performance is
633 # chunks only take microseconds to decompress, so performance is
634 # important here.
634 # important here.
635 #
635 #
636 # We can make a few assumptions about revlogs:
636 # We can make a few assumptions about revlogs:
637 #
637 #
638 # 1) the majority of chunks will be compressed (as opposed to inline
638 # 1) the majority of chunks will be compressed (as opposed to inline
639 # raw data).
639 # raw data).
640 # 2) decompressing *any* data will likely be at least 10x slower than
640 # 2) decompressing *any* data will likely be at least 10x slower than
641 # returning raw inline data.
641 # returning raw inline data.
642 # 3) we want to prioritize common and officially supported compression
642 # 3) we want to prioritize common and officially supported compression
643 # engines
643 # engines
644 #
644 #
645 # It follows that we want to optimize for "decompress compressed data
645 # It follows that we want to optimize for "decompress compressed data
646 # when encoded with common and officially supported compression engines"
646 # when encoded with common and officially supported compression engines"
647 # case over "raw data" and "data encoded by less common or non-official
647 # case over "raw data" and "data encoded by less common or non-official
648 # compression engines." That is why we have the inline lookup first
648 # compression engines." That is why we have the inline lookup first
649 # followed by the compengines lookup.
649 # followed by the compengines lookup.
650 #
650 #
651 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
651 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
652 # compressed chunks. And this matters for changelog and manifest reads.
652 # compressed chunks. And this matters for changelog and manifest reads.
653 t = data[0:1]
653 t = data[0:1]
654
654
655 if t == b'x':
655 if t == b'x':
656 try:
656 try:
657 return _zlibdecompress(data)
657 return _zlibdecompress(data)
658 except zlib.error as e:
658 except zlib.error as e:
659 raise error.RevlogError(
659 raise error.RevlogError(
660 _(b'revlog decompress error: %s')
660 _(b'revlog decompress error: %s')
661 % stringutil.forcebytestr(e)
661 % stringutil.forcebytestr(e)
662 )
662 )
663 # '\0' is more common than 'u' so it goes first.
663 # '\0' is more common than 'u' so it goes first.
664 elif t == b'\0':
664 elif t == b'\0':
665 return data
665 return data
666 elif t == b'u':
666 elif t == b'u':
667 return util.buffer(data, 1)
667 return util.buffer(data, 1)
668
668
669 compressor = self._get_decompressor(t)
669 compressor = self._get_decompressor(t)
670
670
671 return compressor.decompress(data)
671 return compressor.decompress(data)
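# Round-trip sketch for the header convention implemented above: compress()
# returns a (header, payload) pair whose concatenation decompress() routes on
# its first byte (b'x' zlib, b'u' stored verbatim, b'\0' raw). `inner` is
# assumed to be an _InnerRevlog instance.
def _compression_roundtrip(inner, text):
    header, payload = inner.compress(text)
    return bytes(inner.decompress(header + payload)) == text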
672
672
673 @contextlib.contextmanager
673 @contextlib.contextmanager
674 def reading(self):
674 def reading(self):
675 """Context manager that keeps data and sidedata files open for reading"""
675 """Context manager that keeps data and sidedata files open for reading"""
676 if len(self.index) == 0:
676 if len(self.index) == 0:
677 yield # nothing to be read
677 yield # nothing to be read
678 elif self._delay_buffer is not None and self.inline:
678 elif self._delay_buffer is not None and self.inline:
679 msg = "revlog with delayed write should not be inline"
679 msg = "revlog with delayed write should not be inline"
680 raise error.ProgrammingError(msg)
680 raise error.ProgrammingError(msg)
681 else:
681 else:
682 with self._segmentfile.reading():
682 with self._segmentfile.reading():
683 with self._segmentfile_sidedata.reading():
683 with self._segmentfile_sidedata.reading():
684 yield
684 yield
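# Usage sketch for reading(): hold the data/sidedata handles open across many
# reads instead of reopening files per revision, the same pattern
# split_inline() uses further below.
def _dump_raw_segments(inner):
    with inner.reading():
        return [inner.get_segment_for_revs(r, r)[1] for r in range(len(inner))]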
685
685
686 @property
686 @property
687 def is_writing(self):
687 def is_writing(self):
688 """True is a writing context is open"""
688 """True is a writing context is open"""
689 return self._writinghandles is not None
689 return self._writinghandles is not None
690
690
691 @property
691 @property
692 def is_open(self):
692 def is_open(self):
693 """True if any file handle is being held
693 """True if any file handle is being held
694
694
695 Used for asserts and debugging in the Python code"""
695 Used for asserts and debugging in the Python code"""
696 return self._segmentfile.is_open or self._segmentfile_sidedata.is_open
696 return self._segmentfile.is_open or self._segmentfile_sidedata.is_open
697
697
698 @contextlib.contextmanager
698 @contextlib.contextmanager
699 def writing(self, transaction, data_end=None, sidedata_end=None):
699 def writing(self, transaction, data_end=None, sidedata_end=None):
700 """Open the revlog files for writing
700 """Open the revlog files for writing
701
701
702 Adding content to a revlog should be done within such a context.
702 Adding content to a revlog should be done within such a context.
703 """
703 """
704 if self.is_writing:
704 if self.is_writing:
705 yield
705 yield
706 else:
706 else:
707 ifh = dfh = sdfh = None
707 ifh = dfh = sdfh = None
708 try:
708 try:
709 r = len(self.index)
709 r = len(self.index)
710 # opening the data file.
710 # opening the data file.
711 dsize = 0
711 dsize = 0
712 if r:
712 if r:
713 dsize = self.end(r - 1)
713 dsize = self.end(r - 1)
714 dfh = None
714 dfh = None
715 if not self.inline:
715 if not self.inline:
716 try:
716 try:
717 dfh = self.opener(self.data_file, mode=b"r+")
717 dfh = self.opener(self.data_file, mode=b"r+")
718 if data_end is None:
718 if data_end is None:
719 dfh.seek(0, os.SEEK_END)
719 dfh.seek(0, os.SEEK_END)
720 else:
720 else:
721 dfh.seek(data_end, os.SEEK_SET)
721 dfh.seek(data_end, os.SEEK_SET)
722 except FileNotFoundError:
722 except FileNotFoundError:
723 dfh = self.opener(self.data_file, mode=b"w+")
723 dfh = self.opener(self.data_file, mode=b"w+")
724 transaction.add(self.data_file, dsize)
724 transaction.add(self.data_file, dsize)
725 if self.sidedata_file is not None:
725 if self.sidedata_file is not None:
726 assert sidedata_end is not None
726 assert sidedata_end is not None
727 # revlog-v2 does not inline, help Pytype
727 # revlog-v2 does not inline, help Pytype
728 assert dfh is not None
728 assert dfh is not None
729 try:
729 try:
730 sdfh = self.opener(self.sidedata_file, mode=b"r+")
730 sdfh = self.opener(self.sidedata_file, mode=b"r+")
731 dfh.seek(sidedata_end, os.SEEK_SET)
731 dfh.seek(sidedata_end, os.SEEK_SET)
732 except FileNotFoundError:
732 except FileNotFoundError:
733 sdfh = self.opener(self.sidedata_file, mode=b"w+")
733 sdfh = self.opener(self.sidedata_file, mode=b"w+")
734 transaction.add(self.sidedata_file, sidedata_end)
734 transaction.add(self.sidedata_file, sidedata_end)
735
735
736 # opening the index file.
736 # opening the index file.
737 isize = r * self.index.entry_size
737 isize = r * self.index.entry_size
738 ifh = self.__index_write_fp()
738 ifh = self.__index_write_fp()
739 if self.inline:
739 if self.inline:
740 transaction.add(self.index_file, dsize + isize)
740 transaction.add(self.index_file, dsize + isize)
741 else:
741 else:
742 transaction.add(self.index_file, isize)
742 transaction.add(self.index_file, isize)
743 # expose all file handles for writing.
743 # expose all file handles for writing.
744 self._writinghandles = (ifh, dfh, sdfh)
744 self._writinghandles = (ifh, dfh, sdfh)
745 self._segmentfile.writing_handle = ifh if self.inline else dfh
745 self._segmentfile.writing_handle = ifh if self.inline else dfh
746 self._segmentfile_sidedata.writing_handle = sdfh
746 self._segmentfile_sidedata.writing_handle = sdfh
747 yield
747 yield
748 finally:
748 finally:
749 self._writinghandles = None
749 self._writinghandles = None
750 self._segmentfile.writing_handle = None
750 self._segmentfile.writing_handle = None
751 self._segmentfile_sidedata.writing_handle = None
751 self._segmentfile_sidedata.writing_handle = None
752 if dfh is not None:
752 if dfh is not None:
753 dfh.close()
753 dfh.close()
754 if sdfh is not None:
754 if sdfh is not None:
755 sdfh.close()
755 sdfh.close()
756 # closing the index file last to avoid exposing references to
756 # closing the index file last to avoid exposing references to
757 # potentially unflushed data content.
757 # potentially unflushed data content.
758 if ifh is not None:
758 if ifh is not None:
759 ifh.close()
759 ifh.close()
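# Usage sketch for writing(): every revision addition must happen inside this
# context, which registers truncation points with the transaction and wires
# up the index/data/sidedata handles. `tr` is a hypothetical open transaction
# and `entries` a hypothetical iterable of prepared write_entry() arguments.
def _append_prepared_entries(inner, tr, entries):
    with inner.writing(tr):
        for args in entries:
            inner.write_entry(tr, *args)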
760
760
761 def __index_write_fp(self, index_end=None):
761 def __index_write_fp(self, index_end=None):
762 """internal method to open the index file for writing
762 """internal method to open the index file for writing
763
763
764 You should not use this directly; use `_writing` instead.
764 You should not use this directly; use `_writing` instead.
765 """
765 """
766 try:
766 try:
767 if self._delay_buffer is None:
767 if self._delay_buffer is None:
768 f = self.opener(
768 f = self.opener(
769 self.index_file,
769 self.index_file,
770 mode=b"r+",
770 mode=b"r+",
771 checkambig=self.data_config.check_ambig,
771 checkambig=self.data_config.check_ambig,
772 )
772 )
773 else:
773 else:
774 # check_ambig affects the way we open files for writing; however,
774 # check_ambig affects the way we open files for writing; however,
775 # here we do not actually open a file for writing, as writes will
775 # here we do not actually open a file for writing, as writes will
776 # be appended to a delay_buffer. So check_ambig is not
776 # be appended to a delay_buffer. So check_ambig is not
777 # meaningful and is unneeded here.
777 # meaningful and is unneeded here.
778 f = randomaccessfile.appender(
778 f = randomaccessfile.appender(
779 self.opener, self.index_file, b"r+", self._delay_buffer
779 self.opener, self.index_file, b"r+", self._delay_buffer
780 )
780 )
781 if index_end is None:
781 if index_end is None:
782 f.seek(0, os.SEEK_END)
782 f.seek(0, os.SEEK_END)
783 else:
783 else:
784 f.seek(index_end, os.SEEK_SET)
784 f.seek(index_end, os.SEEK_SET)
785 return f
785 return f
786 except FileNotFoundError:
786 except FileNotFoundError:
787 if self._delay_buffer is None:
787 if self._delay_buffer is None:
788 return self.opener(
788 return self.opener(
789 self.index_file,
789 self.index_file,
790 mode=b"w+",
790 mode=b"w+",
791 checkambig=self.data_config.check_ambig,
791 checkambig=self.data_config.check_ambig,
792 )
792 )
793 else:
793 else:
794 return randomaccessfile.appender(
794 return randomaccessfile.appender(
795 self.opener, self.index_file, b"w+", self._delay_buffer
795 self.opener, self.index_file, b"w+", self._delay_buffer
796 )
796 )
797
797
798 def __index_new_fp(self):
798 def __index_new_fp(self):
799 """internal method to create a new index file for writing
799 """internal method to create a new index file for writing
800
800
801 You should not use this unless you are upgrading from an inline revlog
801 You should not use this unless you are upgrading from an inline revlog
802 """
802 """
803 return self.opener(
803 return self.opener(
804 self.index_file,
804 self.index_file,
805 mode=b"w",
805 mode=b"w",
806 checkambig=self.data_config.check_ambig,
806 checkambig=self.data_config.check_ambig,
807 )
807 )
808
808
809 def split_inline(self, tr, header, new_index_file_path=None):
809 def split_inline(self, tr, header, new_index_file_path=None):
810 """split the data of an inline revlog into an index and a data file"""
810 """split the data of an inline revlog into an index and a data file"""
811 assert self._delay_buffer is None
811 assert self._delay_buffer is None
812 existing_handles = False
812 existing_handles = False
813 if self._writinghandles is not None:
813 if self._writinghandles is not None:
814 existing_handles = True
814 existing_handles = True
815 fp = self._writinghandles[0]
815 fp = self._writinghandles[0]
816 fp.flush()
816 fp.flush()
817 fp.close()
817 fp.close()
818 # We can't use the cached file handle after close(). So prevent
818 # We can't use the cached file handle after close(). So prevent
819 # its usage.
819 # its usage.
820 self._writinghandles = None
820 self._writinghandles = None
821 self._segmentfile.writing_handle = None
821 self._segmentfile.writing_handle = None
822 # No need to deal with sidedata writing handle as it is only
822 # No need to deal with sidedata writing handle as it is only
823 # relevant with revlog-v2 which is never inline, not reaching
823 # relevant with revlog-v2 which is never inline, not reaching
824 # this code
824 # this code
825
825
826 new_dfh = self.opener(self.data_file, mode=b"w+")
826 new_dfh = self.opener(self.data_file, mode=b"w+")
827 new_dfh.truncate(0) # drop any potentially existing data
827 new_dfh.truncate(0) # drop any potentially existing data
828 try:
828 try:
829 with self.reading():
829 with self.reading():
830 for r in range(len(self.index)):
830 for r in range(len(self.index)):
831 new_dfh.write(self.get_segment_for_revs(r, r)[1])
831 new_dfh.write(self.get_segment_for_revs(r, r)[1])
832 new_dfh.flush()
832 new_dfh.flush()
833
833
834 if new_index_file_path is not None:
834 if new_index_file_path is not None:
835 self.index_file = new_index_file_path
835 self.index_file = new_index_file_path
836 with self.__index_new_fp() as fp:
836 with self.__index_new_fp() as fp:
837 self.inline = False
837 self.inline = False
838 for i in range(len(self.index)):
838 for i in range(len(self.index)):
839 e = self.index.entry_binary(i)
839 e = self.index.entry_binary(i)
840 if i == 0:
840 if i == 0:
841 packed_header = self.index.pack_header(header)
841 packed_header = self.index.pack_header(header)
842 e = packed_header + e
842 e = packed_header + e
843 fp.write(e)
843 fp.write(e)
844
844
845 # If we don't use side-write, the temp file replaces the real
845 # If we don't use side-write, the temp file replaces the real
846 # index when we exit the context manager.
846 # index when we exit the context manager.
847
847
848 self._segmentfile = randomaccessfile.randomaccessfile(
848 self._segmentfile = randomaccessfile.randomaccessfile(
849 self.opener,
849 self.opener,
850 self.data_file,
850 self.data_file,
851 self.data_config.chunk_cache_size,
851 self.data_config.chunk_cache_size,
852 )
852 )
853
853
854 if existing_handles:
854 if existing_handles:
855 # switched from inline to conventional; reopen the index
855 # switched from inline to conventional; reopen the index
856 ifh = self.__index_write_fp()
856 ifh = self.__index_write_fp()
857 self._writinghandles = (ifh, new_dfh, None)
857 self._writinghandles = (ifh, new_dfh, None)
858 self._segmentfile.writing_handle = new_dfh
858 self._segmentfile.writing_handle = new_dfh
859 new_dfh = None
859 new_dfh = None
860 # No need to deal with sidedata writing handle as it is only
860 # No need to deal with sidedata writing handle as it is only
861 # relevant with revlog-v2 which is never inline, not reaching
861 # relevant with revlog-v2 which is never inline, not reaching
862 # this code
862 # this code
863 finally:
863 finally:
864 if new_dfh is not None:
864 if new_dfh is not None:
865 new_dfh.close()
865 new_dfh.close()
866 return self.index_file
866 return self.index_file
867
867
868 def get_segment_for_revs(self, startrev, endrev):
868 def get_segment_for_revs(self, startrev, endrev):
869 """Obtain a segment of raw data corresponding to a range of revisions.
869 """Obtain a segment of raw data corresponding to a range of revisions.
870
870
871 Accepts the start and end revisions. Reads go through the internal
871 Accepts the start and end revisions. Reads go through the internal
872 segment file, and the seek position of the underlying file handle
872 segment file, and the seek position of the underlying file handle
873 will not be preserved.
873 will not be preserved.
874
874
875 Requests for data may be satisfied by a cache.
875 Requests for data may be satisfied by a cache.
876
876
877 Returns a 2-tuple of (offset, data) for the requested range of
877 Returns a 2-tuple of (offset, data) for the requested range of
878 revisions. Offset is the integer offset from the beginning of the
878 revisions. Offset is the integer offset from the beginning of the
879 revlog and data is a bytes or buffer object of the raw byte data.
879 revlog and data is a bytes or buffer object of the raw byte data.
880
880
881 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
881 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
882 to determine where each revision's data begins and ends.
882 to determine where each revision's data begins and ends.
883
883
884 API: we should consider making this a private part of the InnerRevlog
884 API: we should consider making this a private part of the InnerRevlog
885 at some point.
885 at some point.
886 """
886 """
887 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
887 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
888 # (functions are expensive).
888 # (functions are expensive).
889 index = self.index
889 index = self.index
890 istart = index[startrev]
890 istart = index[startrev]
891 start = int(istart[0] >> 16)
891 start = int(istart[0] >> 16)
892 if startrev == endrev:
892 if startrev == endrev:
893 end = start + istart[1]
893 end = start + istart[1]
894 else:
894 else:
895 iend = index[endrev]
895 iend = index[endrev]
896 end = int(iend[0] >> 16) + iend[1]
896 end = int(iend[0] >> 16) + iend[1]
897
897
898 if self.inline:
898 if self.inline:
899 start += (startrev + 1) * self.index.entry_size
899 start += (startrev + 1) * self.index.entry_size
900 end += (endrev + 1) * self.index.entry_size
900 end += (endrev + 1) * self.index.entry_size
901 length = end - start
901 length = end - start
902
902
903 return start, self._segmentfile.read_chunk(start, length)
903 return start, self._segmentfile.read_chunk(start, length)
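# Slicing sketch mirroring the _chunks() loop further below: carve one
# revision's raw chunk out of a multi-revision segment, compensating for the
# index entries interleaved with data when the revlog is inline.
def _chunk_from_segment(inner, offset, data, rev):
    chunkstart = inner.start(rev)
    if inner.inline:
        chunkstart += (rev + 1) * inner.index.entry_size
    return data[chunkstart - offset : chunkstart - offset + inner.length(rev)]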
904
904
905 def _chunk(self, rev):
905 def _chunk(self, rev):
906 """Obtain a single decompressed chunk for a revision.
906 """Obtain a single decompressed chunk for a revision.
907
907
908 Accepts an integer revision. The seek position of the underlying
908 Accepts an integer revision. The seek position of the underlying
909 file handle will not be preserved across the read.
909 file handle will not be preserved across the read.
911
911
912 Returns bytes holding uncompressed data for the requested revision.
912 Returns bytes holding uncompressed data for the requested revision.
913 """
913 """
914 if self._uncompressed_chunk_cache is not None:
914 if self._uncompressed_chunk_cache is not None:
915 uncomp = self._uncompressed_chunk_cache.get(rev)
915 uncomp = self._uncompressed_chunk_cache.get(rev)
916 if uncomp is not None:
916 if uncomp is not None:
917 return uncomp
917 return uncomp
918
918
919 compression_mode = self.index[rev][10]
919 compression_mode = self.index[rev][10]
920 data = self.get_segment_for_revs(rev, rev)[1]
920 data = self.get_segment_for_revs(rev, rev)[1]
921 if compression_mode == COMP_MODE_PLAIN:
921 if compression_mode == COMP_MODE_PLAIN:
922 uncomp = data
922 uncomp = data
923 elif compression_mode == COMP_MODE_DEFAULT:
923 elif compression_mode == COMP_MODE_DEFAULT:
924 uncomp = self._decompressor(data)
924 uncomp = self._decompressor(data)
925 elif compression_mode == COMP_MODE_INLINE:
925 elif compression_mode == COMP_MODE_INLINE:
926 uncomp = self.decompress(data)
926 uncomp = self.decompress(data)
927 else:
927 else:
928 msg = b'unknown compression mode %d'
928 msg = b'unknown compression mode %d'
929 msg %= compression_mode
929 msg %= compression_mode
930 raise error.RevlogError(msg)
930 raise error.RevlogError(msg)
931 if self._uncompressed_chunk_cache is not None:
931 if self._uncompressed_chunk_cache is not None:
932 self._uncompressed_chunk_cache.insert(rev, uncomp, cost=len(uncomp))
932 self._uncompressed_chunk_cache.insert(rev, uncomp, cost=len(uncomp))
933 return uncomp
933 return uncomp
934
934
935 def _chunks(self, revs, targetsize=None):
935 def _chunks(self, revs, targetsize=None):
936 """Obtain decompressed chunks for the specified revisions.
936 """Obtain decompressed chunks for the specified revisions.
937
937
938 Accepts an iterable of numeric revisions that are assumed to be in
938 Accepts an iterable of numeric revisions that are assumed to be in
939 ascending order.
939 ascending order.
940
940
941 This function is similar to calling ``self._chunk()`` multiple times,
941 This function is similar to calling ``self._chunk()`` multiple times,
942 but is faster.
942 but is faster.
943
943
944 Returns a list with decompressed data for each requested revision.
944 Returns a list with decompressed data for each requested revision.
945 """
945 """
946 if not revs:
946 if not revs:
947 return []
947 return []
948 start = self.start
948 start = self.start
949 length = self.length
949 length = self.length
950 inline = self.inline
950 inline = self.inline
951 iosize = self.index.entry_size
951 iosize = self.index.entry_size
952 buffer = util.buffer
952 buffer = util.buffer
953
953
954 fetched_revs = []
954 fetched_revs = []
955 fadd = fetched_revs.append
955 fadd = fetched_revs.append
956
956
957 chunks = []
957 chunks = []
958 ladd = chunks.append
958 ladd = chunks.append
959
959
960 if self._uncompressed_chunk_cache is None:
960 if self._uncompressed_chunk_cache is None:
961 fetched_revs = revs
961 fetched_revs = revs
962 else:
962 else:
963 for rev in revs:
963 for rev in revs:
964 cached_value = self._uncompressed_chunk_cache.get(rev)
964 cached_value = self._uncompressed_chunk_cache.get(rev)
965 if cached_value is None:
965 if cached_value is None:
966 fadd(rev)
966 fadd(rev)
967 else:
967 else:
968 ladd((rev, cached_value))
968 ladd((rev, cached_value))
969
969
970 if not fetched_revs:
970 if not fetched_revs:
971 slicedchunks = ()
971 slicedchunks = ()
972 elif not self.data_config.with_sparse_read:
972 elif not self.data_config.with_sparse_read:
973 slicedchunks = (fetched_revs,)
973 slicedchunks = (fetched_revs,)
974 else:
974 else:
975 slicedchunks = deltautil.slicechunk(
975 slicedchunks = deltautil.slicechunk(
976 self,
976 self,
977 fetched_revs,
977 fetched_revs,
978 targetsize=targetsize,
978 targetsize=targetsize,
979 )
979 )
980
980
981 for revschunk in slicedchunks:
981 for revschunk in slicedchunks:
982 firstrev = revschunk[0]
982 firstrev = revschunk[0]
983 # Skip trailing revisions with empty diff
983 # Skip trailing revisions with empty diff
984 for lastrev in revschunk[::-1]:
984 for lastrev in revschunk[::-1]:
985 if length(lastrev) != 0:
985 if length(lastrev) != 0:
986 break
986 break
987
987
988 try:
988 try:
989 offset, data = self.get_segment_for_revs(firstrev, lastrev)
989 offset, data = self.get_segment_for_revs(firstrev, lastrev)
990 except OverflowError:
990 except OverflowError:
991 # issue4215 - we can't cache a run of chunks greater than
991 # issue4215 - we can't cache a run of chunks greater than
992 # 2G on Windows
992 # 2G on Windows
993 for rev in revschunk:
993 for rev in revschunk:
994 ladd((rev, self._chunk(rev)))
994 ladd((rev, self._chunk(rev)))
994 continue  # `offset`/`data` are unset here; skip the sliced-segment path
994 continue  # `offset`/`data` are unset here; skip the sliced-segment path
995
995
996 decomp = self.decompress
996 decomp = self.decompress
997 # self._decompressor might be None, but will not be used in that case
997 # self._decompressor might be None, but will not be used in that case
998 def_decomp = self._decompressor
998 def_decomp = self._decompressor
999 for rev in revschunk:
999 for rev in revschunk:
1000 chunkstart = start(rev)
1000 chunkstart = start(rev)
1001 if inline:
1001 if inline:
1002 chunkstart += (rev + 1) * iosize
1002 chunkstart += (rev + 1) * iosize
1003 chunklength = length(rev)
1003 chunklength = length(rev)
1004 comp_mode = self.index[rev][10]
1004 comp_mode = self.index[rev][10]
1005 c = buffer(data, chunkstart - offset, chunklength)
1005 c = buffer(data, chunkstart - offset, chunklength)
1006 if comp_mode == COMP_MODE_PLAIN:
1006 if comp_mode == COMP_MODE_PLAIN:
1007 c = c
1007 c = c
1008 elif comp_mode == COMP_MODE_INLINE:
1008 elif comp_mode == COMP_MODE_INLINE:
1009 c = decomp(c)
1009 c = decomp(c)
1010 elif comp_mode == COMP_MODE_DEFAULT:
1010 elif comp_mode == COMP_MODE_DEFAULT:
1011 c = def_decomp(c)
1011 c = def_decomp(c)
1012 else:
1012 else:
1013 msg = b'unknown compression mode %d'
1013 msg = b'unknown compression mode %d'
1014 msg %= comp_mode
1014 msg %= comp_mode
1015 raise error.RevlogError(msg)
1015 raise error.RevlogError(msg)
1016 ladd((rev, c))
1016 ladd((rev, c))
1017 if self._uncompressed_chunk_cache is not None:
1017 if self._uncompressed_chunk_cache is not None:
1018 self._uncompressed_chunk_cache.insert(rev, c, len(c))
1018 self._uncompressed_chunk_cache.insert(rev, c, len(c))
1019
1019
1020 chunks.sort()
1020 chunks.sort()
1021 return [x[1] for x in chunks]
1021 return [x[1] for x in chunks]
1022
1022
1023 def raw_text(self, node, rev) -> bytes:
1023 def raw_text(self, node, rev) -> bytes:
1024 """return the possibly unvalidated rawtext for a revision
1024 """return the possibly unvalidated rawtext for a revision
1025
1025
1026 returns rawtext
1026 returns rawtext
1027 """
1027 """
1028
1028
1029 # revision in the cache (could be useful to apply delta)
1029 # revision in the cache (could be useful to apply delta)
1030 cachedrev = None
1030 cachedrev = None
1031 # An intermediate text to apply deltas to
1031 # An intermediate text to apply deltas to
1032 basetext = None
1032 basetext = None
1033
1033
1034 # Check if we have the entry in cache
1034 # Check if we have the entry in cache
1035 # The cache entry looks like (node, rev, rawtext)
1035 # The cache entry looks like (node, rev, rawtext)
1036 if self._revisioncache:
1036 if self._revisioncache:
1037 cachedrev = self._revisioncache[1]
1037 cachedrev = self._revisioncache[1]
1038
1038
1039 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1039 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1040 if stopped:
1040 if stopped:
1041 basetext = self._revisioncache[2]
1041 basetext = self._revisioncache[2]
1042
1042
1043 # drop cache to save memory, the caller is expected to
1043 # drop cache to save memory, the caller is expected to
1044 # update self._inner._revisioncache after validating the text
1044 # update self._inner._revisioncache after validating the text
1045 self._revisioncache = None
1045 self._revisioncache = None
1046
1046
1047 targetsize = None
1047 targetsize = None
1048 rawsize = self.index[rev][2]
1048 rawsize = self.index[rev][2]
1049 if 0 <= rawsize:
1049 if 0 <= rawsize:
1050 targetsize = 4 * rawsize
1050 targetsize = 4 * rawsize
1051
1051
1052 if self._uncompressed_chunk_cache is not None:
1052 if self._uncompressed_chunk_cache is not None:
1053 # dynamically update the uncompressed_chunk_cache size to the
1053 # dynamically update the uncompressed_chunk_cache size to the
1054 # largest revision we saw in this revlog.
1054 # largest revision we saw in this revlog.
1055 factor = self.data_config.uncompressed_cache_factor
1055 factor = self.data_config.uncompressed_cache_factor
1056 candidate_size = rawsize * factor
1056 candidate_size = rawsize * factor
1057 if candidate_size > self._uncompressed_chunk_cache.maxcost:
1057 if candidate_size > self._uncompressed_chunk_cache.maxcost:
1058 self._uncompressed_chunk_cache.maxcost = candidate_size
1058 self._uncompressed_chunk_cache.maxcost = candidate_size
1059
1059
1060 bins = self._chunks(chain, targetsize=targetsize)
1060 bins = self._chunks(chain, targetsize=targetsize)
1061 if basetext is None:
1061 if basetext is None:
1062 basetext = bytes(bins[0])
1062 basetext = bytes(bins[0])
1063 bins = bins[1:]
1063 bins = bins[1:]
1064
1064
1065 rawtext = mdiff.patches(basetext, bins)
1065 rawtext = mdiff.patches(basetext, bins)
1066 del basetext # let us have a chance to free memory early
1066 del basetext # let us have a chance to free memory early
1067 return rawtext
1067 return rawtext
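# Caller-contract sketch for the comment in raw_text() above: validate the
# returned text against `node`, then repopulate the cache the method dropped.
# `validate` is a hypothetical hash check that raises on mismatch.
def _checked_raw_text(inner, node, rev, validate):
    rawtext = inner.raw_text(node, rev)
    validate(node, rawtext)
    inner._revisioncache = (node, rev, rawtext)
    return rawtext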
1068
1068
1069 def sidedata(self, rev, sidedata_end):
1069 def sidedata(self, rev, sidedata_end):
1070 """Return the sidedata for a given revision number."""
1070 """Return the sidedata for a given revision number."""
1071 index_entry = self.index[rev]
1071 index_entry = self.index[rev]
1072 sidedata_offset = index_entry[8]
1072 sidedata_offset = index_entry[8]
1073 sidedata_size = index_entry[9]
1073 sidedata_size = index_entry[9]
1074
1074
1075 if self.inline:
1075 if self.inline:
1076 sidedata_offset += self.index.entry_size * (1 + rev)
1076 sidedata_offset += self.index.entry_size * (1 + rev)
1077 if sidedata_size == 0:
1077 if sidedata_size == 0:
1078 return {}
1078 return {}
1079
1079
1080 if sidedata_end < sidedata_offset + sidedata_size:
1080 if sidedata_end < sidedata_offset + sidedata_size:
1081 filename = self.sidedata_file
1081 filename = self.sidedata_file
1082 end = sidedata_end
1082 end = sidedata_end
1083 offset = sidedata_offset
1083 offset = sidedata_offset
1084 length = sidedata_size
1084 length = sidedata_size
1085 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1085 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1086 raise error.RevlogError(m)
1086 raise error.RevlogError(m)
1087
1087
1088 comp_segment = self._segmentfile_sidedata.read_chunk(
1088 comp_segment = self._segmentfile_sidedata.read_chunk(
1089 sidedata_offset, sidedata_size
1089 sidedata_offset, sidedata_size
1090 )
1090 )
1091
1091
1092 comp = self.index[rev][11]
1092 comp = self.index[rev][11]
1093 if comp == COMP_MODE_PLAIN:
1093 if comp == COMP_MODE_PLAIN:
1094 segment = comp_segment
1094 segment = comp_segment
1095 elif comp == COMP_MODE_DEFAULT:
1095 elif comp == COMP_MODE_DEFAULT:
1096 segment = self._decompressor(comp_segment)
1096 segment = self._decompressor(comp_segment)
1097 elif comp == COMP_MODE_INLINE:
1097 elif comp == COMP_MODE_INLINE:
1098 segment = self.decompress(comp_segment)
1098 segment = self.decompress(comp_segment)
1099 else:
1099 else:
1100 msg = b'unknown compression mode %d'
1100 msg = b'unknown compression mode %d'
1101 msg %= comp
1101 msg %= comp
1102 raise error.RevlogError(msg)
1102 raise error.RevlogError(msg)
1103
1103
1104 sidedata = sidedatautil.deserialize_sidedata(segment)
1104 sidedata = sidedatautil.deserialize_sidedata(segment)
1105 return sidedata
1105 return sidedata
1106
1106
1107 def write_entry(
1107 def write_entry(
1108 self,
1108 self,
1109 transaction,
1109 transaction,
1110 entry,
1110 entry,
1111 data,
1111 data,
1112 link,
1112 link,
1113 offset,
1113 offset,
1114 sidedata,
1114 sidedata,
1115 sidedata_offset,
1115 sidedata_offset,
1116 index_end,
1116 index_end,
1117 data_end,
1117 data_end,
1118 sidedata_end,
1118 sidedata_end,
1119 ):
1119 ):
1120 # Files opened in a+ mode have inconsistent behavior on various
1120 # Files opened in a+ mode have inconsistent behavior on various
1121 # platforms. Windows requires that a file positioning call be made
1121 # platforms. Windows requires that a file positioning call be made
1122 # when the file handle transitions between reads and writes. See
1122 # when the file handle transitions between reads and writes. See
1123 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1123 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1124 # platforms, Python or the platform itself can be buggy. Some versions
1124 # platforms, Python or the platform itself can be buggy. Some versions
1125 # of Solaris have been observed to not append at the end of the file
1125 # of Solaris have been observed to not append at the end of the file
1126 # if the file was seeked to before the end. See issue4943 for more.
1126 # if the file was seeked to before the end. See issue4943 for more.
1127 #
1127 #
1128 # We work around this issue by inserting a seek() before writing.
1128 # We work around this issue by inserting a seek() before writing.
1129 # Note: This is likely not necessary on Python 3. However, because
1129 # Note: This is likely not necessary on Python 3. However, because
1130 # the file handle is reused for reads and may be seeked there, we need
1130 # the file handle is reused for reads and may be seeked there, we need
1131 # to be careful before changing this.
1131 # to be careful before changing this.
1132 if self._writinghandles is None:
1132 if self._writinghandles is None:
1133 msg = b'adding revision outside `revlog._writing` context'
1133 msg = b'adding revision outside `revlog._writing` context'
1134 raise error.ProgrammingError(msg)
1134 raise error.ProgrammingError(msg)
1135 ifh, dfh, sdfh = self._writinghandles
1135 ifh, dfh, sdfh = self._writinghandles
1136 if index_end is None:
1136 if index_end is None:
1137 ifh.seek(0, os.SEEK_END)
1137 ifh.seek(0, os.SEEK_END)
1138 else:
1138 else:
1139 ifh.seek(index_end, os.SEEK_SET)
1139 ifh.seek(index_end, os.SEEK_SET)
1140 if dfh:
1140 if dfh:
1141 if data_end is None:
1141 if data_end is None:
1142 dfh.seek(0, os.SEEK_END)
1142 dfh.seek(0, os.SEEK_END)
1143 else:
1143 else:
1144 dfh.seek(data_end, os.SEEK_SET)
1144 dfh.seek(data_end, os.SEEK_SET)
1145 if sdfh:
1145 if sdfh:
1146 sdfh.seek(sidedata_end, os.SEEK_SET)
1146 sdfh.seek(sidedata_end, os.SEEK_SET)
1147
1147
1148 curr = len(self.index) - 1
1148 curr = len(self.index) - 1
1149 if not self.inline:
1149 if not self.inline:
1150 transaction.add(self.data_file, offset)
1150 transaction.add(self.data_file, offset)
1151 if self.sidedata_file:
1151 if self.sidedata_file:
1152 transaction.add(self.sidedata_file, sidedata_offset)
1152 transaction.add(self.sidedata_file, sidedata_offset)
1153 transaction.add(self.canonical_index_file, curr * len(entry))
1153 transaction.add(self.canonical_index_file, curr * len(entry))
1154 if data[0]:
1154 if data[0]:
1155 dfh.write(data[0])
1155 dfh.write(data[0])
1156 dfh.write(data[1])
1156 dfh.write(data[1])
1157 if sidedata:
1157 if sidedata:
1158 sdfh.write(sidedata)
1158 sdfh.write(sidedata)
1159 if self._delay_buffer is None:
1159 if self._delay_buffer is None:
1160 ifh.write(entry)
1160 ifh.write(entry)
1161 else:
1161 else:
1162 self._delay_buffer.append(entry)
1162 self._delay_buffer.append(entry)
1163 elif self._delay_buffer is not None:
1163 elif self._delay_buffer is not None:
1164 msg = b'invalid delayed write on inline revlog'
1164 msg = b'invalid delayed write on inline revlog'
1165 raise error.ProgrammingError(msg)
1165 raise error.ProgrammingError(msg)
1166 else:
1166 else:
1167 offset += curr * self.index.entry_size
1167 offset += curr * self.index.entry_size
1168 transaction.add(self.canonical_index_file, offset)
1168 transaction.add(self.canonical_index_file, offset)
1169 assert not sidedata
1169 assert not sidedata
1170 ifh.write(entry)
1170 ifh.write(entry)
1171 ifh.write(data[0])
1171 ifh.write(data[0])
1172 ifh.write(data[1])
1172 ifh.write(data[1])
1173 return (
1173 return (
1174 ifh.tell(),
1174 ifh.tell(),
1175 dfh.tell() if dfh else None,
1175 dfh.tell() if dfh else None,
1176 sdfh.tell() if sdfh else None,
1176 sdfh.tell() if sdfh else None,
1177 )
1177 )
1178
1178
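# Hedged, self-contained illustration of the seek-before-write dance
# documented in write_entry() above; the scratch-file name and helper are
# assumptions, not revlog API. On some platforms a handle opened in "a+"
# mode misbehaves when it switches from reading to writing without an
# explicit repositioning call (see issue4943).
def _demo_append_after_read(path='demo.bin'):
    with open(path, 'a+b') as fh:
        fh.read()                # handle is now in a "read" state
        fh.seek(0, os.SEEK_END)  # reposition explicitly before writing
        fh.write(b'new entry')   # the append now lands reliably at the end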
1179 def _divert_index(self):
1179 def _divert_index(self):
1180 index_file = self.index_file
1180 index_file = self.index_file
1181 # When we encounter a legacy inline changelog, split it. However, it is
1181 # When we encounter a legacy inline changelog, split it. However, it is
1182 # important to use the expected filename for pending content
1182 # important to use the expected filename for pending content
1183 # (<radix>.a), otherwise hooks won't see the content of the
1183 # (<radix>.a), otherwise hooks won't see the content of the
1184 # pending transaction.
1184 # pending transaction.
1185 if index_file.endswith(b'.s'):
1185 if index_file.endswith(b'.s'):
1186 index_file = self.index_file[:-2]
1186 index_file = self.index_file[:-2]
1187 return index_file + b'.a'
1187 return index_file + b'.a'
1188
1188
1189 def delay(self):
1189 def delay(self):
1190 assert not self.is_open
1190 assert not self.is_open
1191 if self.inline:
1191 if self.inline:
1192 msg = "revlog with delayed write should not be inline"
1192 msg = "revlog with delayed write should not be inline"
1193 raise error.ProgrammingError(msg)
1193 raise error.ProgrammingError(msg)
1194 if self._delay_buffer is not None or self._orig_index_file is not None:
1194 if self._delay_buffer is not None or self._orig_index_file is not None:
1195 # delay or divert already in place
1195 # delay or divert already in place
1196 return None
1196 return None
1197 elif len(self.index) == 0:
1197 elif len(self.index) == 0:
1198 self._orig_index_file = self.index_file
1198 self._orig_index_file = self.index_file
1199 self.index_file = self._divert_index()
1199 self.index_file = self._divert_index()
1200 assert self._orig_index_file is not None
1200 assert self._orig_index_file is not None
1201 assert self.index_file is not None
1201 assert self.index_file is not None
1202 if self.opener.exists(self.index_file):
1202 if self.opener.exists(self.index_file):
1203 self.opener.unlink(self.index_file)
1203 self.opener.unlink(self.index_file)
1204 return self.index_file
1204 return self.index_file
1205 else:
1205 else:
1206 self._delay_buffer = []
1206 self._delay_buffer = []
1207 return None
1207 return None
1208
1208
1209 def write_pending(self):
1209 def write_pending(self):
1210 assert not self.is_open
1210 assert not self.is_open
1211 if self.inline:
1211 if self.inline:
1212 msg = "revlog with delayed write should not be inline"
1212 msg = "revlog with delayed write should not be inline"
1213 raise error.ProgrammingError(msg)
1213 raise error.ProgrammingError(msg)
1214 if self._orig_index_file is not None:
1214 if self._orig_index_file is not None:
1215 return None, True
1215 return None, True
1216 any_pending = False
1216 any_pending = False
1217 pending_index_file = self._divert_index()
1217 pending_index_file = self._divert_index()
1218 if self.opener.exists(pending_index_file):
1218 if self.opener.exists(pending_index_file):
1219 self.opener.unlink(pending_index_file)
1219 self.opener.unlink(pending_index_file)
1220 util.copyfile(
1220 util.copyfile(
1221 self.opener.join(self.index_file),
1221 self.opener.join(self.index_file),
1222 self.opener.join(pending_index_file),
1222 self.opener.join(pending_index_file),
1223 )
1223 )
1224 if self._delay_buffer:
1224 if self._delay_buffer:
1225 with self.opener(pending_index_file, b'r+') as ifh:
1225 with self.opener(pending_index_file, b'r+') as ifh:
1226 ifh.seek(0, os.SEEK_END)
1226 ifh.seek(0, os.SEEK_END)
1227 ifh.write(b"".join(self._delay_buffer))
1227 ifh.write(b"".join(self._delay_buffer))
1228 any_pending = True
1228 any_pending = True
1229 self._delay_buffer = None
1229 self._delay_buffer = None
1230 self._orig_index_file = self.index_file
1230 self._orig_index_file = self.index_file
1231 self.index_file = pending_index_file
1231 self.index_file = pending_index_file
1232 return self.index_file, any_pending
1232 return self.index_file, any_pending
1233
1233
1234 def finalize_pending(self):
1234 def finalize_pending(self):
1235 assert not self.is_open
1235 assert not self.is_open
1236 if self.inline:
1236 if self.inline:
1237 msg = "revlog with delayed write should not be inline"
1237 msg = "revlog with delayed write should not be inline"
1238 raise error.ProgrammingError(msg)
1238 raise error.ProgrammingError(msg)
1239
1239
1240 delay = self._delay_buffer is not None
1240 delay = self._delay_buffer is not None
1241 divert = self._orig_index_file is not None
1241 divert = self._orig_index_file is not None
1242
1242
1243 if delay and divert:
1243 if delay and divert:
1244 assert False, "unreachable"
1244 assert False, "unreachable"
1245 elif delay:
1245 elif delay:
1246 if self._delay_buffer:
1246 if self._delay_buffer:
1247 with self.opener(self.index_file, b'r+') as ifh:
1247 with self.opener(self.index_file, b'r+') as ifh:
1248 ifh.seek(0, os.SEEK_END)
1248 ifh.seek(0, os.SEEK_END)
1249 ifh.write(b"".join(self._delay_buffer))
1249 ifh.write(b"".join(self._delay_buffer))
1250 self._delay_buffer = None
1250 self._delay_buffer = None
1251 elif divert:
1251 elif divert:
1252 if self.opener.exists(self.index_file):
1252 if self.opener.exists(self.index_file):
1253 self.opener.rename(
1253 self.opener.rename(
1254 self.index_file,
1254 self.index_file,
1255 self._orig_index_file,
1255 self._orig_index_file,
1256 checkambig=True,
1256 checkambig=True,
1257 )
1257 )
1258 self.index_file = self._orig_index_file
1258 self.index_file = self._orig_index_file
1259 self._orig_index_file = None
1259 self._orig_index_file = None
1260 else:
1260 else:
1261 msg = b"not delay or divert found on this revlog"
1261 msg = b"not delay or divert found on this revlog"
1262 raise error.ProgrammingError(msg)
1262 raise error.ProgrammingError(msg)
1263 return self.canonical_index_file
1263 return self.canonical_index_file
1264
1264
1265
1265
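# Hedged usage sketch of the delayed-write life cycle implemented above
# (`inner` stands for an _InnerRevlog instance; the transaction and hook
# plumbing are assumed away):
#
#     inner.delay()                  # buffer new index entries, or divert
#                                    # them to <radix>.i.a for an empty log
#     ...revisions are added...
#     pending, has_pending = inner.write_pending()  # expose data for hooks
#     ...pretxnclose hooks read the pending file...
#     inner.finalize_pending()       # fold everything back into <radix>.i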
1266 class revlog:
1266 class revlog:
1267 """
1267 """
1268 the underlying revision storage object
1268 the underlying revision storage object
1269
1269
1270 A revlog consists of two parts, an index and the revision data.
1270 A revlog consists of two parts, an index and the revision data.
1271
1271
1272 The index is a file with a fixed record size containing
1272 The index is a file with a fixed record size containing
1273 information on each revision, including its nodeid (hash), the
1273 information on each revision, including its nodeid (hash), the
1274 nodeids of its parents, the position and offset of its data within
1274 nodeids of its parents, the position and offset of its data within
1275 the data file, and the revision it's based on. Finally, each entry
1275 the data file, and the revision it's based on. Finally, each entry
1276 contains a linkrev entry that can serve as a pointer to external
1276 contains a linkrev entry that can serve as a pointer to external
1277 data.
1277 data.
1278
1278
1279 The revision data itself is a linear collection of data chunks.
1279 The revision data itself is a linear collection of data chunks.
1280 Each chunk represents a revision and is usually represented as a
1280 Each chunk represents a revision and is usually represented as a
1281 delta against the previous chunk. To bound lookup time, runs of
1281 delta against the previous chunk. To bound lookup time, runs of
1282 deltas are limited to about 2 times the length of the original
1282 deltas are limited to about 2 times the length of the original
1283 version data. This makes retrieval of a version proportional to
1283 version data. This makes retrieval of a version proportional to
1284 its size, or O(1) relative to the number of revisions.
1284 its size, or O(1) relative to the number of revisions.
1285
1285
1286 Both pieces of the revlog are written to in an append-only
1286 Both pieces of the revlog are written to in an append-only
1287 fashion, which means we never need to rewrite a file to insert or
1287 fashion, which means we never need to rewrite a file to insert or
1288 remove data, and can use some simple techniques to avoid the need
1288 remove data, and can use some simple techniques to avoid the need
1289 for locking while reading.
1289 for locking while reading.
1290
1290
1291 If checkambig, indexfile is opened with checkambig=True at
1291 If checkambig, indexfile is opened with checkambig=True at
1292 writing, to avoid file stat ambiguity.
1292 writing, to avoid file stat ambiguity.
1293
1293
1294 If mmaplargeindex is True, and an mmapindexthreshold is set, the
1294 If mmaplargeindex is True, and an mmapindexthreshold is set, the
1295 index will be mmapped rather than read if it is larger than the
1295 index will be mmapped rather than read if it is larger than the
1296 configured threshold.
1296 configured threshold.
1297
1297
1298 If censorable is True, the revlog can have censored revisions.
1298 If censorable is True, the revlog can have censored revisions.
1299
1299
1300 If `upperboundcomp` is not None, this is the expected maximal gain from
1300 If `upperboundcomp` is not None, this is the expected maximal gain from
1301 compression for the data content.
1301 compression for the data content.
1302
1302
1303 `concurrencychecker` is an optional function that receives 3 arguments: a
1303 `concurrencychecker` is an optional function that receives 3 arguments: a
1304 file handle, a filename, and an expected position. It should check whether
1304 file handle, a filename, and an expected position. It should check whether
1305 the current position in the file handle is valid, and log/warn/fail (by
1305 the current position in the file handle is valid, and log/warn/fail (by
1306 raising).
1306 raising).
1307
1307
1308 See mercurial/revlogutils/constants.py for details about the content of an
1308 See mercurial/revlogutils/constants.py for details about the content of an
1309 index entry.
1309 index entry.
1310 """
1310 """
1311
1311
1312 _flagserrorclass = error.RevlogError
1312 _flagserrorclass = error.RevlogError
1313 _inner: "_InnerRevlog"
1313
1314
1314 opener: vfsmod.vfs
1315 opener: vfsmod.vfs
1315
1316
1316 @staticmethod
1317 @staticmethod
1317 def is_inline_index(header_bytes):
1318 def is_inline_index(header_bytes):
1318 """Determine if a revlog is inline from the initial bytes of the index"""
1319 """Determine if a revlog is inline from the initial bytes of the index"""
1319 if len(header_bytes) == 0:
1320 if len(header_bytes) == 0:
1320 return True
1321 return True
1321
1322
1322 header = INDEX_HEADER.unpack(header_bytes)[0]
1323 header = INDEX_HEADER.unpack(header_bytes)[0]
1323
1324
1324 _format_flags = header & ~0xFFFF
1325 _format_flags = header & ~0xFFFF
1325 _format_version = header & 0xFFFF
1326 _format_version = header & 0xFFFF
1326
1327
1327 features = FEATURES_BY_VERSION[_format_version]
1328 features = FEATURES_BY_VERSION[_format_version]
1328 return features[b'inline'](_format_flags)
1329 return features[b'inline'](_format_flags)
1329
1330
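# Hedged, self-contained sketch of the header probe above; the struct
# layout and the inline bit are assumptions mirroring the v1 format, not
# imports from revlogutils.constants.
import struct
_DEMO_HEADER = struct.Struct(b'>I')  # big-endian u32 at the index start
_DEMO_FLAG_INLINE = 1 << 16          # assumed FLAG_INLINE_DATA bit
def _demo_is_inline(header_bytes):
    if not header_bytes:
        return True  # an empty revlog starts out inline
    header = _DEMO_HEADER.unpack(header_bytes[:4])[0]
    return bool((header & ~0xFFFF) & _DEMO_FLAG_INLINE)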
1330 _docket_file: Optional[bytes]
1331 _docket_file: Optional[bytes]
1331
1332
1332 def __init__(
1333 def __init__(
1333 self,
1334 self,
1334 opener: vfsmod.vfs,
1335 opener: vfsmod.vfs,
1335 target,
1336 target,
1336 radix,
1337 radix,
1337 postfix=None, # only exist for `tmpcensored` now
1338 postfix=None, # only exist for `tmpcensored` now
1338 checkambig=False,
1339 checkambig=False,
1339 mmaplargeindex=False,
1340 mmaplargeindex=False,
1340 censorable=False,
1341 censorable=False,
1341 upperboundcomp=None,
1342 upperboundcomp=None,
1342 persistentnodemap=False,
1343 persistentnodemap=False,
1343 concurrencychecker=None,
1344 concurrencychecker=None,
1344 trypending=False,
1345 trypending=False,
1345 try_split=False,
1346 try_split=False,
1346 canonical_parent_order=True,
1347 canonical_parent_order=True,
1347 data_config=None,
1348 data_config=None,
1348 delta_config=None,
1349 delta_config=None,
1349 feature_config=None,
1350 feature_config=None,
1350 may_inline=True, # may inline new revlog
1351 may_inline=True, # may inline new revlog
1351 ):
1352 ):
1352 """
1353 """
1353 create a revlog object
1354 create a revlog object
1354
1355
1355 opener is a function that abstracts the file opening operation
1356 opener is a function that abstracts the file opening operation
1356 and can be used to implement COW semantics or the like.
1357 and can be used to implement COW semantics or the like.
1357
1358
1358 `target`: a (KIND, ID) tuple that identifies the content stored in
1359 `target`: a (KIND, ID) tuple that identifies the content stored in
1359 this revlog. It helps the rest of the code understand what the revlog
1360 this revlog. It helps the rest of the code understand what the revlog
1360 is about without having to resort to heuristics or index filename
1361 is about without having to resort to heuristics or index filename
1361 analysis. Note that this must reliably be set by normal code, but
1362 analysis. Note that this must reliably be set by normal code, but
1362 test, debug, or performance measurement code might not set it to an
1363 test, debug, or performance measurement code might not set it to an
1363 accurate value.
1364 accurate value.
1364 """
1365 """
1365
1366
1366 self.radix = radix
1367 self.radix = radix
1367
1368
1368 self._docket_file = None
1369 self._docket_file = None
1369 self._indexfile = None
1370 self._indexfile = None
1370 self._datafile = None
1371 self._datafile = None
1371 self._sidedatafile = None
1372 self._sidedatafile = None
1372 self._nodemap_file = None
1373 self._nodemap_file = None
1373 self.postfix = postfix
1374 self.postfix = postfix
1374 self._trypending = trypending
1375 self._trypending = trypending
1375 self._try_split = try_split
1376 self._try_split = try_split
1376 self._may_inline = may_inline
1377 self._may_inline = may_inline
1377 self.opener = opener
1378 self.opener = opener
1378 if persistentnodemap:
1379 if persistentnodemap:
1379 self._nodemap_file = nodemaputil.get_nodemap_file(self)
1380 self._nodemap_file = nodemaputil.get_nodemap_file(self)
1380
1381
1381 assert target[0] in ALL_KINDS
1382 assert target[0] in ALL_KINDS
1382 assert len(target) == 2
1383 assert len(target) == 2
1383 self.target = target
1384 self.target = target
1384 if feature_config is not None:
1385 if feature_config is not None:
1385 self.feature_config = feature_config.copy()
1386 self.feature_config = feature_config.copy()
1386 elif b'feature-config' in self.opener.options:
1387 elif b'feature-config' in self.opener.options:
1387 self.feature_config = self.opener.options[b'feature-config'].copy()
1388 self.feature_config = self.opener.options[b'feature-config'].copy()
1388 else:
1389 else:
1389 self.feature_config = FeatureConfig()
1390 self.feature_config = FeatureConfig()
1390 self.feature_config.censorable = censorable
1391 self.feature_config.censorable = censorable
1391 self.feature_config.canonical_parent_order = canonical_parent_order
1392 self.feature_config.canonical_parent_order = canonical_parent_order
1392 if data_config is not None:
1393 if data_config is not None:
1393 self.data_config = data_config.copy()
1394 self.data_config = data_config.copy()
1394 elif b'data-config' in self.opener.options:
1395 elif b'data-config' in self.opener.options:
1395 self.data_config = self.opener.options[b'data-config'].copy()
1396 self.data_config = self.opener.options[b'data-config'].copy()
1396 else:
1397 else:
1397 self.data_config = DataConfig()
1398 self.data_config = DataConfig()
1398 self.data_config.check_ambig = checkambig
1399 self.data_config.check_ambig = checkambig
1399 self.data_config.mmap_large_index = mmaplargeindex
1400 self.data_config.mmap_large_index = mmaplargeindex
1400 if delta_config is not None:
1401 if delta_config is not None:
1401 self.delta_config = delta_config.copy()
1402 self.delta_config = delta_config.copy()
1402 elif b'delta-config' in self.opener.options:
1403 elif b'delta-config' in self.opener.options:
1403 self.delta_config = self.opener.options[b'delta-config'].copy()
1404 self.delta_config = self.opener.options[b'delta-config'].copy()
1404 else:
1405 else:
1405 self.delta_config = DeltaConfig()
1406 self.delta_config = DeltaConfig()
1406 self.delta_config.upper_bound_comp = upperboundcomp
1407 self.delta_config.upper_bound_comp = upperboundcomp
1407
1408
1408 # Maps rev to chain base rev.
1409 # Maps rev to chain base rev.
1409 self._chainbasecache = util.lrucachedict(100)
1410 self._chainbasecache = util.lrucachedict(100)
1410
1411
1411 self.index = None
1412 self.index = None
1412 self._docket = None
1413 self._docket = None
1413 self._nodemap_docket = None
1414 self._nodemap_docket = None
1414 # Mapping of partial identifiers to full nodes.
1415 # Mapping of partial identifiers to full nodes.
1415 self._pcache = {}
1416 self._pcache = {}
1416
1417
1417 # other optional features
1418 # other optional features
1418
1419
1419 # Make copy of flag processors so each revlog instance can support
1420 # Make copy of flag processors so each revlog instance can support
1420 # custom flags.
1421 # custom flags.
1421 self._flagprocessors = dict(flagutil.flagprocessors)
1422 self._flagprocessors = dict(flagutil.flagprocessors)
1422 # prevent nesting of addgroup
1423 # prevent nesting of addgroup
1423 self._adding_group = None
1424 self._adding_group = None
1424
1425
1425 chunk_cache = self._loadindex()
1426 chunk_cache = self._loadindex()
1426 self._load_inner(chunk_cache)
1427 self._load_inner(chunk_cache)
1427 self._concurrencychecker = concurrencychecker
1428 self._concurrencychecker = concurrencychecker
1428
1429
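# Hedged sketch of the configuration precedence applied in __init__ above:
# an explicit *_config argument wins, then a matching entry in
# opener.options, then a fresh default. The helper below is illustrative,
# not revlog API.
def _demo_resolve_config(explicit, options, key, default_factory):
    if explicit is not None:
        return explicit.copy()
    if key in options:
        return options[key].copy()
    return default_factory()
# e.g. _demo_resolve_config(None, opener.options, b'data-config', DataConfig)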
1429 def _init_opts(self):
1430 def _init_opts(self):
1430 """process options (from above/config) to setup associated default revlog mode
1431 """process options (from above/config) to setup associated default revlog mode
1431
1432
1432 These values might be affected when actually reading on-disk information.
1433 These values might be affected when actually reading on-disk information.
1433
1434
1434 The relevant values are returned for use in _loadindex().
1435 The relevant values are returned for use in _loadindex().
1435
1436
1436 * newversionflags:
1437 * newversionflags:
1437 version header to use if we need to create a new revlog
1438 version header to use if we need to create a new revlog
1438
1439
1439 * mmapindexthreshold:
1440 * mmapindexthreshold:
1440 minimal index size at which to start using mmap
1441 minimal index size at which to start using mmap
1441
1442
1442 * force_nodemap:
1443 * force_nodemap:
1443 force the usage of a "development" version of the nodemap code
1444 force the usage of a "development" version of the nodemap code
1444 """
1445 """
1445 opts = self.opener.options
1446 opts = self.opener.options
1446
1447
1447 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
1448 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
1448 new_header = CHANGELOGV2
1449 new_header = CHANGELOGV2
1449 compute_rank = opts.get(b'changelogv2.compute-rank', True)
1450 compute_rank = opts.get(b'changelogv2.compute-rank', True)
1450 self.feature_config.compute_rank = compute_rank
1451 self.feature_config.compute_rank = compute_rank
1451 elif b'revlogv2' in opts:
1452 elif b'revlogv2' in opts:
1452 new_header = REVLOGV2
1453 new_header = REVLOGV2
1453 elif b'revlogv1' in opts:
1454 elif b'revlogv1' in opts:
1454 new_header = REVLOGV1
1455 new_header = REVLOGV1
1455 if self._may_inline:
1456 if self._may_inline:
1456 new_header |= FLAG_INLINE_DATA
1457 new_header |= FLAG_INLINE_DATA
1457 if b'generaldelta' in opts:
1458 if b'generaldelta' in opts:
1458 new_header |= FLAG_GENERALDELTA
1459 new_header |= FLAG_GENERALDELTA
1459 elif b'revlogv0' in self.opener.options:
1460 elif b'revlogv0' in self.opener.options:
1460 new_header = REVLOGV0
1461 new_header = REVLOGV0
1461 else:
1462 else:
1462 new_header = REVLOG_DEFAULT_VERSION
1463 new_header = REVLOG_DEFAULT_VERSION
1463
1464
1464 mmapindexthreshold = None
1465 mmapindexthreshold = None
1465 if self.data_config.mmap_large_index:
1466 if self.data_config.mmap_large_index:
1466 mmapindexthreshold = self.data_config.mmap_index_threshold
1467 mmapindexthreshold = self.data_config.mmap_index_threshold
1467 if self.feature_config.enable_ellipsis:
1468 if self.feature_config.enable_ellipsis:
1468 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
1469 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
1469
1470
1470 # revlog v0 doesn't have flag processors
1471 # revlog v0 doesn't have flag processors
1471 for flag, processor in opts.get(b'flagprocessors', {}).items():
1472 for flag, processor in opts.get(b'flagprocessors', {}).items():
1472 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
1473 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
1473
1474
1474 chunk_cache_size = self.data_config.chunk_cache_size
1475 chunk_cache_size = self.data_config.chunk_cache_size
1475 if chunk_cache_size <= 0:
1476 if chunk_cache_size <= 0:
1476 raise error.RevlogError(
1477 raise error.RevlogError(
1477 _(b'revlog chunk cache size %r is not greater than 0')
1478 _(b'revlog chunk cache size %r is not greater than 0')
1478 % chunk_cache_size
1479 % chunk_cache_size
1479 )
1480 )
1480 elif chunk_cache_size & (chunk_cache_size - 1):
1481 elif chunk_cache_size & (chunk_cache_size - 1):
1481 raise error.RevlogError(
1482 raise error.RevlogError(
1482 _(b'revlog chunk cache size %r is not a power of 2')
1483 _(b'revlog chunk cache size %r is not a power of 2')
1483 % chunk_cache_size
1484 % chunk_cache_size
1484 )
1485 )
1485 force_nodemap = opts.get(b'devel-force-nodemap', False)
1486 force_nodemap = opts.get(b'devel-force-nodemap', False)
1486 return new_header, mmapindexthreshold, force_nodemap
1487 return new_header, mmapindexthreshold, force_nodemap
1487
1488
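# Hedged, standalone restatement of the chunk-cache-size validation above:
# for a positive integer n, n & (n - 1) clears the lowest set bit, so the
# result is zero exactly when n is a power of two.
def _demo_check_chunk_cache_size(size):
    if size <= 0:
        raise ValueError('size %r is not greater than 0' % size)
    if size & (size - 1):
        raise ValueError('size %r is not a power of 2' % size)
# _demo_check_chunk_cache_size(65536) passes; 65535 raises.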
1488 def _get_data(self, filepath, mmap_threshold, size=None):
1489 def _get_data(self, filepath, mmap_threshold, size=None):
1489 """return a file content with or without mmap
1490 """return a file content with or without mmap
1490
1491
1491 If the file is missing return the empty string"""
1492 If the file is missing return the empty string"""
1492 try:
1493 try:
1493 with self.opener(filepath) as fp:
1494 with self.opener(filepath) as fp:
1494 if mmap_threshold is not None:
1495 if mmap_threshold is not None:
1495 file_size = self.opener.fstat(fp).st_size
1496 file_size = self.opener.fstat(fp).st_size
1496 if (
1497 if (
1497 file_size >= mmap_threshold
1498 file_size >= mmap_threshold
1498 and self.opener.is_mmap_safe(filepath)
1499 and self.opener.is_mmap_safe(filepath)
1499 ):
1500 ):
1500 if size is not None:
1501 if size is not None:
1501 # avoid potential mmap crash
1502 # avoid potential mmap crash
1502 size = min(file_size, size)
1503 size = min(file_size, size)
1503 # TODO: we should .close() the mmap to release resources without
1504 # TODO: we should .close() the mmap to release resources without
1504 # relying on Python GC
1505 # relying on Python GC
1505 if size is None:
1506 if size is None:
1506 return util.buffer(util.mmapread(fp))
1507 return util.buffer(util.mmapread(fp))
1507 else:
1508 else:
1508 return util.buffer(util.mmapread(fp, size))
1509 return util.buffer(util.mmapread(fp, size))
1509 if size is None:
1510 if size is None:
1510 return fp.read()
1511 return fp.read()
1511 else:
1512 else:
1512 return fp.read(size)
1513 return fp.read(size)
1513 except FileNotFoundError:
1514 except FileNotFoundError:
1514 return b''
1515 return b''
1515
1516
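# Hedged, self-contained analogue of _get_data() above, using plain file
# I/O instead of the vfs opener (which is assumed away here): small files
# are read outright, large ones are mapped.
import mmap
def _demo_read_maybe_mmap(path, mmap_threshold=None):
    try:
        with open(path, 'rb') as fp:
            file_size = os.fstat(fp.fileno()).st_size
            if mmap_threshold is not None and file_size >= mmap_threshold:
                with mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ) as m:
                    return m[:]  # copy out; the real code keeps the buffer
            return fp.read()
    except FileNotFoundError:
        return b''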
1516 def get_streams(self, max_linkrev, force_inline=False):
1517 def get_streams(self, max_linkrev, force_inline=False):
1517 """return a list of streams that represent this revlog
1518 """return a list of streams that represent this revlog
1518
1519
1519 This is used by stream-clone to do bytes to bytes copies of a repository.
1520 This is used by stream-clone to do bytes to bytes copies of a repository.
1520
1521
1521 This streams data for all revisions that refer to a changelog revision up
1522 This streams data for all revisions that refer to a changelog revision up
1522 to `max_linkrev`.
1523 to `max_linkrev`.
1523
1524
1524 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
1525 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
1525
1526
1526 It returns a list of three-tuples:
1527 It returns a list of three-tuples:
1527
1528
1528 [
1529 [
1529 (filename, bytes_stream, stream_size),
1530 (filename, bytes_stream, stream_size),
1530 …
1531 …
1531 ]
1532 ]
1532 """
1533 """
1533 n = len(self)
1534 n = len(self)
1534 index = self.index
1535 index = self.index
1535 while n > 0:
1536 while n > 0:
1536 linkrev = index[n - 1][4]
1537 linkrev = index[n - 1][4]
1537 if linkrev < max_linkrev:
1538 if linkrev < max_linkrev:
1538 break
1539 break
1539 # note: this loop will rarely go through multiple iterations, since
1540 # note: this loop will rarely go through multiple iterations, since
1540 # it only traverses commits created during the current streaming
1541 # it only traverses commits created during the current streaming
1541 # pull operation.
1542 # pull operation.
1542 #
1543 #
1543 # If this becomes a problem, using a binary search should cap the
1544 # If this becomes a problem, using a binary search should cap the
1544 # runtime of this.
1545 # runtime of this.
1545 n = n - 1
1546 n = n - 1
1546 if n == 0:
1547 if n == 0:
1547 # no data to send
1548 # no data to send
1548 return []
1549 return []
1549 index_size = n * index.entry_size
1550 index_size = n * index.entry_size
1550 data_size = self.end(n - 1)
1551 data_size = self.end(n - 1)
1551
1552
1552 # XXX we might have been split (or stripped) since the object
1553 # XXX we might have been split (or stripped) since the object
1553 # initialization. We need to close this race too, e.g. by having a way to
1554 # initialization. We need to close this race too, e.g. by having a way to
1554 # pre-open the files we feed to the revlog and never close them before
1555 # pre-open the files we feed to the revlog and never close them before
1555 # we are done streaming.
1556 # we are done streaming.
1556
1557
1557 if self._inline:
1558 if self._inline:
1558
1559
1559 def get_stream():
1560 def get_stream():
1560 with self.opener(self._indexfile, mode=b"r") as fp:
1561 with self.opener(self._indexfile, mode=b"r") as fp:
1561 yield None
1562 yield None
1562 size = index_size + data_size
1563 size = index_size + data_size
1563 if size <= 65536:
1564 if size <= 65536:
1564 yield fp.read(size)
1565 yield fp.read(size)
1565 else:
1566 else:
1566 yield from util.filechunkiter(fp, limit=size)
1567 yield from util.filechunkiter(fp, limit=size)
1567
1568
1568 inline_stream = get_stream()
1569 inline_stream = get_stream()
1569 next(inline_stream)
1570 next(inline_stream)
1570 return [
1571 return [
1571 (self._indexfile, inline_stream, index_size + data_size),
1572 (self._indexfile, inline_stream, index_size + data_size),
1572 ]
1573 ]
1573 elif force_inline:
1574 elif force_inline:
1574
1575
1575 def get_stream():
1576 def get_stream():
1576 with self.reading():
1577 with self.reading():
1577 yield None
1578 yield None
1578
1579
1579 for rev in range(n):
1580 for rev in range(n):
1580 idx = self.index.entry_binary(rev)
1581 idx = self.index.entry_binary(rev)
1581 if rev == 0 and self._docket is None:
1582 if rev == 0 and self._docket is None:
1582 # re-inject the inline flag
1583 # re-inject the inline flag
1583 header = self._format_flags
1584 header = self._format_flags
1584 header |= self._format_version
1585 header |= self._format_version
1585 header |= FLAG_INLINE_DATA
1586 header |= FLAG_INLINE_DATA
1586 header = self.index.pack_header(header)
1587 header = self.index.pack_header(header)
1587 idx = header + idx
1588 idx = header + idx
1588 yield idx
1589 yield idx
1589 yield self._inner.get_segment_for_revs(rev, rev)[1]
1590 yield self._inner.get_segment_for_revs(rev, rev)[1]
1590
1591
1591 inline_stream = get_stream()
1592 inline_stream = get_stream()
1592 next(inline_stream)
1593 next(inline_stream)
1593 return [
1594 return [
1594 (self._indexfile, inline_stream, index_size + data_size),
1595 (self._indexfile, inline_stream, index_size + data_size),
1595 ]
1596 ]
1596 else:
1597 else:
1597
1598
1598 def get_index_stream():
1599 def get_index_stream():
1599 with self.opener(self._indexfile, mode=b"r") as fp:
1600 with self.opener(self._indexfile, mode=b"r") as fp:
1600 yield None
1601 yield None
1601 if index_size <= 65536:
1602 if index_size <= 65536:
1602 yield fp.read(index_size)
1603 yield fp.read(index_size)
1603 else:
1604 else:
1604 yield from util.filechunkiter(fp, limit=index_size)
1605 yield from util.filechunkiter(fp, limit=index_size)
1605
1606
1606 def get_data_stream():
1607 def get_data_stream():
1607 with self._datafp() as fp:
1608 with self._datafp() as fp:
1608 yield None
1609 yield None
1609 if data_size <= 65536:
1610 if data_size <= 65536:
1610 yield fp.read(data_size)
1611 yield fp.read(data_size)
1611 else:
1612 else:
1612 yield from util.filechunkiter(fp, limit=data_size)
1613 yield from util.filechunkiter(fp, limit=data_size)
1613
1614
1614 index_stream = get_index_stream()
1615 index_stream = get_index_stream()
1615 next(index_stream)
1616 next(index_stream)
1616 data_stream = get_data_stream()
1617 data_stream = get_data_stream()
1617 next(data_stream)
1618 next(data_stream)
1618 return [
1619 return [
1619 (self._datafile, data_stream, data_size),
1620 (self._datafile, data_stream, data_size),
1620 (self._indexfile, index_stream, index_size),
1621 (self._indexfile, index_stream, index_size),
1621 ]
1622 ]
1622
1623
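# Hedged usage sketch for get_streams() above (`rl` and `sink` are assumed
# names): each returned entry is (filename, bytes_stream, stream_size), and
# the generators have already been primed past their initial None, so
# iterating yields only file data.
#
#     for name, stream, size in rl.get_streams(max_linkrev=tip_linkrev):
#         for chunk in stream:
#             sink.write(name, chunk)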
1623 def _loadindex(self, docket=None):
1624 def _loadindex(self, docket=None):
1624 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1625 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1625
1626
1626 if self.postfix is not None:
1627 if self.postfix is not None:
1627 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1628 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1628 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1629 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1629 entry_point = b'%s.i.a' % self.radix
1630 entry_point = b'%s.i.a' % self.radix
1630 elif self._try_split and self.opener.exists(self._split_index_file):
1631 elif self._try_split and self.opener.exists(self._split_index_file):
1631 entry_point = self._split_index_file
1632 entry_point = self._split_index_file
1632 else:
1633 else:
1633 entry_point = b'%s.i' % self.radix
1634 entry_point = b'%s.i' % self.radix
1634
1635
1635 if docket is not None:
1636 if docket is not None:
1636 self._docket = docket
1637 self._docket = docket
1637 self._docket_file = entry_point
1638 self._docket_file = entry_point
1638 else:
1639 else:
1639 self._initempty = True
1640 self._initempty = True
1640 entry_data = self._get_data(entry_point, mmapindexthreshold)
1641 entry_data = self._get_data(entry_point, mmapindexthreshold)
1641 if len(entry_data) > 0:
1642 if len(entry_data) > 0:
1642 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1643 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1643 self._initempty = False
1644 self._initempty = False
1644 else:
1645 else:
1645 header = new_header
1646 header = new_header
1646
1647
1647 self._format_flags = header & ~0xFFFF
1648 self._format_flags = header & ~0xFFFF
1648 self._format_version = header & 0xFFFF
1649 self._format_version = header & 0xFFFF
1649
1650
1650 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1651 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1651 if supported_flags is None:
1652 if supported_flags is None:
1652 msg = _(b'unknown version (%d) in revlog %s')
1653 msg = _(b'unknown version (%d) in revlog %s')
1653 msg %= (self._format_version, self.display_id)
1654 msg %= (self._format_version, self.display_id)
1654 raise error.RevlogError(msg)
1655 raise error.RevlogError(msg)
1655 elif self._format_flags & ~supported_flags:
1656 elif self._format_flags & ~supported_flags:
1656 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1657 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1657 display_flag = self._format_flags >> 16
1658 display_flag = self._format_flags >> 16
1658 msg %= (display_flag, self._format_version, self.display_id)
1659 msg %= (display_flag, self._format_version, self.display_id)
1659 raise error.RevlogError(msg)
1660 raise error.RevlogError(msg)
1660
1661
1661 features = FEATURES_BY_VERSION[self._format_version]
1662 features = FEATURES_BY_VERSION[self._format_version]
1662 self._inline = features[b'inline'](self._format_flags)
1663 self._inline = features[b'inline'](self._format_flags)
1663 self.delta_config.general_delta = features[b'generaldelta'](
1664 self.delta_config.general_delta = features[b'generaldelta'](
1664 self._format_flags
1665 self._format_flags
1665 )
1666 )
1666 self.feature_config.has_side_data = features[b'sidedata']
1667 self.feature_config.has_side_data = features[b'sidedata']
1667
1668
1668 if not features[b'docket']:
1669 if not features[b'docket']:
1669 self._indexfile = entry_point
1670 self._indexfile = entry_point
1670 index_data = entry_data
1671 index_data = entry_data
1671 else:
1672 else:
1672 self._docket_file = entry_point
1673 self._docket_file = entry_point
1673 if self._initempty:
1674 if self._initempty:
1674 self._docket = docketutil.default_docket(self, header)
1675 self._docket = docketutil.default_docket(self, header)
1675 else:
1676 else:
1676 self._docket = docketutil.parse_docket(
1677 self._docket = docketutil.parse_docket(
1677 self, entry_data, use_pending=self._trypending
1678 self, entry_data, use_pending=self._trypending
1678 )
1679 )
1679
1680
1680 if self._docket is not None:
1681 if self._docket is not None:
1681 self._indexfile = self._docket.index_filepath()
1682 self._indexfile = self._docket.index_filepath()
1682 index_data = b''
1683 index_data = b''
1683 index_size = self._docket.index_end
1684 index_size = self._docket.index_end
1684 if index_size > 0:
1685 if index_size > 0:
1685 index_data = self._get_data(
1686 index_data = self._get_data(
1686 self._indexfile, mmapindexthreshold, size=index_size
1687 self._indexfile, mmapindexthreshold, size=index_size
1687 )
1688 )
1688 if len(index_data) < index_size:
1689 if len(index_data) < index_size:
1689 msg = _(b'too little index data for %s: got %d, expected %d')
1690 msg = _(b'too little index data for %s: got %d, expected %d')
1690 msg %= (self.display_id, len(index_data), index_size)
1691 msg %= (self.display_id, len(index_data), index_size)
1691 raise error.RevlogError(msg)
1692 raise error.RevlogError(msg)
1692
1693
1693 self._inline = False
1694 self._inline = False
1694 # generaldelta implied by version 2 revlogs.
1695 # generaldelta implied by version 2 revlogs.
1695 self.delta_config.general_delta = True
1696 self.delta_config.general_delta = True
1696 # the logic for persistent nodemap will be dealt with within the
1697 # the logic for persistent nodemap will be dealt with within the
1697 # main docket, so disable it for now.
1698 # main docket, so disable it for now.
1698 self._nodemap_file = None
1699 self._nodemap_file = None
1699
1700
1700 if self._docket is not None:
1701 if self._docket is not None:
1701 self._datafile = self._docket.data_filepath()
1702 self._datafile = self._docket.data_filepath()
1702 self._sidedatafile = self._docket.sidedata_filepath()
1703 self._sidedatafile = self._docket.sidedata_filepath()
1703 elif self.postfix is None:
1704 elif self.postfix is None:
1704 self._datafile = b'%s.d' % self.radix
1705 self._datafile = b'%s.d' % self.radix
1705 else:
1706 else:
1706 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1707 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1707
1708
1708 self.nodeconstants = sha1nodeconstants
1709 self.nodeconstants = sha1nodeconstants
1709 self.nullid = self.nodeconstants.nullid
1710 self.nullid = self.nodeconstants.nullid
1710
1711
1711 # sparse-revlog can't be on without general-delta (issue6056)
1712 # sparse-revlog can't be on without general-delta (issue6056)
1712 if not self.delta_config.general_delta:
1713 if not self.delta_config.general_delta:
1713 self.delta_config.sparse_revlog = False
1714 self.delta_config.sparse_revlog = False
1714
1715
1715 self._storedeltachains = True
1716 self._storedeltachains = True
1716
1717
1717 devel_nodemap = (
1718 devel_nodemap = (
1718 self._nodemap_file
1719 self._nodemap_file
1719 and force_nodemap
1720 and force_nodemap
1720 and parse_index_v1_nodemap is not None
1721 and parse_index_v1_nodemap is not None
1721 )
1722 )
1722
1723
1723 use_rust_index = False
1724 use_rust_index = False
1724 if rustrevlog is not None and self._nodemap_file is not None:
1725 if rustrevlog is not None and self._nodemap_file is not None:
1725 # we would like to use the rust_index in all cases, especially
1726 # we would like to use the rust_index in all cases, especially
1726 # because it is necessary for AncestorsIterator and LazyAncestors
1727 # because it is necessary for AncestorsIterator and LazyAncestors
1727 # since the 6.7 cycle.
1728 # since the 6.7 cycle.
1728 #
1729 #
1729 # However, the performance impact of unconditionally building the
1730 # However, the performance impact of unconditionally building the
1730 # nodemap is currently a problem for repositories without a
1731 # nodemap is currently a problem for repositories without a
1731 # persistent nodemap.
1732 # persistent nodemap.
1732 use_rust_index = True
1733 use_rust_index = True
1733
1734
1734 self._parse_index = parse_index_v1
1735 self._parse_index = parse_index_v1
1735 if self._format_version == REVLOGV0:
1736 if self._format_version == REVLOGV0:
1736 self._parse_index = revlogv0.parse_index_v0
1737 self._parse_index = revlogv0.parse_index_v0
1737 elif self._format_version == REVLOGV2:
1738 elif self._format_version == REVLOGV2:
1738 self._parse_index = parse_index_v2
1739 self._parse_index = parse_index_v2
1739 elif self._format_version == CHANGELOGV2:
1740 elif self._format_version == CHANGELOGV2:
1740 self._parse_index = parse_index_cl_v2
1741 self._parse_index = parse_index_cl_v2
1741 elif devel_nodemap:
1742 elif devel_nodemap:
1742 self._parse_index = parse_index_v1_nodemap
1743 self._parse_index = parse_index_v1_nodemap
1743 elif use_rust_index:
1744 elif use_rust_index:
1744 self._parse_index = functools.partial(
1745 self._parse_index = functools.partial(
1745 parse_index_v1_rust, default_header=new_header
1746 parse_index_v1_rust, default_header=new_header
1746 )
1747 )
1747 try:
1748 try:
1748 d = self._parse_index(index_data, self._inline)
1749 d = self._parse_index(index_data, self._inline)
1749 index, chunkcache = d
1750 index, chunkcache = d
1750 use_nodemap = (
1751 use_nodemap = (
1751 not self._inline
1752 not self._inline
1752 and self._nodemap_file is not None
1753 and self._nodemap_file is not None
1753 and hasattr(index, 'update_nodemap_data')
1754 and hasattr(index, 'update_nodemap_data')
1754 )
1755 )
1755 if use_nodemap:
1756 if use_nodemap:
1756 nodemap_data = nodemaputil.persisted_data(self)
1757 nodemap_data = nodemaputil.persisted_data(self)
1757 if nodemap_data is not None:
1758 if nodemap_data is not None:
1758 docket = nodemap_data[0]
1759 docket = nodemap_data[0]
1759 if (
1760 if (
1760 len(d[0]) > docket.tip_rev
1761 len(d[0]) > docket.tip_rev
1761 and d[0][docket.tip_rev][7] == docket.tip_node
1762 and d[0][docket.tip_rev][7] == docket.tip_node
1762 ):
1763 ):
1763 # no changelog tampering
1764 # no changelog tampering
1764 self._nodemap_docket = docket
1765 self._nodemap_docket = docket
1765 index.update_nodemap_data(*nodemap_data)
1766 index.update_nodemap_data(*nodemap_data)
1766 except (ValueError, IndexError):
1767 except (ValueError, IndexError):
1767 raise error.RevlogError(
1768 raise error.RevlogError(
1768 _(b"index %s is corrupted") % self.display_id
1769 _(b"index %s is corrupted") % self.display_id
1769 )
1770 )
1770 self.index = index
1771 self.index = index
1771 # revnum -> (chain-length, sum-delta-length)
1772 # revnum -> (chain-length, sum-delta-length)
1772 self._chaininfocache = util.lrucachedict(500)
1773 self._chaininfocache = util.lrucachedict(500)
1773
1774
1774 return chunkcache
1775 return chunkcache
1775
1776
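# Hedged sketch of the entry-point selection at the top of _loadindex()
# above (the split-index branch is omitted; `exists` stands for a vfs
# existence check):
def _demo_pick_entry_point(radix, postfix, trypending, exists):
    if postfix is not None:
        return b'%s.i.%s' % (radix, postfix)
    if trypending and exists(b'%s.i.a' % radix):
        return b'%s.i.a' % radix
    return b'%s.i' % radix
# e.g. _demo_pick_entry_point(b'00changelog', None, False, lambda p: False)
# returns b'00changelog.i'.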
1776 def _load_inner(self, chunk_cache):
1777 def _load_inner(self, chunk_cache):
1777 if self._docket is None:
1778 if self._docket is None:
1778 default_compression_header = None
1779 default_compression_header = None
1779 else:
1780 else:
1780 default_compression_header = self._docket.default_compression_header
1781 default_compression_header = self._docket.default_compression_header
1781
1782
1782 self._inner = _InnerRevlog(
1783 self._inner = _InnerRevlog(
1783 opener=self.opener,
1784 opener=self.opener,
1784 index=self.index,
1785 index=self.index,
1785 index_file=self._indexfile,
1786 index_file=self._indexfile,
1786 data_file=self._datafile,
1787 data_file=self._datafile,
1787 sidedata_file=self._sidedatafile,
1788 sidedata_file=self._sidedatafile,
1788 inline=self._inline,
1789 inline=self._inline,
1789 data_config=self.data_config,
1790 data_config=self.data_config,
1790 delta_config=self.delta_config,
1791 delta_config=self.delta_config,
1791 feature_config=self.feature_config,
1792 feature_config=self.feature_config,
1792 chunk_cache=chunk_cache,
1793 chunk_cache=chunk_cache,
1793 default_compression_header=default_compression_header,
1794 default_compression_header=default_compression_header,
1794 )
1795 )
1795
1796
1796 def get_revlog(self):
1797 def get_revlog(self):
1797 """simple function to mirror API of other not-really-revlog API"""
1798 """simple function to mirror API of other not-really-revlog API"""
1798 return self
1799 return self
1799
1800
1800 @util.propertycache
1801 @util.propertycache
1801 def revlog_kind(self):
1802 def revlog_kind(self):
1802 return self.target[0]
1803 return self.target[0]
1803
1804
1804 @util.propertycache
1805 @util.propertycache
1805 def display_id(self):
1806 def display_id(self):
1806 """The public facing "ID" of the revlog that we use in message"""
1807 """The public facing "ID" of the revlog that we use in message"""
1807 if self.revlog_kind == KIND_FILELOG:
1808 if self.revlog_kind == KIND_FILELOG:
1808 # Reference the file without the "data/" prefix, so it is familiar
1809 # Reference the file without the "data/" prefix, so it is familiar
1809 # to the user.
1810 # to the user.
1810 return self.target[1]
1811 return self.target[1]
1811 else:
1812 else:
1812 return self.radix
1813 return self.radix
1813
1814
1814 def _datafp(self, mode=b'r'):
1815 def _datafp(self, mode=b'r'):
1815 """file object for the revlog's data file"""
1816 """file object for the revlog's data file"""
1816 return self.opener(self._datafile, mode=mode)
1817 return self.opener(self._datafile, mode=mode)
1817
1818
1818 def tiprev(self):
1819 def tiprev(self):
1819 return len(self.index) - 1
1820 return len(self.index) - 1
1820
1821
1821 def tip(self):
1822 def tip(self):
1822 return self.node(self.tiprev())
1823 return self.node(self.tiprev())
1823
1824
1824 def __contains__(self, rev):
1825 def __contains__(self, rev):
1825 return 0 <= rev < len(self)
1826 return 0 <= rev < len(self)
1826
1827
1827 def __len__(self):
1828 def __len__(self):
1828 return len(self.index)
1829 return len(self.index)
1829
1830
1830 def __iter__(self) -> Iterator[int]:
1831 def __iter__(self) -> Iterator[int]:
1831 return iter(range(len(self)))
1832 return iter(range(len(self)))
1832
1833
1833 def revs(self, start=0, stop=None):
1834 def revs(self, start=0, stop=None):
1834 """iterate over all rev in this revlog (from start to stop)"""
1835 """iterate over all rev in this revlog (from start to stop)"""
1835 return storageutil.iterrevs(len(self), start=start, stop=stop)
1836 return storageutil.iterrevs(len(self), start=start, stop=stop)
1836
1837
1837 def hasnode(self, node):
1838 def hasnode(self, node):
1838 try:
1839 try:
1839 self.rev(node)
1840 self.rev(node)
1840 return True
1841 return True
1841 except KeyError:
1842 except KeyError:
1842 return False
1843 return False
1843
1844
1844 def _candelta(self, baserev, rev):
1845 def _candelta(self, baserev, rev):
1845 """whether two revisions (baserev, rev) can be delta-ed or not"""
1846 """whether two revisions (baserev, rev) can be delta-ed or not"""
1846 # Disable delta if either rev requires a content-changing flag
1847 # Disable delta if either rev requires a content-changing flag
1847 # processor (ex. LFS). This is because such flag processor can alter
1848 # processor (ex. LFS). This is because such flag processor can alter
1848 # the rawtext content that the delta will be based on, and two clients
1849 # the rawtext content that the delta will be based on, and two clients
1849 # could have a same revlog node with different flags (i.e. different
1850 # could have a same revlog node with different flags (i.e. different
1850 # rawtext contents) and the delta could be incompatible.
1851 # rawtext contents) and the delta could be incompatible.
1851 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1852 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1852 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1853 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1853 ):
1854 ):
1854 return False
1855 return False
1855 return True
1856 return True
1856
1857
1857 def update_caches(self, transaction):
1858 def update_caches(self, transaction):
1858 """update on disk cache
1859 """update on disk cache
1859
1860
1860 If a transaction is passed, the update may be delayed to transaction
1861 If a transaction is passed, the update may be delayed to transaction
1861 commit."""
1862 commit."""
1862 if self._nodemap_file is not None:
1863 if self._nodemap_file is not None:
1863 if transaction is None:
1864 if transaction is None:
1864 nodemaputil.update_persistent_nodemap(self)
1865 nodemaputil.update_persistent_nodemap(self)
1865 else:
1866 else:
1866 nodemaputil.setup_persistent_nodemap(transaction, self)
1867 nodemaputil.setup_persistent_nodemap(transaction, self)
1867
1868
1868 def clearcaches(self):
1869 def clearcaches(self):
1869 """Clear in-memory caches"""
1870 """Clear in-memory caches"""
1870 self._chainbasecache.clear()
1871 self._chainbasecache.clear()
1871 self._inner.clear_cache()
1872 self._inner.clear_cache()
1872 self._pcache = {}
1873 self._pcache = {}
1873 self._nodemap_docket = None
1874 self._nodemap_docket = None
1874 self.index.clearcaches()
1875 self.index.clearcaches()
1875 # The Python code is the one responsible for validating the docket, so we
1876 # The Python code is the one responsible for validating the docket, so we
1876 # end up having to refresh it here.
1877 # end up having to refresh it here.
1877 use_nodemap = (
1878 use_nodemap = (
1878 not self._inline
1879 not self._inline
1879 and self._nodemap_file is not None
1880 and self._nodemap_file is not None
1880 and hasattr(self.index, 'update_nodemap_data')
1881 and hasattr(self.index, 'update_nodemap_data')
1881 )
1882 )
1882 if use_nodemap:
1883 if use_nodemap:
1883 nodemap_data = nodemaputil.persisted_data(self)
1884 nodemap_data = nodemaputil.persisted_data(self)
1884 if nodemap_data is not None:
1885 if nodemap_data is not None:
1885 self._nodemap_docket = nodemap_data[0]
1886 self._nodemap_docket = nodemap_data[0]
1886 self.index.update_nodemap_data(*nodemap_data)
1887 self.index.update_nodemap_data(*nodemap_data)
1887
1888
1888 def rev(self, node):
1889 def rev(self, node):
1889 """return the revision number associated with a <nodeid>"""
1890 """return the revision number associated with a <nodeid>"""
1890 try:
1891 try:
1891 return self.index.rev(node)
1892 return self.index.rev(node)
1892 except TypeError:
1893 except TypeError:
1893 raise
1894 raise
1894 except error.RevlogError:
1895 except error.RevlogError:
1895 # parsers.c radix tree lookup failed
1896 # parsers.c radix tree lookup failed
1896 if (
1897 if (
1897 node == self.nodeconstants.wdirid
1898 node == self.nodeconstants.wdirid
1898 or node in self.nodeconstants.wdirfilenodeids
1899 or node in self.nodeconstants.wdirfilenodeids
1899 ):
1900 ):
1900 raise error.WdirUnsupported
1901 raise error.WdirUnsupported
1901 raise error.LookupError(node, self.display_id, _(b'no node'))
1902 raise error.LookupError(node, self.display_id, _(b'no node'))
1902
1903
1903 # Accessors for index entries.
1904 # Accessors for index entries.
1904
1905
1905 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1906 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1906 # are flags.
1907 # are flags.
1907 def start(self, rev):
1908 def start(self, rev):
1908 return int(self.index[rev][0] >> 16)
1909 return int(self.index[rev][0] >> 16)
1909
1910
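# Hedged restatement of the packing described above: the first index field
# stores the data offset in its high 48 bits and the flags in the low 16.
def _demo_pack_offset_flags(offset, flags):
    assert 0 <= flags <= 0xFFFF
    return (offset << 16) | flags
def _demo_unpack_offset_flags(field):
    return field >> 16, field & 0xFFFF  # (offset, flags)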
1910 def sidedata_cut_off(self, rev):
1911 def sidedata_cut_off(self, rev):
1911 sd_cut_off = self.index[rev][8]
1912 sd_cut_off = self.index[rev][8]
1912 if sd_cut_off != 0:
1913 if sd_cut_off != 0:
1913 return sd_cut_off
1914 return sd_cut_off
1914 # This is some annoying dance, because entries without sidedata
1915 # This is some annoying dance, because entries without sidedata
1915 # currently use 0 as their offset (instead of previous-offset +
1916 # currently use 0 as their offset (instead of previous-offset +
1916 # previous-size).
1917 # previous-size).
1917 #
1918 #
1918 # We should reconsider this "sidedata → 0 sidedata_offset" policy.
1919 # We should reconsider this "sidedata → 0 sidedata_offset" policy.
1919 # In the meantime, we need this.
1920 # In the meantime, we need this.
1920 while 0 <= rev:
1921 while 0 <= rev:
1921 e = self.index[rev]
1922 e = self.index[rev]
1922 if e[9] != 0:
1923 if e[9] != 0:
1923 return e[8] + e[9]
1924 return e[8] + e[9]
1924 rev -= 1
1925 rev -= 1
1925 return 0
1926 return 0
1926
1927
1927 def flags(self, rev):
1928 def flags(self, rev):
1928 return self.index[rev][0] & 0xFFFF
1929 return self.index[rev][0] & 0xFFFF
1929
1930
1930 def length(self, rev):
1931 def length(self, rev):
1931 return self.index[rev][1]
1932 return self.index[rev][1]
1932
1933
1933 def sidedata_length(self, rev):
1934 def sidedata_length(self, rev):
1934 if not self.feature_config.has_side_data:
1935 if not self.feature_config.has_side_data:
1935 return 0
1936 return 0
1936 return self.index[rev][9]
1937 return self.index[rev][9]
1937
1938
1938 def rawsize(self, rev):
1939 def rawsize(self, rev):
1939 """return the length of the uncompressed text for a given revision"""
1940 """return the length of the uncompressed text for a given revision"""
1940 l = self.index[rev][2]
1941 l = self.index[rev][2]
1941 if l >= 0:
1942 if l >= 0:
1942 return l
1943 return l
1943
1944
1944 t = self.rawdata(rev)
1945 t = self.rawdata(rev)
1945 return len(t)
1946 return len(t)
1946
1947
1947 def size(self, rev):
1948 def size(self, rev):
1948 """length of non-raw text (processed by a "read" flag processor)"""
1949 """length of non-raw text (processed by a "read" flag processor)"""
1949 # fast path: if no "read" flag processor could change the content,
1950 # fast path: if no "read" flag processor could change the content,
1950 # size is rawsize. note: ELLIPSIS is known to not change the content.
1951 # size is rawsize. note: ELLIPSIS is known to not change the content.
1951 flags = self.flags(rev)
1952 flags = self.flags(rev)
1952 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1953 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1953 return self.rawsize(rev)
1954 return self.rawsize(rev)
1954
1955
1955 return len(self.revision(rev))
1956 return len(self.revision(rev))
1956
1957
1957 def fast_rank(self, rev):
1958 def fast_rank(self, rev):
1958 """Return the rank of a revision if already known, or None otherwise.
1959 """Return the rank of a revision if already known, or None otherwise.
1959
1960
1960 The rank of a revision is the size of the sub-graph it defines as a
1961 The rank of a revision is the size of the sub-graph it defines as a
1961 head. Equivalently, the rank of a revision `r` is the size of the set
1962 head. Equivalently, the rank of a revision `r` is the size of the set
1962 `ancestors(r)`, `r` included.
1963 `ancestors(r)`, `r` included.
1963
1964
1964 This method returns the rank retrieved from the revlog in constant
1965 This method returns the rank retrieved from the revlog in constant
1965 time. It makes no attempt at computing unknown values for versions of
1966 time. It makes no attempt at computing unknown values for versions of
1966 the revlog which do not persist the rank.
1967 the revlog which do not persist the rank.
1967 """
1968 """
1968 rank = self.index[rev][ENTRY_RANK]
1969 rank = self.index[rev][ENTRY_RANK]
1969 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1970 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1970 return None
1971 return None
1971 if rev == nullrev:
1972 if rev == nullrev:
1972 return 0 # convention
1973 return 0 # convention
1973 return rank
1974 return rank
1974
1975
1975 def chainbase(self, rev):
1976 def chainbase(self, rev):
1976 base = self._chainbasecache.get(rev)
1977 base = self._chainbasecache.get(rev)
1977 if base is not None:
1978 if base is not None:
1978 return base
1979 return base
1979
1980
1980 index = self.index
1981 index = self.index
1981 iterrev = rev
1982 iterrev = rev
1982 base = index[iterrev][3]
1983 base = index[iterrev][3]
1983 while base != iterrev:
1984 while base != iterrev:
1984 iterrev = base
1985 iterrev = base
1985 base = index[iterrev][3]
1986 base = index[iterrev][3]
1986
1987
1987 self._chainbasecache[rev] = base
1988 self._chainbasecache[rev] = base
1988 return base
1989 return base
1989
1990
1990 def linkrev(self, rev):
1991 def linkrev(self, rev):
1991 return self.index[rev][4]
1992 return self.index[rev][4]
1992
1993
1993 def parentrevs(self, rev):
1994 def parentrevs(self, rev):
1994 try:
1995 try:
1995 entry = self.index[rev]
1996 entry = self.index[rev]
1996 except IndexError:
1997 except IndexError:
1997 if rev == wdirrev:
1998 if rev == wdirrev:
1998 raise error.WdirUnsupported
1999 raise error.WdirUnsupported
1999 raise
2000 raise
2000
2001
2001 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
2002 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
2002 return entry[6], entry[5]
2003 return entry[6], entry[5]
2003 else:
2004 else:
2004 return entry[5], entry[6]
2005 return entry[5], entry[6]
2005
2006
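# Hedged restatement of the ordering rule above: with canonical parent
# order, a null first parent is swapped behind the real one, so p1 is null
# only when both parents are null.
def _demo_canonical_parents(p1, p2):
    if p1 == nullrev:
        return p2, p1
    return p1, p2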
2006 # fast parentrevs(rev) where rev isn't filtered
2007 # fast parentrevs(rev) where rev isn't filtered
2007 _uncheckedparentrevs = parentrevs
2008 _uncheckedparentrevs = parentrevs
2008
2009
2009 def node(self, rev):
2010 def node(self, rev):
2010 try:
2011 try:
2011 return self.index[rev][7]
2012 return self.index[rev][7]
2012 except IndexError:
2013 except IndexError:
2013 if rev == wdirrev:
2014 if rev == wdirrev:
2014 raise error.WdirUnsupported
2015 raise error.WdirUnsupported
2015 raise
2016 raise
2016
2017
2017 # Derived from index values.
2018 # Derived from index values.
2018
2019
2019 def end(self, rev):
2020 def end(self, rev):
2020 return self.start(rev) + self.length(rev)
2021 return self.start(rev) + self.length(rev)
2021
2022
2022 def parents(self, node):
2023 def parents(self, node):
2023 i = self.index
2024 i = self.index
2024 d = i[self.rev(node)]
2025 d = i[self.rev(node)]
2025 # inline node() to avoid function call overhead
2026 # inline node() to avoid function call overhead
2026 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
2027 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
2027 return i[d[6]][7], i[d[5]][7]
2028 return i[d[6]][7], i[d[5]][7]
2028 else:
2029 else:
2029 return i[d[5]][7], i[d[6]][7]
2030 return i[d[5]][7], i[d[6]][7]
2030
2031
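    # Hedged example (assumed semantics): with canonical_parent_order set, a
    # merge recorded as (null, p) is reported as (p, null), so callers can
    # rely on a real parent appearing first, e.g.:
    #   parentrevs(r) == (12, -1) rather than (-1, 12)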
    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self.delta_config.general_delta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        return self._inner._deltachain(rev, stoprev=stoprev)

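    # Worked example (illustrative only): assume rev 5 deltas against rev 3,
    # which deltas against rev 1, a full snapshot (its base is itself). The
    # while loop above visits 5 -> 3 -> 1, counting two delta hops
    # (chainlen == 2) and summing the compressed lengths along the way; the
    # ``else`` branch then adds the base text's own compressed length.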
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset:
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

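    # Hedged usage sketch: in revset terms the return value is roughly
    #   (::common, (::heads) - (::common))
    # e.g. with a linear history 0..4, common=[node(2)], heads=[node(4)],
    # the missing list would come back as [node(3), node(4)], sorted by
    # revision number and therefore topologically.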
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.items() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

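    # Hedged summary (added note): conceptually this computes the revset
    # ``roots::heads`` plus which of the given roots/heads actually bound it,
    # e.g. on a linear 0..4 history, nodesbetween([node(1)], [node(3)]) would
    # return ([node(1), node(2), node(3)], [node(1)], [node(3)]).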
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def headrevsdiff(self, start, stop):
        try:
            return self.index.headrevsdiff(start, stop)
        except AttributeError:
            return dagop.headrevsdiff(self._uncheckedparentrevs, start, stop)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

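    # Worked example (illustrative): with parents 0 <- 1 <- 2 and 0 <- 3, the
    # single pass sets ishead = [0, 0, 1, 1, ...] -- revs 2 and 3 were never
    # named as anyone's parent, so they survive as heads. The extra slot at
    # the end absorbs writes for nullrev (-1) without an IndexError.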
    def _head_node_ids(self):
        try:
            return self.index.head_node_ids()
        except AttributeError:
            return [self.node(r) for r in self.headrevs()]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return self._head_node_ids()
        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def diffheads(self, start, stop):
        """return the nodes that make up the difference between
        heads of revs before `start` and heads of revs before `stop`"""
        removed, added = self.headrevsdiff(start, stop)
        return [self.node(r) for r in removed], [self.node(r) for r in added]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

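    # Added note (hedged): children() has no reverse index to lean on, so it
    # scans every revision after ``p`` and keeps those naming ``p`` as a
    # parent -- O(len(self) - p) parentrevs lookups per call. Revisions with
    # no parents at all count as children of nullrev.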
    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (binascii.Error, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key is not hex
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            # hex(node)[:...]
            l = len(id) // 2 * 2  # grab an even number of digits
            try:
                # we're dropping the last digit, so let's check that it's hex,
                # to avoid the expensive computation below if it's not
                if len(id) % 2 > 0:
                    if id[-1] not in hexdigits:
                        return None
                prefix = bin(id[:l])
            except binascii.Error:
                pass
            else:
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None

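    # Hedged walkthrough: resolving a prefix such as b'1f2e3d' first asks the
    # C/Rust radix tree for an exact, unambiguous answer; only on failure (or
    # when hidden revisions may shadow the match) does the slow path above
    # linearly scan ``self.index`` comparing hex prefixes. Prefixes made
    # entirely of 'f' stay ambiguous with the virtual working-directory id.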
    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self.delta_config.general_delta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        ret = self._inner.issnapshot(rev)
        self.issnapshot = self._inner.issnapshot
        return ret

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._inner._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._inner._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def revision(self, nodeorrev):
        """return an uncompressed revision of a given node or revision
        number.
        """
        return self._revisiondata(nodeorrev)

    def sidedata(self, nodeorrev):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy handling.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _rawtext(self, node, rev):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """
        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._inner._revisioncache:
            if self._inner._revisioncache[0] == node:
                return (rev, self._inner._revisioncache[2], True)

        if rev is None:
            rev = self.rev(node)

        text = self._inner.raw_text(node, rev)
        return (rev, text, False)

    def _revisiondata(self, nodeorrev, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev)

        if raw and validated:
            # if we don't want to process the raw text and the raw text is
            # cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._inner._revisioncache = (node, rev, rawtext)

        return text

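    # Hedged flow summary: _revisiondata() is the single funnel behind both
    # revision() and rawdata(). Cached, flag-free entries short-circuit at
    # the top; otherwise flag processors transform the stored rawtext into
    # the logical text, the hash is (optionally) checked, and the rawtext is
    # cached for the next call.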
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        if self._sidedatafile is None:
            return {}
        sidedata_end = None
        if self._docket is not None:
            sidedata_end = self._docket.sidedata_end
        return self._inner.sidedata(rev, sidedata_end)

    def rawdata(self, nodeorrev):
        """return the uncompressed raw data of a given node or revision number"""
        return self._revisiondata(nodeorrev, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if (
                    self._inner._revisioncache
                    and self._inner._revisioncache[0] == node
                ):
                    self._inner._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self.feature_config.censorable and storageutil.iscensoredtext(
                text
            ):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    @property
    def _split_index_file(self):
        """the path where to expect the index of an ongoing splitting operation

        The file will only exist if a splitting operation is in progress, but
        it is always expected at the same location."""
        parts = self.radix.split(b'/')
        if len(parts) > 1:
            # adds a '-s' suffix to the ``data/`` or ``meta/`` base
            head = parts[0] + b'-s'
            mids = parts[1:-1]
            tail = parts[-1] + b'.i'
            pieces = [head] + mids + [tail]
            return b'/'.join(pieces)
        else:
            # the revlog is stored at the root of the store (changelog or
            # manifest), no risk of collision.
            return self.radix + b'.i.s'

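    # Added commentary on _split_index_file above, with illustrative values:
    # a filelog with radix b'data/foo' would place its temporary split index
    # at b'data-s/foo.i', while a store-root revlog such as a changelog with
    # radix b'00changelog' would use b'00changelog.i.s'.
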
    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or (self._may_inline and total_size < _maxinline):
            return

        if self._docket is not None:
            msg = b"inline revlog should not have a docket"
            raise error.ProgrammingError(msg)

        # In the common case, we enforce inline size because the revlog has
        # been appended to. In such a case, it must have an initial offset
        # recorded in the transaction.
        troffset = tr.findoffset(self._inner.canonical_index_file)
        pre_touched = troffset is not None
        if not pre_touched and self.target[0] != KIND_CHANGELOG:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )

        tr.addbackup(self._inner.canonical_index_file, for_offset=pre_touched)
        tr.add(self._datafile, 0)

        new_index_file_path = None
        old_index_file_path = self._indexfile
        new_index_file_path = self._split_index_file
        opener = self.opener
        weak_self = weakref.ref(self)

        # the "split" index replaces the real index when the transaction is
        # finalized
        def finalize_callback(tr):
            opener.rename(
                new_index_file_path,
                old_index_file_path,
                checkambig=True,
            )
            maybe_self = weak_self()
            if maybe_self is not None:
                maybe_self._indexfile = old_index_file_path
                maybe_self._inner.index_file = maybe_self._indexfile

        def abort_callback(tr):
            maybe_self = weak_self()
            if maybe_self is not None:
                maybe_self._indexfile = old_index_file_path
                maybe_self._inner.inline = True
                maybe_self._inner.index_file = old_index_file_path

        tr.registertmp(new_index_file_path)
        # we use 001 here to make sure this happens after the finalization of
        # any pending changelog write (which uses 000). Otherwise the two
        # finalizers would step over each other and delete the changelog.i
        # file.
        if self.target[1] is not None:
            callback_id = b'001-revlog-split-%d-%s' % self.target
        else:
            callback_id = b'001-revlog-split-%d' % self.target[0]
        tr.addfinalize(callback_id, finalize_callback)
        tr.addabort(callback_id, abort_callback)

        self._format_flags &= ~FLAG_INLINE_DATA
        self._inner.split_inline(
            tr,
            self._format_flags | self._format_version,
            new_index_file_path=new_index_file_path,
        )

        self._inline = False
        if new_index_file_path is not None:
            self._indexfile = new_index_file_path

        nodemaputil.setup_persistent_nodemap(tr, self)

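    # Added commentary on _enforceinlinesize() above (a rough lifecycle
    # sketch, assuming a revlog stored as data/foo.i): the rewritten index is
    # first materialized at the _split_index_file path (data-s/foo.i) and
    # registered as a transaction temporary; on commit, finalize_callback
    # renames it over data/foo.i, while on abort, abort_callback restores the
    # in-memory state so the revlog keeps pointing at the untouched inline
    # file.
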
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
        with self._inner.reading():
            yield

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._inner.is_writing:
            yield
        else:
            data_end = None
            sidedata_end = None
            if self._docket is not None:
                data_end = self._docket.data_end
                sidedata_end = self._docket.sidedata_end
            with self._inner.writing(
                transaction,
                data_end=data_end,
                sidedata_end=sidedata_end,
            ):
                yield
                if self._docket is not None:
                    self._write_docket(transaction)

    @property
    def is_delaying(self):
        return self._inner.is_delaying

    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method so that the changelog can hook its own transaction
        logic into it.

        We could also imagine using the same transaction logic for all
        revlogs, since dockets are cheap."""
        self._docket.write(transaction)

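    # Added commentary on _writing() above -- illustrative sketch only:
    #
    #     with rl._writing(tr):
    #         ...  # append revisions; handles stay open for the whole block
    #
    # Re-entering is cheap: when the inner object is already writing, the
    # context manager just yields. For docket-based revlogs (v2), the docket
    # is flushed once, when the outermost writing block exits.
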
    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses
            might use a different hashing method (and override checkhash() in
            that case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.feature_config.has_side_data:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support it")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

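    # Added commentary on addrevision() above -- a minimal usage sketch with
    # hypothetical names (`rl`, `tr`, `linkrev`):
    #
    #     rev = rl.addrevision(text, tr, linkrev, p1node, p2node)
    #
    # The returned value is the new revision number; if the computed node is
    # already present, the existing revision number is returned instead and
    # nothing is written.
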
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data: bytes) -> Tuple[bytes, bytes]:
        return self._inner.compress(data)

    def decompress(self, data):
        return self._inner.decompress(data)

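    # Added commentary on compress()/decompress() above, inferred from the
    # call sites in _addrevision() below rather than from an authoritative
    # spec: compress() returns a (header, data) pair in which a b'u' header
    # marks text stored uncompressed and an empty header means the default
    # compression engine's framing; decompress() reverses the operation.
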
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if not self._inner.is_writing:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._inner._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            write_debug = None
            if self.delta_config.debug_delta:
                write_debug = transaction._report
            deltacomputer = deltautil.deltacomputer(
                self, write_debug=write_debug
            )

        if cachedelta is not None and len(cachedelta) == 2:
            # If the cached delta has no information about how it should be
            # reused, add the default reuse instruction according to the
            # revlog's configuration.
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse = DELTA_BASE_REUSE_NO
            cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            default_comp = self._docket.default_compression_header
            r = deltautil.delta_compression(default_comp, deltainfo)
            compression_mode, deltainfo = r

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.feature_config.has_side_data:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self._inner.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        rank = RANK_UNKNOWN
        if self.feature_config.compute_rank:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else:  # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))

        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._inner._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

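    # Added commentary on the rank computation in _addrevision() above:
    # rank(r) is the size of r's ancestor set, r included. A root gets rank 1,
    # a linear child gets 1 + rank(parent), and for a merge the slow path
    # takes 1 + rank(pmax) plus the number of ancestors of pmin that are not
    # already ancestors of pmax (the findmissingrevs([pmax], [pmin]) count).
    # This reading is inferred from the code above, not from separate docs.
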
    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1); revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self,
        transaction,
        entry,
        data,
        link,
        offset,
        sidedata,
        sidedata_offset,
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        index_end = data_end = sidedata_end = None
        if self._docket is not None:
            index_end = self._docket.index_end
            data_end = self._docket.data_end
            sidedata_end = self._docket.sidedata_end

        files_end = self._inner.write_entry(
            transaction,
            entry,
            data,
            link,
            offset,
            sidedata,
            sidedata_offset,
            index_end,
            data_end,
            sidedata_end,
        )
        self._enforceinlinesize(transaction)
        if self._docket is not None:
            self._docket.index_end = files_end[0]
            self._docket.data_end = files_end[1]
            self._docket.sidedata_end = files_end[2]

        nodemaputil.setup_persistent_nodemap(transaction, self)

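    # Added commentary on _writeentry() above: write_entry() appends the
    # binary index entry, the delta data and any sidedata, returning the new
    # end offsets; _enforceinlinesize() then gets a chance to split an
    # overgrown inline revlog, and for docket-based revlogs the returned
    # (index, data, sidedata) ends are recorded before the persistent nodemap
    # hook runs.
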
    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The first delta
        is against its parent, which should be in our log; the rest are
        against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        # read the default delta-base reuse policy from revlog config if the
        # group did not specify one.
        if delta_base_reuse_policy is None:
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse_policy = DELTA_BASE_REUSE_NO

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self.delta_config.debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                    debug_info=debug_info,
                )
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta, delta_base_reuse_policy),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

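    # Added commentary on addgroup() above -- the shape of one `deltas` item,
    # as unpacked in the loop:
    #
    #     (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
    #
    # and an illustrative call with hypothetical names, mapping changelog
    # nodes to linkrevs via `cl.rev`:
    #
    #     any_added = rl.addgroup(deltas, cl.rev, tr,
    #                             addrevisioncb=on_rev_added)
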
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self.feature_config.censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self.feature_config.censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment.
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._chaininfocache = util.lrucachedict(500)
        self._inner.clear_cache()

        del self.index[rev:-1]

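    # Added commentary on getstrippoint()/strip() above -- a typical sequence,
    # sketched with hypothetical names:
    #
    #     striprev, broken = rl.getstrippoint(minlink)
    #     rl.strip(minlink, tr)
    #
    # `broken` lists revisions whose linkrevs the truncation invalidates;
    # callers are expected to have saved (e.g. bundled) anything they intend
    # to re-add afterwards, as the strip() docstring notes.
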
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)

    def files(self):
        """return list of files that compose this revlog"""
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

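    # Added commentary on checksize() above -- illustrative use:
    #
    #     dd, di = rl.checksize()
    #     if (dd, di) != (0, 0):
    #         ...  # the data and/or index file has trailing or missing bytes
    #
    # Both values are byte deltas against the sizes implied by the index, so
    # a healthy revlog reports (0, 0).
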
    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self.delta_config.general_delta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self._candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

3704 def clone(
3705 def clone(
3705 self,
3706 self,
3706 tr,
3707 tr,
3707 destrevlog,
3708 destrevlog,
3708 addrevisioncb=None,
3709 addrevisioncb=None,
3709 deltareuse=DELTAREUSESAMEREVS,
3710 deltareuse=DELTAREUSESAMEREVS,
3710 forcedeltabothparents=None,
3711 forcedeltabothparents=None,
3711 sidedata_helpers=None,
3712 sidedata_helpers=None,
3712 ):
3713 ):
3713 """Copy this revlog to another, possibly with format changes.
3714 """Copy this revlog to another, possibly with format changes.
3714
3715
3715 The destination revlog will contain the same revisions and nodes.
3716 The destination revlog will contain the same revisions and nodes.
3716 However, it may not be bit-for-bit identical due to e.g. delta encoding
3717 However, it may not be bit-for-bit identical due to e.g. delta encoding
3717 differences.
3718 differences.
3718
3719
3719 The ``deltareuse`` argument control how deltas from the existing revlog
3720 The ``deltareuse`` argument control how deltas from the existing revlog
3720 are preserved in the destination revlog. The argument can have the
3721 are preserved in the destination revlog. The argument can have the
3721 following values:
3722 following values:
3722
3723
3723 DELTAREUSEALWAYS
3724 DELTAREUSEALWAYS
3724 Deltas will always be reused (if possible), even if the destination
3725 Deltas will always be reused (if possible), even if the destination
3725 revlog would not select the same revisions for the delta. This is the
3726 revlog would not select the same revisions for the delta. This is the
3726 fastest mode of operation.
3727 fastest mode of operation.
3727 DELTAREUSESAMEREVS
3728 DELTAREUSESAMEREVS
3728 Deltas will be reused if the destination revlog would pick the same
3729 Deltas will be reused if the destination revlog would pick the same
3729 revisions for the delta. This mode strikes a balance between speed
3730 revisions for the delta. This mode strikes a balance between speed
3730 and optimization.
3731 and optimization.
3731 DELTAREUSENEVER
3732 DELTAREUSENEVER
3732 Deltas will never be reused. This is the slowest mode of execution.
3733 Deltas will never be reused. This is the slowest mode of execution.
3733 This mode can be used to recompute deltas (e.g. if the diff/delta
3734 This mode can be used to recompute deltas (e.g. if the diff/delta
3734 algorithm changes).
3735 algorithm changes).
3735 DELTAREUSEFULLADD
3736 DELTAREUSEFULLADD
3736 Revision will be re-added as if their were new content. This is
3737 Revision will be re-added as if their were new content. This is
3737 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
3738 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
3738 eg: large file detection and handling.
3739 eg: large file detection and handling.
3739
3740
3740 Delta computation can be slow, so the choice of delta reuse policy can
3741 Delta computation can be slow, so the choice of delta reuse policy can
3741 significantly affect run time.
3742 significantly affect run time.
3742
3743
3743 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3744 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3744 two extremes. Deltas will be reused if they are appropriate. But if the
3745 two extremes. Deltas will be reused if they are appropriate. But if the
3745 delta could choose a better revision, it will do so. This means if you
3746 delta could choose a better revision, it will do so. This means if you
3746 are converting a non-generaldelta revlog to a generaldelta revlog,
3747 are converting a non-generaldelta revlog to a generaldelta revlog,
3747 deltas will be recomputed if the delta's parent isn't a parent of the
3748 deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force computing deltas against both
        parents for merges. When unset, the destination revlog's current
        default is used.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazy_delta and lazy_delta_base control whether to reuse a cached
        # delta, if possible.
        old_delta_config = destrevlog.delta_config
        destrevlog.delta_config = destrevlog.delta_config.copy()

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog.delta_config.lazy_delta_base = True
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = False

            delta_both_parents = (
                forcedeltabothparents or old_delta_config.delta_both_parents
            )
            destrevlog.delta_config.delta_both_parents = delta_both_parents

            with self.reading(), destrevlog._writing(tr):
                self._clone(
                    tr,
                    destrevlog,
                    addrevisioncb,
                    deltareuse,
                    forcedeltabothparents,
                    sidedata_helpers,
                )

        finally:
            destrevlog.delta_config = old_delta_config

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self.delta_config.debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]
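            # entry[0] packs the data offset in its upper bits and the
            # revision flags in the low 16 bits; entry[4] is the linkrev,
            # entry[5] and entry[6] are parent revision numbers, and
            # entry[7] is the node, hence the index[...][7] lookups above.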

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
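                    # new_flags is a (flags to add, flags to remove) pair;
                    # & binds tighter than |, so the next line reads as
                    # flags | (new_flags[0] & ~new_flags[1]).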
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog.delta_config.lazy_delta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._inner._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    try:
                        rawtext = self._revisiondata(rev)
                    except error.CensoredNodeError as censored:
                        assert flags & REVIDX_ISCENSORED
                        rawtext = censored.tombstone
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog._addrevision(
                    node,
                    rawtext,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    flags,
                    cachedelta,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

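
    # A hedged caller-side sketch (assumed names): `addrevisioncb` fires once
    # per copied revision, so it can drive a progress bar:
    #
    #     progress = ui.makeprogress(_(b'revisions'), total=len(src))
    #     src.clone(tr, dst,
    #               addrevisioncb=lambda rl, rev, node: progress.increment())
    #     progress.complete()
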
    def censorrevision(self, tr, censor_nodes, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censor_nodes, tombstone)
        else:
            rewrite.v2_censor(self, tr, censor_nodes, tombstone)

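
    # A hedged usage sketch (assumed names): destroying the content of one
    # file revision while keeping the history graph intact:
    #
    #     with repo.lock(), repo.transaction(b'censor') as tr:
    #         fl.censorrevision(tr, {node}, tombstone=b'removed by admin')
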
    def verifyintegrity(self, state) -> Iterable[RevLogProblem]:
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta:   file content starts with b'\1\n', the metadata
            #           header defined in filelog.py, but without a rename
            #   ext:    content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text" mentioned
            # below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

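
    # A hedged consumption sketch (assumed names): drain the generator and
    # report whatever it yields; each problem carries `error`, `warning`,
    # and `node` fields:
    #
    #     state = {b'expectedversion': REVLOGV1, b'erroroncensored': True}
    #     for problem in rl.verifyintegrity(state):
    #         ui.warn((problem.error or problem.warning) + b'\n')
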
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

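
    # A hedged usage sketch: each keyword opts into one key of the result,
    # so callers only pay for the statistics they ask for:
    #
    #     info = rl.storageinfo(revisionscount=True, storedsize=True)
    #     # -> {b'revisionscount': ..., b'storedsize': ...}
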
    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.feature_config.has_side_data:
            return
        # revlog formats with sidedata support do not support inline storage
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._inner._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

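                # Decide how the blob is stored: PLAIN keeps it uncompressed,
                # DEFAULT marks it as compressed with the docket's default
                # engine, and INLINE falls back to the classic
                # self-describing chunk format.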
                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.feature_config.has_side_data:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self._inner.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)