rust-index: add fast-path for getting a list of all heads as nodes...
Raphaël Gomès
r52155:f20c4b30 default
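In substance, the fast path lets callers ask the revlog index for the nodes of all heads in a single native call, instead of resolving each head revision to its node from Python. A minimal sketch of the pattern, assuming a Rust-backed index exposing a `head_node_ids()` accessor (the name is illustrative; the pure-Python fallback mirrors the loop added to the filtered changelog below):

    def head_node_ids(changelog):
        try:
            # one native call returning the nodes of all head revisions
            return changelog.index.head_node_ids()
        # an index without the accessor (e.g. the filtered changelog below)
        # falls back to resolving each head revision in Python
        except AttributeError:
            return [changelog.node(r) for r in changelog.headrevs()]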
@@ -1,482 +1,486 @@
# repoview.py - Filtered view of a localrepo object
#
# Copyright 2012 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
#                Logilab SA        <contact@logilab.fr>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.


import copy
import weakref

from .i18n import _
from .node import (
    hex,
    nullrev,
)
from . import (
    error,
    obsolete,
    phases,
    pycompat,
    tags as tagsmod,
    util,
)
from .utils import repoviewutil


def hideablerevs(repo):
    """Revision candidates to be hidden

    This is a standalone function to allow extensions to wrap it.

    Because we use the set of immutable changesets as a fallback subset in
    branchmap (see mercurial.utils.repoviewutil.subsettable), you cannot set
    "public" changesets as "hideable". Doing so would break multiple code
    assertions and lead to crashes."""
    obsoletes = obsolete.getrevs(repo, b'obsolete')
    internals = repo._phasecache.getrevset(repo, phases.localhiddenphases)
    internals = frozenset(internals)
    return obsoletes | internals


def pinnedrevs(repo):
    """revisions blocking hidden changesets from being filtered"""

    cl = repo.changelog
    pinned = set()
    pinned.update([par.rev() for par in repo[None].parents()])
    pinned.update([cl.rev(bm) for bm in repo._bookmarks.values()])

    tags = {}
    tagsmod.readlocaltags(repo.ui, repo, tags, {})
    if tags:
        rev = cl.index.get_rev
        pinned.update(rev(t[0]) for t in tags.values())
        pinned.discard(None)

    # Avoid cycle: mercurial.filemerge -> mercurial.templater ->
    # mercurial.templatefuncs -> mercurial.revset -> mercurial.repoview ->
    # mercurial.mergestate -> mercurial.filemerge
    from . import mergestate

    ms = mergestate.mergestate.read(repo)
    if ms.active() and ms.unresolvedcount():
        for node in (ms.local, ms.other):
            rev = cl.index.get_rev(node)
            if rev is not None:
                pinned.add(rev)

    return pinned


def _revealancestors(pfunc, hidden, revs):
    """reveals contiguous chains of hidden ancestors of 'revs' by removing
    them from 'hidden'

    - pfunc(r): a function returning the parents of 'r',
    - hidden: the (preliminary) hidden revisions, to be updated
    - revs: iterable of revnums,

    (Ancestors are revealed exclusively, i.e. the elements in 'revs' are
    *not* revealed)
    """
    stack = list(revs)
    while stack:
        for p in pfunc(stack.pop()):
            if p != nullrev and p in hidden:
                hidden.remove(p)
                stack.append(p)


def computehidden(repo, visibilityexceptions=None):
    """compute the set of hidden revisions to filter

    During most operations, hidden revisions should be filtered."""
    assert not repo.changelog.filteredrevs

    hidden = hideablerevs(repo)
    if hidden:
        hidden = set(hidden - pinnedrevs(repo))
        if visibilityexceptions:
            hidden -= visibilityexceptions
        pfunc = repo.changelog.parentrevs
        mutable = repo._phasecache.getrevset(repo, phases.mutablephases)

        visible = mutable - hidden
        _revealancestors(pfunc, hidden, visible)
    return frozenset(hidden)


def computesecret(repo, visibilityexceptions=None):
    """compute the set of revisions that can never be exposed through hgweb

    Changesets in the secret phase (or above) should stay inaccessible."""
    assert not repo.changelog.filteredrevs
    secrets = repo._phasecache.getrevset(repo, phases.remotehiddenphases)
    return frozenset(secrets)


def computeunserved(repo, visibilityexceptions=None):
    """compute the set of revisions that should be filtered when used as a
    server

    Secret and hidden changesets should not pretend to be here."""
    assert not repo.changelog.filteredrevs
    # fast path in simple case to avoid impact of non optimised code
    hiddens = filterrevs(repo, b'visible')
    secrets = filterrevs(repo, b'served.hidden')
    if secrets:
        return frozenset(hiddens | secrets)
    else:
        return hiddens


def computemutable(repo, visibilityexceptions=None):
    assert not repo.changelog.filteredrevs
    # fast check to avoid revset call on huge repo
    if repo._phasecache.hasnonpublicphases(repo):
        return frozenset(repo._phasecache.getrevset(repo, phases.mutablephases))
    return frozenset()


def computeimpactable(repo, visibilityexceptions=None):
    """Everything impactable by mutable revisions

    The immutable filter still has some chance of being invalidated. This
    will happen when:

    - you garbage collect hidden changesets,
    - public phase is moved backward,
    - something is changed in the filtering (this could be fixed)

    This filters out any mutable changeset and any public changeset that may
    be impacted by something happening to a mutable revision.

    This is achieved by filtering everything with a revision number equal to
    or higher than that of the first mutable changeset."""
    assert not repo.changelog.filteredrevs
    cl = repo.changelog
    firstmutable = len(cl)
    roots = repo._phasecache.nonpublicphaseroots(repo)
    if roots:
        firstmutable = min(firstmutable, min(cl.rev(r) for r in roots))
    # protect from nullrev root
    firstmutable = max(0, firstmutable)
    return frozenset(range(firstmutable, len(cl)))


# functions to compute filtered sets
#
# When adding a new filter you MUST update the table at:
#    mercurial.utils.repoviewutil.subsettable
# Otherwise your filter will have to recompute all its branches cache
# from scratch (very slow).
filtertable = {
    b'visible': computehidden,
    b'visible-hidden': computehidden,
    b'served.hidden': computesecret,
    b'served': computeunserved,
    b'immutable': computemutable,
    b'base': computeimpactable,
}

# set of filter levels that will include the working copy parent no matter what.
filter_has_wc = {b'visible', b'visible-hidden'}

_basefiltername = list(filtertable)


def extrafilter(ui):
    """initialize extra filter and return its id

    If extra filtering is configured, we make sure the associated filtered
    views are declared and return the associated id.
    """
    frevs = ui.config(b'experimental', b'extra-filter-revs')
    if frevs is None:
        return None

    fid = pycompat.sysbytes(util.DIGESTS[b'sha1'](frevs).hexdigest())[:12]

    combine = lambda fname: fname + b'%' + fid

    subsettable = repoviewutil.subsettable

    if combine(b'base') not in filtertable:
        for base_name in _basefiltername:

            def extrafilteredrevs(repo, *args, name=base_name, **kwargs):
                baserevs = filtertable[name](repo, *args, **kwargs)
                extrarevs = frozenset(repo.revs(frevs))
                return baserevs | extrarevs

            filtertable[combine(base_name)] = extrafilteredrevs
            if base_name in subsettable:
                subsettable[combine(base_name)] = combine(
                    subsettable[base_name]
                )
    return fid


def filterrevs(repo, filtername, visibilityexceptions=None):
    """returns the set of filtered revisions for this filter name

    visibilityexceptions is a set of revs which are exceptions to the
    hidden state and must remain visible. They are dynamic, hence the
    result is not cached."""
    if filtername not in repo.filteredrevcache:
        if repo.ui.configbool(b'devel', b'debug.repo-filters'):
            msg = b'computing revision filter for "%s"'
            msg %= filtername
            if repo.ui.tracebackflag and repo.ui.debugflag:
                # XXX use ui.write_err
                util.debugstacktrace(
                    msg,
                    f=repo.ui._fout,
                    otherf=repo.ui._ferr,
                    prefix=b'debug.filters: ',
                )
            else:
                repo.ui.debug(b'debug.filters: %s\n' % msg)
        func = filtertable[filtername]
        if visibilityexceptions:
            return func(repo.unfiltered, visibilityexceptions)
        repo.filteredrevcache[filtername] = func(repo.unfiltered())
    return repo.filteredrevcache[filtername]


def wrapchangelog(unfichangelog, filteredrevs):
    cl = copy.copy(unfichangelog)
    cl.filteredrevs = filteredrevs

    class filteredchangelog(filteredchangelogmixin, cl.__class__):
        pass

    cl.__class__ = filteredchangelog

    return cl


class filteredchangelogmixin:
    def tiprev(self):
        """filtered version of revlog.tiprev"""
        for i in range(len(self) - 1, -2, -1):
            if i not in self.filteredrevs:
                return i

    def __contains__(self, rev):
        """filtered version of revlog.__contains__"""
        return 0 <= rev < len(self) and rev not in self.filteredrevs

    def __iter__(self):
        """filtered version of revlog.__iter__"""

        def filterediter():
            for i in range(len(self)):
                if i not in self.filteredrevs:
                    yield i

        return filterediter()

    def revs(self, start=0, stop=None):
        """filtered version of revlog.revs"""
        for i in super(filteredchangelogmixin, self).revs(start, stop):
            if i not in self.filteredrevs:
                yield i

    def _checknofilteredinrevs(self, revs):
        """raise the appropriate error if 'revs' contains a filtered revision

        This returns a version of 'revs' to be used thereafter by the caller.
        In particular, if revs is an iterator, it is converted into a set.
        """
        if hasattr(revs, '__next__'):
            # Note that inspect.isgenerator() is not true for iterators.
            revs = set(revs)

        filteredrevs = self.filteredrevs
        if hasattr(revs, 'first'):  # smartset
            offenders = revs & filteredrevs
        else:
            offenders = filteredrevs.intersection(revs)

        for rev in offenders:
            raise error.FilteredIndexError(rev)
        return revs

    def _head_node_ids(self):
        # no Rust fast path implemented yet, so just loop in Python
        return [self.node(r) for r in self.headrevs()]

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevsfiltered(self.filteredrevs)
            # AttributeError covers non-c-extension environments and
            # old c extensions without filter handling.
            except AttributeError:
                return self._headrevs()

        revs = self._checknofilteredinrevs(revs)
        return super(filteredchangelogmixin, self).headrevs(revs)

    def strip(self, *args, **kwargs):
        # XXX make something better than assert
        # We can't expect proper strip behavior if we are filtered.
        assert not self.filteredrevs
        super(filteredchangelogmixin, self).strip(*args, **kwargs)

    def rev(self, node):
        """filtered version of revlog.rev"""
        r = super(filteredchangelogmixin, self).rev(node)
        if r in self.filteredrevs:
            raise error.FilteredLookupError(
                hex(node), self.display_id, _(b'filtered node')
            )
        return r

    def node(self, rev):
        """filtered version of revlog.node"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(filteredchangelogmixin, self).node(rev)

    def linkrev(self, rev):
        """filtered version of revlog.linkrev"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(filteredchangelogmixin, self).linkrev(rev)

    def parentrevs(self, rev):
        """filtered version of revlog.parentrevs"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(filteredchangelogmixin, self).parentrevs(rev)

    def flags(self, rev):
        """filtered version of revlog.flags"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(filteredchangelogmixin, self).flags(rev)


class repoview:
    """Provide a read/write view of a repo through a filtered changelog

    This object is used to access a filtered version of a repository without
    altering the original repository object itself. We cannot alter the
    original object for two main reasons:
    - It prevents the use of a repo with multiple filters at the same time.
      In particular when multiple threads are involved.
    - It makes the scope of the filtering harder to control.

    This object behaves very much like the original repository. All
    attribute operations are done on the original repository:
    - An access to `repoview.someattr` actually returns `repo.someattr`,
    - A write to `repoview.someattr` actually sets the value of
      `repo.someattr`,
    - A deletion of `repoview.someattr` actually drops `someattr`
      from `repo.__dict__`.

    The only exception is the `changelog` property. It is overridden to
    return a (surface) copy of `repo.changelog` with some revisions
    filtered. The `filtername` attribute of the view controls the revisions
    that need to be filtered. (the fact the changelog is copied is an
    implementation detail).

    Unlike attributes, this object intercepts all method calls. This means
    that all methods are run on the `repoview` object with the filtered
    `changelog` property. For this purpose the simple `repoview` class must
    be mixed with the actual class of the repository. This ensures that the
    resulting `repoview` object has the very same methods as the repo
    object. This leads to the property below.

        repoview.method() --> repo.__class__.method(repoview)

    The inheritance has to be done dynamically because `repo` can be of any
    subclass of `localrepo`. Eg: `bundlerepo` or `statichttprepo`.
    """

    def __init__(self, repo, filtername, visibilityexceptions=None):
        object.__setattr__(self, '_unfilteredrepo', repo)
        object.__setattr__(self, 'filtername', filtername)
        object.__setattr__(self, '_clcachekey', None)
        object.__setattr__(self, '_clcache', None)
        # revs which are exceptions and must not be hidden
        object.__setattr__(self, '_visibilityexceptions', visibilityexceptions)

    # not a propertycache on purpose; we shall implement a proper cache later
    @property
    def changelog(self):
        """return a filtered version of the changelog

        this changelog must not be used for writing"""
        # some cache may be implemented later
        unfi = self._unfilteredrepo
        unfichangelog = unfi.changelog
        # bypass call to changelog.method
        unfiindex = unfichangelog.index
        unfilen = len(unfiindex)
        unfinode = unfiindex[unfilen - 1][7]
        with util.timedcm('repo filter for %s', self.filtername):
            revs = filterrevs(unfi, self.filtername, self._visibilityexceptions)
        cl = self._clcache
        newkey = (unfilen, unfinode, hash(revs), unfichangelog.is_delaying)
        # if cl.index is not unfiindex, unfi.changelog would be
        # recreated, and our clcache refers to a garbage object
        if cl is not None and (
            cl.index is not unfiindex or newkey != self._clcachekey
        ):
            cl = None
        # could have been made None by the previous if
        if cl is None:
            # Only filter if there's something to filter
            cl = wrapchangelog(unfichangelog, revs) if revs else unfichangelog
            object.__setattr__(self, '_clcache', cl)
            object.__setattr__(self, '_clcachekey', newkey)
        return cl

    def unfiltered(self):
        """Return an unfiltered version of a repo"""
        return self._unfilteredrepo

    def filtered(self, name, visibilityexceptions=None):
        """Return a filtered version of a repository"""
        if name == self.filtername and not visibilityexceptions:
            return self
        return self.unfiltered().filtered(name, visibilityexceptions)

    def __repr__(self):
        return '<%s:%s %r>' % (
            self.__class__.__name__,
            pycompat.sysstr(self.filtername),
            self.unfiltered(),
        )

    # every attribute access is forwarded to the proxied repo
    def __getattr__(self, attr):
        return getattr(self._unfilteredrepo, attr)

    def __setattr__(self, attr, value):
        return setattr(self._unfilteredrepo, attr, value)

    def __delattr__(self, attr):
        return delattr(self._unfilteredrepo, attr)


# Dynamically created classes introduce memory cycles via __mro__. See
# https://bugs.python.org/issue17950.
# This reliance on the garbage collector can turn into a memory leak in
# Python < 3.4, which is the first version released with PEP 442.
_filteredrepotypes = weakref.WeakKeyDictionary()


def newtype(base):
    """Create a new type with the repoview mixin and the given base class"""
    ref = _filteredrepotypes.get(base)
    if ref is not None:
        cls = ref()
        if cls is not None:
            return cls

    class filteredrepo(repoview, base):
        pass

    _filteredrepotypes[base] = weakref.ref(filteredrepo)
    # do not reread from the weakref to be 100% sure not to return None
    return filteredrepo
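Note that the filtered changelog above cannot take the native fast path, since no filtered-aware variant exists yet; its `_head_node_ids` therefore loops in Python. A hedged usage sketch, assuming the unfiltered revlog grows a matching `_head_node_ids` helper in the revlog.py hunk below (truncated here):

    from mercurial import hg, ui as uimod
    from mercurial.node import hex

    # Open the current repository and list the nodes of all its heads via
    # the helper added in this commit; on a filtered repo this takes the
    # pure-Python loop shown above.
    repo = hg.repository(uimod.ui.load(), b'.')
    for node in repo.changelog._head_node_ids():
        print(hex(node).decode('ascii'))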
@@ -1,4062 +1,4067 @@
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""


import binascii
import collections
import contextlib
import functools
import io
import os
import struct
import weakref
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    DELTA_BASE_REUSE_NO,
    DELTA_BASE_REUSE_TRY,
    ENTRY_RANK,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    KIND_FILELOG,
    RANK_UNKNOWN,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    rewrite,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
CHANGELOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of inline data embedded into a revlog
_maxinline = 131072


# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
    parsers, 'BaseIndexObject'
)


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta:
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem:
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
    return index, cache


if hasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_rust(data, inline, default_header):
    cache = (0, data) if inline else None
    return rustrevlog.Index(data, default_header), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)

hexdigits = b'0123456789abcdefABCDEF'


class _Config:
    def copy(self):
        return self.__class__(**self.__dict__)


@attr.s()
class FeatureConfig(_Config):
    """Hold configuration values about the available revlog features"""

    # the default compression engine
    compression_engine = attr.ib(default=b'zlib')
    # compression engines options
    compression_engine_options = attr.ib(default=attr.Factory(dict))

    # can we use censor on this revlog
    censorable = attr.ib(default=False)
    # does this revlog use the "side data" feature
    has_side_data = attr.ib(default=False)
    # might remove rank configuration once the computation has no impact
    compute_rank = attr.ib(default=False)
    # parent order is supposed to be semantically irrelevant, so we
    # normally resort parents to ensure that the first parent is non-null,
    # if there is a non-null parent at all.
    # filelog abuses the parent order as a flag to mark some instances of
    # meta-encoded files, so allow it to disable this behavior.
    canonical_parent_order = attr.ib(default=False)
    # can ellipsis commit be used
    enable_ellipsis = attr.ib(default=False)

    def copy(self):
        new = super().copy()
        new.compression_engine_options = self.compression_engine_options.copy()
        return new


@attr.s()
class DataConfig(_Config):
    """Hold configuration values about how the revlog data are read"""

    # should we try to open the "pending" version of the revlog
    try_pending = attr.ib(default=False)
    # should we try to open the "split" version of the revlog
    try_split = attr.ib(default=False)
    # When True, indexfile should be opened with checkambig=True at writing,
    # to avoid file stat ambiguity.
    check_ambig = attr.ib(default=False)

    # If true, use mmap instead of reading to deal with large indexes
    mmap_large_index = attr.ib(default=False)
    # how much data is large
    mmap_index_threshold = attr.ib(default=None)
    # How much data to read and cache into the raw revlog data cache.
    chunk_cache_size = attr.ib(default=65536)

    # The size of the uncompressed cache compared to the largest revision seen.
    uncompressed_cache_factor = attr.ib(default=None)

    # The number of chunks cached
    uncompressed_cache_count = attr.ib(default=None)

    # Allow sparse reading of the revlog data
    with_sparse_read = attr.ib(default=False)
    # minimal density of a sparse read chunk
    sr_density_threshold = attr.ib(default=0.50)
    # minimal size of the data we skip when performing sparse reads
    sr_min_gap_size = attr.ib(default=262144)

    # are deltas encoded against arbitrary bases.
    generaldelta = attr.ib(default=False)


@attr.s()
class DeltaConfig(_Config):
    """Hold configuration values about how new deltas are computed

    Some attributes are duplicated from DataConfig to help keep each object
    self-contained.
    """

    # can deltas be encoded against arbitrary bases.
    general_delta = attr.ib(default=False)
    # Allow sparse writing of the revlog data
    sparse_revlog = attr.ib(default=False)
    # maximum length of a delta chain
    max_chain_len = attr.ib(default=None)
    # Maximum distance between a delta chain's base start and end
    max_deltachain_span = attr.ib(default=-1)
    # If `upper_bound_comp` is not None, this is the expected maximal gain
    # from compression for the data content.
    upper_bound_comp = attr.ib(default=None)
    # Should we try a delta against both parents
    delta_both_parents = attr.ib(default=True)
    # Test delta base candidate groups by chunks of this maximal size.
    candidate_group_chunk_size = attr.ib(default=0)
    # Should we display debug information about delta computation
    debug_delta = attr.ib(default=False)
    # trust incoming deltas by default
    lazy_delta = attr.ib(default=True)
    # trust the base of incoming deltas by default
    lazy_delta_base = attr.ib(default=False)


347 class _InnerRevlog:
347 class _InnerRevlog:
348 """An inner layer of the revlog object
348 """An inner layer of the revlog object
349
349
350 That layer exist to be able to delegate some operation to Rust, its
    That layer exists to be able to delegate some operations to Rust; its
    boundaries are arbitrary and based on what we can delegate to Rust.
    """

    def __init__(
        self,
        opener,
        index,
        index_file,
        data_file,
        sidedata_file,
        inline,
        data_config,
        delta_config,
        feature_config,
        chunk_cache,
        default_compression_header,
    ):
        self.opener = opener
        self.index = index

        self.index_file = index_file
        self.data_file = data_file
        self.sidedata_file = sidedata_file
        self.inline = inline
        self.data_config = data_config
        self.delta_config = delta_config
        self.feature_config = feature_config

        # used during diverted write.
        self._orig_index_file = None

        self._default_compression_header = default_compression_header

        # index

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self.index_file if self.inline else self.data_file),
            self.data_config.chunk_cache_size,
            chunk_cache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self.sidedata_file,
            self.data_config.chunk_cache_size,
        )

        # revlog header -> revlog compressor
        self._decompressors = {}
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None

        # cache some uncompressed chunks
        # rev → uncompressed_chunk
        #
        # the max cost is dynamically updated to be proportional to the
        # size of the revisions we actually encounter.
        self._uncompressed_chunk_cache = None
        if self.data_config.uncompressed_cache_factor is not None:
            self._uncompressed_chunk_cache = util.lrucachedict(
                self.data_config.uncompressed_cache_count,
                maxcost=65536,  # some arbitrary initial value
            )

        self._delay_buffer = None
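
The uncompressed-chunk cache above is a cost-aware LRU: each chunk is charged
its byte size, and ``maxcost`` is later grown to track the largest revision
seen. A minimal sketch of that usage pattern, using the same ``util.lrucachedict``
calls this file relies on (the sample sizes are invented):

    from mercurial import util

    cache = util.lrucachedict(100, maxcost=65536)  # 100 entries, 64KiB budget
    cache.insert(0, b'x' * 1000, cost=1000)  # charge each chunk its size
    if cache.get(1) is None:  # a miss or an eviction reads back as None
        pass  # fall back to re-reading and decompressing the chunk
    cache.maxcost = 4 * 1024 * 1024  # raise the budget for large revisions
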
    def __len__(self):
        return len(self.index)

    def clear_cache(self):
        assert not self.is_delaying
        self._revisioncache = None
        if self._uncompressed_chunk_cache is not None:
            self._uncompressed_chunk_cache.clear()
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()

    @property
    def canonical_index_file(self):
        if self._orig_index_file is not None:
            return self._orig_index_file
        return self.index_file

    @property
    def is_delaying(self):
        """is the revlog currently delaying the visibility of written data?

        The delaying mechanism can be either in-memory or written on disk in a
        side-file."""
        return (self._delay_buffer is not None) or (
            self._orig_index_file is not None
        )

    # Derived from index values.

    def start(self, rev):
        """the offset of the data chunk for this revision"""
        return int(self.index[rev][0] >> 16)

    def length(self, rev):
        """the length of the data chunk for this revision"""
        return self.index[rev][1]

    def end(self, rev):
        """the end of the data chunk for this revision"""
        return self.start(rev) + self.length(rev)

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self.delta_config.general_delta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self.delta_config.sparse_revlog:
            return self.deltaparent(rev) == nullrev
        elif hasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        while self.length(p1) == 0:
            b = self.deltaparent(p1)
            if b == p1:
                break
            p1 = b
        p2 = entry[6]
        while self.length(p2) == 0:
            b = self.deltaparent(p2)
            if b == p2:
                break
            p2 = b
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)
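
``start()`` above works because the first index field packs the data offset
and sixteen bits of per-revision flags into a single integer
(``offset << 16 | flags``). A toy illustration of that packing (the sample
values are invented):

    FLAG_MASK = 0xFFFF  # the low 16 bits hold the per-revision flags

    packed = (4096 << 16) | 0x0001  # data offset 4096, one flag bit set
    offset = packed >> 16  # what start() extracts
    flags = packed & FLAG_MASK
    assert (offset, flags) == (4096, 1)
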

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        generaldelta = self.delta_config.general_delta
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
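
A small usage sketch of the walk above, against a hand-rolled stand-in for
the index where only field 3 (the delta base) matters (general delta
assumed, everything else zeroed):

    toy_index = [
        (0, 0, 0, 0),  # rev 0: base == rev, a full snapshot
        (0, 0, 0, 0),  # rev 1: delta against rev 0
        (0, 0, 0, 1),  # rev 2: delta against rev 1
    ]
    chain, rev = [], 2
    while rev != toy_index[rev][3]:  # stop when base == rev (chain root)
        chain.append(rev)
        rev = toy_index[rev][3]
    chain.append(rev)
    chain.reverse()
    assert chain == [0, 1, 2]  # apply rev 0's full text, then two deltas
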

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self.feature_config.compression_engine]
        return engine.revlogcompressor(
            self.feature_config.compression_engine_options
        )

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._default_compression_header is None:
            return None
        t = self._default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(
                    self.feature_config.compression_engine_options
                )
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data
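
The convention above: callers store ``header + data``, and ``decompress``
routes on the first byte. Since zlib output begins with ``b'x'``, no extra
marker is needed for it; a round-trip sketch of that case:

    import zlib

    text = b'some revision text' * 10
    comp = zlib.compress(text)
    assert comp.startswith(b'x')  # the zlib header doubles as the revlog tag
    stored = b'' + comp  # compress() returned (b'', comp)
    assert zlib.decompress(stored) == text  # what the b'x' branch does
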

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

    @contextlib.contextmanager
    def reading(self):
        """Context manager that keeps data and sidedata files open for reading"""
        if len(self.index) == 0:
            yield  # nothing to be read
        elif self._delay_buffer is not None and self.inline:
            msg = "revlog with delayed write should not be inline"
            raise error.ProgrammingError(msg)
        else:
            with self._segmentfile.reading():
                with self._segmentfile_sidedata.reading():
                    yield

    @property
    def is_writing(self):
        """True if a writing context is open"""
        return self._writinghandles is not None

    @property
    def is_open(self):
        """True if any file handle is being held

        Used for assert and debug in the python code"""
        return self._segmentfile.is_open or self._segmentfile_sidedata.is_open

    @contextlib.contextmanager
    def writing(self, transaction, data_end=None, sidedata_end=None):
        """Open the revlog files for writing

        Adding content to a revlog should be done within such a context.
        """
        if self.is_writing:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self.index)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self.inline:
                    try:
                        dfh = self.opener(self.data_file, mode=b"r+")
                        if data_end is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(data_end, os.SEEK_SET)
                    except FileNotFoundError:
                        dfh = self.opener(self.data_file, mode=b"w+")
                    transaction.add(self.data_file, dsize)
                if self.sidedata_file is not None:
                    assert sidedata_end is not None
                    # revlog-v2 does not inline, help Pytype
                    assert dfh is not None
                    try:
                        sdfh = self.opener(self.sidedata_file, mode=b"r+")
                        sdfh.seek(sidedata_end, os.SEEK_SET)
                    except FileNotFoundError:
                        sdfh = self.opener(self.sidedata_file, mode=b"w+")
                    transaction.add(self.sidedata_file, sidedata_end)

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self.inline:
                    transaction.add(self.index_file, dsize + isize)
                else:
                    transaction.add(self.index_file, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self.inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing references to
                # potentially unflushed data content.
                if ifh is not None:
                    ifh.close()
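
Callers are expected to wrap every addition in this context manager; entering
it again while handles are already open is a harmless no-op. A schematic of
that pattern (``inner`` and ``tr`` are hypothetical InnerRevlog and
transaction objects):

    with inner.writing(tr):  # opens index/data/sidedata handles
        with inner.writing(tr):  # re-entrant: simply yields
            pass  # write_entry(...) calls go here
    # on exit, all handles are closed and the cached handles cleared
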

    def __index_write_fp(self, index_end=None):
        """internal method to open the index file for writing

        You should not use this directly; use `writing` instead.
        """
        try:
            if self._delay_buffer is None:
                f = self.opener(
                    self.index_file,
                    mode=b"r+",
                    checkambig=self.data_config.check_ambig,
                )
            else:
                # check_ambig affects the way we open the file for writing.
                # However, here we do not actually open a file for writing,
                # as writes will be appended to a delay_buffer. So check_ambig
                # is not meaningful and is unneeded here.
                f = randomaccessfile.appender(
                    self.opener, self.index_file, b"r+", self._delay_buffer
                )
            if index_end is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(index_end, os.SEEK_SET)
            return f
        except FileNotFoundError:
            if self._delay_buffer is None:
                return self.opener(
                    self.index_file,
                    mode=b"w+",
                    checkambig=self.data_config.check_ambig,
                )
            else:
                return randomaccessfile.appender(
                    self.opener, self.index_file, b"w+", self._delay_buffer
                )

    def __index_new_fp(self):
        """internal method to create a new index file for writing

        You should not use this unless you are upgrading from an inline revlog.
        """
        return self.opener(
            self.index_file,
            mode=b"w",
            checkambig=self.data_config.check_ambig,
            atomictemp=True,
        )

    def split_inline(self, tr, header, new_index_file_path=None):
        """split the data of an inline revlog into an index and a data file"""
        assert self._delay_buffer is None
        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None
            # No need to deal with the sidedata writing handle as it is only
            # relevant with revlog-v2, which is never inline and so never
            # reaches this code.

        new_dfh = self.opener(self.data_file, mode=b"w+")
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self.reading():
                for r in range(len(self.index)):
                    new_dfh.write(self.get_segment_for_revs(r, r)[1])
                new_dfh.flush()

            if new_index_file_path is not None:
                self.index_file = new_index_file_path
            with self.__index_new_fp() as fp:
                self.inline = False
                for i in range(len(self.index)):
                    e = self.index.entry_binary(i)
                    if i == 0:
                        packed_header = self.index.pack_header(header)
                        e = packed_header + e
                    fp.write(e)

                # If we don't use side-write, the temp file replaces the real
                # index when we exit the context manager

            self._segmentfile = randomaccessfile.randomaccessfile(
                self.opener,
                self.data_file,
                self.data_config.chunk_cache_size,
            )

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with the sidedata writing handle as it is
                # only relevant with revlog-v2, which is never inline and so
                # never reaches this code.
        finally:
            if new_dfh is not None:
                new_dfh.close()
        return self.index_file

    def get_segment_for_revs(self, startrev, endrev):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions. The seek position of the
        underlying file is not preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.

        API: we should consider making this a private part of the InnerRevlog
        at some point.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self.inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length)
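
In the inline case, each revision's data is interleaved right after its index
entry, so offsets into the logical data stream must be shifted by the index
entries that precede them. A toy check of that arithmetic (assuming the
64-byte entries of revlog v1):

    entry_size = 64  # revlog v1 index entry size
    rev, data_offset = 2, 150  # offset within the logical data stream
    file_offset = data_offset + (rev + 1) * entry_size
    assert file_offset == 150 + 3 * 64  # 3 index entries precede rev 2's data
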

    def _chunk(self, rev):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision. The seek position of the underlying file
        will not be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        if self._uncompressed_chunk_cache is not None:
            uncomp = self._uncompressed_chunk_cache.get(rev)
            if uncomp is not None:
                return uncomp

        compression_mode = self.index[rev][10]
        data = self.get_segment_for_revs(rev, rev)[1]
        if compression_mode == COMP_MODE_PLAIN:
            uncomp = data
        elif compression_mode == COMP_MODE_DEFAULT:
            uncomp = self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            uncomp = self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)
        if self._uncompressed_chunk_cache is not None:
            self._uncompressed_chunk_cache.insert(rev, uncomp, cost=len(uncomp))
        return uncomp

    def _chunks(self, revs, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. The seek position of the underlying file will not
        be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self.inline
        iosize = self.index.entry_size
        buffer = util.buffer

        fetched_revs = []
        fadd = fetched_revs.append

        chunks = []
        ladd = chunks.append

        if self._uncompressed_chunk_cache is None:
            fetched_revs = revs
        else:
            for rev in revs:
                cached_value = self._uncompressed_chunk_cache.get(rev)
                if cached_value is None:
                    fadd(rev)
                else:
                    ladd((rev, cached_value))

        if not fetched_revs:
            slicedchunks = ()
        elif not self.data_config.with_sparse_read:
            slicedchunks = (fetched_revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self,
                fetched_revs,
                targetsize=targetsize,
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self.get_segment_for_revs(firstrev, lastrev)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                for rev in revschunk:
                    ladd((rev, self._chunk(rev)))
                # the segment could not be read as a whole; move on to the
                # next slice
                continue

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    c = c
                elif comp_mode == COMP_MODE_INLINE:
                    c = decomp(c)
                elif comp_mode == COMP_MODE_DEFAULT:
                    c = def_decomp(c)
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)
                ladd((rev, c))
                if self._uncompressed_chunk_cache is not None:
                    self._uncompressed_chunk_cache.insert(rev, c, len(c))

        chunks.sort()
        return [x[1] for x in chunks]

    def raw_text(self, node, rev):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            cachedrev = self._revisioncache[1]

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._inner._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        if self._uncompressed_chunk_cache is not None:
            # dynamically update the uncompressed_chunk_cache size to the
            # largest revision we saw in this revlog.
            factor = self.data_config.uncompressed_cache_factor
            candidate_size = rawsize * factor
            if candidate_size > self._uncompressed_chunk_cache.maxcost:
                self._uncompressed_chunk_cache.maxcost = candidate_size

        bins = self._chunks(chain, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)
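
Reconstruction is "one base text plus a stack of deltas", and
``mdiff.patches`` applies the whole stack in a single pass. A minimal sketch
of that call, with the delta produced by the matching ``mdiff.textdiff``:

    from mercurial import mdiff

    base = b'line1\nline2\n'
    new = b'line1\nline2 changed\n'
    delta = mdiff.textdiff(base, new)  # a binary delta, like a revlog chunk
    assert mdiff.patches(base, [delta]) == new
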

    def sidedata(self, rev, sidedata_end):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self.inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        if sidedata_end < sidedata_offset + sidedata_size:
            filename = self.sidedata_file
            end = sidedata_end
            offset = sidedata_offset
            length = sidedata_size
            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
            raise error.RevlogError(m)

        comp_segment = self._segmentfile_sidedata.read_chunk(
            sidedata_offset, sidedata_size
        )

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def write_entry(
        self,
        transaction,
        entry,
        data,
        link,
        offset,
        sidedata,
        sidedata_offset,
        index_end,
        data_end,
        sidedata_end,
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if index_end is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(index_end, os.SEEK_SET)
        if dfh:
            if data_end is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(sidedata_end, os.SEEK_SET)

        curr = len(self.index) - 1
        if not self.inline:
            transaction.add(self.data_file, offset)
            if self.sidedata_file:
                transaction.add(self.sidedata_file, sidedata_offset)
            transaction.add(self.canonical_index_file, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            if self._delay_buffer is None:
                ifh.write(entry)
            else:
                self._delay_buffer.append(entry)
        elif self._delay_buffer is not None:
            msg = b'invalid delayed write on inline revlog'
            raise error.ProgrammingError(msg)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self.canonical_index_file, offset)
            assert not sidedata
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
        return (
            ifh.tell(),
            dfh.tell() if dfh else None,
            sdfh.tell() if sdfh else None,
        )

    def _divert_index(self):
        return self.index_file + b'.a'

    def delay(self):
        assert not self.is_open
        if self.inline:
            msg = "revlog with delayed write should not be inline"
            raise error.ProgrammingError(msg)
        if self._delay_buffer is not None or self._orig_index_file is not None:
            # delay or divert already in place
            return None
        elif len(self.index) == 0:
            self._orig_index_file = self.index_file
            self.index_file = self._divert_index()
            assert self._orig_index_file is not None
            assert self.index_file is not None
            if self.opener.exists(self.index_file):
                self.opener.unlink(self.index_file)
            return self.index_file
        else:
            self._delay_buffer = []
            return None

    def write_pending(self):
        assert not self.is_open
        if self.inline:
            msg = "revlog with delayed write should not be inline"
            raise error.ProgrammingError(msg)
        if self._orig_index_file is not None:
            return None, True
        any_pending = False
        pending_index_file = self._divert_index()
        if self.opener.exists(pending_index_file):
            self.opener.unlink(pending_index_file)
        util.copyfile(
            self.opener.join(self.index_file),
            self.opener.join(pending_index_file),
        )
        if self._delay_buffer:
            with self.opener(pending_index_file, b'r+') as ifh:
                ifh.seek(0, os.SEEK_END)
                ifh.write(b"".join(self._delay_buffer))
            any_pending = True
        self._delay_buffer = None
        self._orig_index_file = self.index_file
        self.index_file = pending_index_file
        return self.index_file, any_pending

    def finalize_pending(self):
        assert not self.is_open
        if self.inline:
            msg = "revlog with delayed write should not be inline"
            raise error.ProgrammingError(msg)

        delay = self._delay_buffer is not None
        divert = self._orig_index_file is not None

        if delay and divert:
            assert False, "unreachable"
        elif delay:
            if self._delay_buffer:
                with self.opener(self.index_file, b'r+') as ifh:
                    ifh.seek(0, os.SEEK_END)
                    ifh.write(b"".join(self._delay_buffer))
            self._delay_buffer = None
        elif divert:
            if self.opener.exists(self.index_file):
                self.opener.rename(
                    self.index_file,
                    self._orig_index_file,
                    checkambig=True,
                )
            self.index_file = self._orig_index_file
            self._orig_index_file = None
        else:
            msg = b"no delay or divert found on this revlog"
            raise error.ProgrammingError(msg)
        return self.canonical_index_file
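
Taken together, ``delay``/``write_pending``/``finalize_pending`` implement
the "pending" dance that lets hooks observe a transaction before it is
committed. A schematic of the expected call order (``inner`` is a
hypothetical InnerRevlog instance):

    inner.delay()  # start buffering, or divert if the revlog is empty
    # ... new revisions accumulate in the buffer or the diverted file ...
    pending_index, any_pending = inner.write_pending()  # expose a '.a' copy
    inner.finalize_pending()  # fold everything back into the real index
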


class revlog:
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    See mercurial/revlogutils/constants.py for details about the content of an
    index entry.
    """

    _flagserrorclass = error.RevlogError

    @staticmethod
    def is_inline_index(header_bytes):
        """Determine if a revlog is inline from the initial bytes of the index"""
        if len(header_bytes) == 0:
            return True

        header = INDEX_HEADER.unpack(header_bytes)[0]

        _format_flags = header & ~0xFFFF
        _format_version = header & 0xFFFF

        features = FEATURES_BY_VERSION[_format_version]
        return features[b'inline'](_format_flags)
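
The split above is: the low sixteen bits carry the format version and the
high bits carry feature flags; for v1 the inline flag is bit 16 (matching
``FLAG_INLINE_DATA = 1 << 16`` in the constants module). A toy check:

    FLAG_INLINE_DATA = 1 << 16

    header = FLAG_INLINE_DATA | 1  # inline flag set, format version 1
    version = header & 0xFFFF
    flags = header & ~0xFFFF
    assert version == 1 and flags == FLAG_INLINE_DATA
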
1291
1291
    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
        try_split=False,
        canonical_parent_order=True,
        data_config=None,
        delta_config=None,
        feature_config=None,
        may_inline=True,  # may inline new revlog
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note that this must reliably be set by normal
        code, but that test, debug, or performance-measurement code might
        not set it to an accurate value.
        """

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self._try_split = try_split
        self._may_inline = may_inline
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        if feature_config is not None:
            self.feature_config = feature_config.copy()
        elif b'feature-config' in self.opener.options:
            self.feature_config = self.opener.options[b'feature-config'].copy()
        else:
            self.feature_config = FeatureConfig()
        self.feature_config.censorable = censorable
        self.feature_config.canonical_parent_order = canonical_parent_order
        if data_config is not None:
            self.data_config = data_config.copy()
        elif b'data-config' in self.opener.options:
            self.data_config = self.opener.options[b'data-config'].copy()
        else:
            self.data_config = DataConfig()
        self.data_config.check_ambig = checkambig
        self.data_config.mmap_large_index = mmaplargeindex
        if delta_config is not None:
            self.delta_config = delta_config.copy()
        elif b'delta-config' in self.opener.options:
            self.delta_config = self.opener.options[b'delta-config'].copy()
        else:
            self.delta_config = DeltaConfig()
        self.delta_config.upper_bound_comp = upperboundcomp

        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)

        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}

        # other optional features

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)
        # prevent nesting of addgroup
        self._adding_group = None

        chunk_cache = self._loadindex()
        self._load_inner(chunk_cache)
        self._concurrencychecker = concurrencychecker

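    # The three `*_config` attributes assigned above share one resolution
    # order: explicit constructor argument, then a repository-wide default
    # carried in `opener.options`, then a fresh default object. A minimal,
    # hypothetical sketch of that fallback pattern (`resolve` is not a real
    # helper in this module):
    #
    #   >>> def resolve(explicit, options, key, default):
    #   ...     if explicit is not None:
    #   ...         return explicit.copy()
    #   ...     if key in options:
    #   ...         return options[key].copy()
    #   ...     return default()
    #   >>> resolve(None, {}, b'data-config', dict)
    #   {}
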
    def _init_opts(self):
        """process options (from above/config) to set up the associated
        default revlog mode

        These values might be affected when actually reading on-disk
        information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
            compute_rank = opts.get(b'changelogv2.compute-rank', True)
            self.feature_config.compute_rank = compute_rank
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1
            if self._may_inline:
                new_header |= FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        mmapindexthreshold = None
        if self.data_config.mmap_large_index:
            mmapindexthreshold = self.data_config.mmap_index_threshold
        if self.feature_config.enable_ellipsis:
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).items():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        chunk_cache_size = self.data_config.chunk_cache_size
        if chunk_cache_size <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % chunk_cache_size
            )
        elif chunk_cache_size & (chunk_cache_size - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % chunk_cache_size
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

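    # The `x & (x - 1)` test used above clears the lowest set bit, so it is
    # 0 exactly when x is a power of two (x == 0 is ruled out by the earlier
    # check). A standalone illustration:
    #
    #   >>> def is_power_of_two(x):
    #   ...     return x > 0 and x & (x - 1) == 0
    #   >>> [is_power_of_two(x) for x in (1, 2, 3, 65536, 65537)]
    #   [True, True, False, True, False]
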
    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file content with or without mmap

        If the file is missing, return the empty string."""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except FileNotFoundError:
            return b''

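    # A standalone sketch of the size-based policy above, using the stdlib
    # directly instead of the opener/util wrappers (illustrative only; it
    # assumes a positive threshold, since an empty file cannot be mapped):
    #
    #   >>> import mmap, os
    #   >>> def read_maybe_mmap(path, threshold):
    #   ...     with open(path, 'rb') as fp:
    #   ...         if os.fstat(fp.fileno()).st_size >= threshold:
    #   ...             return mmap.mmap(fp.fileno(), 0,
    #   ...                              access=mmap.ACCESS_READ)
    #   ...         return fp.read()
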
    def get_streams(self, max_linkrev, force_inline=False):
        """return a list of streams that represent this revlog

        This is used by stream-clone to do byte-to-byte copies of a
        repository.

        This streams data for all revisions that refer to a changelog
        revision up to `max_linkrev`.

        If `force_inline` is set, it enforces that the stream will represent
        an inline revlog.

        It returns a list of three-tuples:

            [
                (filename, bytes_stream, stream_size),

            ]
        """
        n = len(self)
        index = self.index
        while n > 0:
            linkrev = index[n - 1][4]
            if linkrev < max_linkrev:
                break
            # note: this loop will rarely go through multiple iterations,
            # since it only traverses commits created during the current
            # streaming pull operation.
            #
            # If this becomes a problem, using a binary search should cap the
            # runtime of this.
            n = n - 1
        if n == 0:
            # no data to send
            return []
        index_size = n * index.entry_size
        data_size = self.end(n - 1)

        # XXX we might have been split (or stripped) since the object was
        # initialized. We need to close this race too, e.g. by pre-opening
        # the files we feed to the revlog and never closing them before we
        # are done streaming.

        if self._inline:

            def get_stream():
                with self.opener(self._indexfile, mode=b"r") as fp:
                    yield None
                    size = index_size + data_size
                    if size <= 65536:
                        yield fp.read(size)
                    else:
                        yield from util.filechunkiter(fp, limit=size)

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        elif force_inline:

            def get_stream():
                with self.reading():
                    yield None

                    for rev in range(n):
                        idx = self.index.entry_binary(rev)
                        if rev == 0 and self._docket is None:
                            # re-inject the inline flag
                            header = self._format_flags
                            header |= self._format_version
                            header |= FLAG_INLINE_DATA
                            header = self.index.pack_header(header)
                            idx = header + idx
                        yield idx
                        yield self._inner.get_segment_for_revs(rev, rev)[1]

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        else:

            def get_index_stream():
                with self.opener(self._indexfile, mode=b"r") as fp:
                    yield None
                    if index_size <= 65536:
                        yield fp.read(index_size)
                    else:
                        yield from util.filechunkiter(fp, limit=index_size)

            def get_data_stream():
                with self._datafp() as fp:
                    yield None
                    if data_size <= 65536:
                        yield fp.read(data_size)
                    else:
                        yield from util.filechunkiter(fp, limit=data_size)

            index_stream = get_index_stream()
            next(index_stream)
            data_stream = get_data_stream()
            next(data_stream)
            return [
                (self._datafile, data_stream, data_size),
                (self._indexfile, index_stream, index_size),
            ]

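    # As the note inside get_streams() suggests, the linear linkrev scan
    # could be replaced by a binary search: linkrevs never decrease along a
    # revlog, so the cutoff is a bisection point. A hedged sketch over a
    # plain list of linkrevs (not how the code currently does it):
    #
    #   >>> import bisect
    #   >>> linkrevs = [0, 1, 1, 4, 7, 9]
    #   >>> bisect.bisect_left(linkrevs, 5)  # revs kept for max_linkrev=5
    #   4
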
    def _loadindex(self, docket=None):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        elif self._try_split and self.opener.exists(self._split_index_file):
            entry_point = self._split_index_file
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            self._docket = docket
            self._docket_file = entry_point
        else:
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

            self._format_flags = header & ~0xFFFF
            self._format_version = header & 0xFFFF

            supported_flags = SUPPORTED_FLAGS.get(self._format_version)
            if supported_flags is None:
                msg = _(b'unknown version (%d) in revlog %s')
                msg %= (self._format_version, self.display_id)
                raise error.RevlogError(msg)
            elif self._format_flags & ~supported_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            features = FEATURES_BY_VERSION[self._format_version]
            self._inline = features[b'inline'](self._format_flags)
            self.delta_config.general_delta = features[b'generaldelta'](
                self._format_flags
            )
            self.feature_config.has_side_data = features[b'sidedata']

            if not features[b'docket']:
                self._indexfile = entry_point
                index_data = entry_data
            else:
                self._docket_file = entry_point
                if self._initempty:
                    self._docket = docketutil.default_docket(self, header)
                else:
                    self._docket = docketutil.parse_docket(
                        self, entry_data, use_pending=self._trypending
                    )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta is implied by version 2 revlogs.
            self.delta_config.general_delta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self.delta_config.general_delta:
            self.delta_config.sparse_revlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                # Using the CIndex is no longer possible, as the
                # `AncestorsIterator` and `LazyAncestors` classes now require
                # a Rust index for instantiation.
                use_rust_index = True

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = functools.partial(
                parse_index_v1_rust, default_header=new_header
            )
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and hasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)

        return chunkcache

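    # The flag validation above works by masking: any bit of _format_flags
    # outside the supported set survives `& ~supported_flags` and triggers
    # the error. With an assumed mask that only allows two flag bits:
    #
    #   >>> supported = (1 << 16) | (1 << 17)
    #   >>> flags = (1 << 16) | (1 << 18)  # one known bit, one unknown bit
    #   >>> bool(flags & ~supported)
    #   True
    #   >>> flags >> 16  # the shift used when formatting the error message
    #   5
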
    def _load_inner(self, chunk_cache):
        if self._docket is None:
            default_compression_header = None
        else:
            default_compression_header = self._docket.default_compression_header

        self._inner = _InnerRevlog(
            opener=self.opener,
            index=self.index,
            index_file=self._indexfile,
            data_file=self._datafile,
            sidedata_file=self._sidedatafile,
            inline=self._inline,
            data_config=self.data_config,
            delta_config=self.delta_config,
            feature_config=self.feature_config,
            chunk_cache=chunk_cache,
            default_compression_header=default_compression_header,
        )

    def get_revlog(self):
        """simple function to mirror the API of other not-really-revlog APIs"""
        return self

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public-facing "ID" of the revlog that we use in messages"""
        if self.revlog_kind == KIND_FILELOG:
            # Reference the file without the "data/" prefix, so it is familiar
            # to the user.
            return self.target[1]
        else:
            return self.radix

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(range(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def _candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processors can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        """update on-disk cache

        If a transaction is passed, the update may be delayed to transaction
        commit."""
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        """Clear in-memory caches"""
        self._chainbasecache.clear()
        self._inner.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket, so
        # we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and hasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        """return the revision number associated with a <nodeid>"""
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # The first tuple entry is 8 bytes: the first 6 bytes are the offset, the
    # last 2 bytes are the flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

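    # A round-trip sketch of the packing that start() and flags() undo: the
    # first index field stores the 48-bit offset and the 16-bit flag field
    # in a single integer, as `offset << 16 | flags`:
    #
    #   >>> offset, flags = 123456, 0x0002
    #   >>> packed = (offset << 16) | flags
    #   >>> packed >> 16, packed & 0xFFFF
    #   (123456, 2)
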
    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset (instead of previous-offset +
        # previous-size).
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.feature_config.has_side_data:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev))

    def fast_rank(self, rev):
        """Return the rank of a revision if already known, or None otherwise.

        The rank of a revision is the size of the sub-graph it defines as a
        head. Equivalently, the rank of a revision `r` is the size of the set
        `ancestors(r)`, `r` included.

        This method returns the rank retrieved from the revlog in constant
        time. It makes no attempt at computing unknown values for versions of
        the revlog which do not persist the rank.
        """
        rank = self.index[rev][ENTRY_RANK]
        if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
            return None
        if rev == nullrev:
            return 0  # convention
        return rank

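    # A toy illustration of the rank defined above, on a history where revs
    # 1 and 2 form a chain on top of 0 and rev 3 branches off 0 (the real
    # value is read from the index, not recomputed like this):
    #
    #   >>> parents = {0: [], 1: [0], 2: [1], 3: [0]}
    #   >>> def rank(r):
    #   ...     seen, todo = set(), [r]
    #   ...     while todo:
    #   ...         n = todo.pop()
    #   ...         if n not in seen:
    #   ...             seen.add(n)
    #   ...             todo.extend(parents[n])
    #   ...     return len(seen)
    #   >>> rank(2), rank(3)
    #   (3, 2)
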
    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        if self.feature_config.canonical_parent_order and entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

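    # Canonical parent order, as applied above: whenever the first stored
    # parent is null, the pair is swapped so that a real parent, if any,
    # comes first. A standalone sketch with plain ints (nullrev is -1):
    #
    #   >>> def canonical(p1, p2):
    #   ...     return (p2, p1) if p1 == -1 else (p1, p2)
    #   >>> canonical(-1, 5)
    #   (5, -1)
    #   >>> canonical(3, -1)
    #   (3, -1)
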
    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if self.feature_config.canonical_parent_order and d[5] == self.nullid:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self.delta_config.general_delta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

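    # How the chain walk in _chaininfo() proceeds: with general-delta each
    # entry names its delta base explicitly, and a rev that is its own base
    # is a full snapshot ending the chain. A toy walk over hypothetical
    # (deltabase, compressedlen) entries:
    #
    #   >>> entries = [(0, 10), (0, 4), (1, 3), (2, 2)]
    #   >>> def chainlen(rev):
    #   ...     length = 0
    #   ...     while entries[rev][0] != rev:
    #   ...         length += 1
    #   ...         rev = entries[rev][0]
    #   ...     return length
    #   >>> chainlen(3)  # 3 -> 2 -> 1 -> snapshot at 0
    #   3
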
    def _deltachain(self, rev, stoprev=None):
        return self._inner._deltachain(rev, stoprev=stoprev)

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset:
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

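    # The lazyset helper above acts like a set seeded from a lazily-computed
    # membership source, with extra values added on top. A tiny behavioural
    # sketch (`tinylazyset` is hypothetical, trimmed to the two interesting
    # methods):
    #
    #   >>> class tinylazyset:
    #   ...     def __init__(self, lazy):
    #   ...         self.added, self.lazy = set(), lazy
    #   ...     def __contains__(self, v):
    #   ...         return v in self.added or v in self.lazy
    #   >>> s = tinylazyset(range(5))
    #   >>> s.added.add(99)
    #   >>> 3 in s, 99 in s, 42 in s
    #   (True, True, False)
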
2113 def incrementalmissingrevs(self, common=None):
2113 def incrementalmissingrevs(self, common=None):
2114 """Return an object that can be used to incrementally compute the
2114 """Return an object that can be used to incrementally compute the
2115 revision numbers of the ancestors of arbitrary sets that are not
2115 revision numbers of the ancestors of arbitrary sets that are not
2116 ancestors of common. This is an ancestor.incrementalmissingancestors
2116 ancestors of common. This is an ancestor.incrementalmissingancestors
2117 object.
2117 object.
2118
2118
2119 'common' is a list of revision numbers. If common is not supplied, uses
2119 'common' is a list of revision numbers. If common is not supplied, uses
2120 nullrev.
2120 nullrev.
2121 """
2121 """
2122 if common is None:
2122 if common is None:
2123 common = [nullrev]
2123 common = [nullrev]
2124
2124
2125 if rustancestor is not None and self.index.rust_ext_compat:
2125 if rustancestor is not None and self.index.rust_ext_compat:
2126 return rustancestor.MissingAncestors(self.index, common)
2126 return rustancestor.MissingAncestors(self.index, common)
2127 return ancestor.incrementalmissingancestors(self.parentrevs, common)
2127 return ancestor.incrementalmissingancestors(self.parentrevs, common)
2128
2128
2129 def findmissingrevs(self, common=None, heads=None):
2129 def findmissingrevs(self, common=None, heads=None):
2130 """Return the revision numbers of the ancestors of heads that
2130 """Return the revision numbers of the ancestors of heads that
2131 are not ancestors of common.
2131 are not ancestors of common.
2132
2132
2133 More specifically, return a list of revision numbers corresponding to
2133 More specifically, return a list of revision numbers corresponding to
2134 nodes N such that every N satisfies the following constraints:
2134 nodes N such that every N satisfies the following constraints:
2135
2135
2136 1. N is an ancestor of some node in 'heads'
2136 1. N is an ancestor of some node in 'heads'
2137 2. N is not an ancestor of any node in 'common'
2137 2. N is not an ancestor of any node in 'common'
2138
2138
2139 The list is sorted by revision number, meaning it is
2139 The list is sorted by revision number, meaning it is
2140 topologically sorted.
2140 topologically sorted.
2141
2141
2142 'heads' and 'common' are both lists of revision numbers. If heads is
2142 'heads' and 'common' are both lists of revision numbers. If heads is
2143 not supplied, uses all of the revlog's heads. If common is not
2143 not supplied, uses all of the revlog's heads. If common is not
2144 supplied, uses nullid."""
2144 supplied, uses nullid."""
2145 if common is None:
2145 if common is None:
2146 common = [nullrev]
2146 common = [nullrev]
2147 if heads is None:
2147 if heads is None:
2148 heads = self.headrevs()
2148 heads = self.headrevs()
2149
2149
2150 inc = self.incrementalmissingrevs(common=common)
2150 inc = self.incrementalmissingrevs(common=common)
2151 return inc.missingancestors(heads)
2151 return inc.missingancestors(heads)
2152
2152
2153 def findmissing(self, common=None, heads=None):
2153 def findmissing(self, common=None, heads=None):
2154 """Return the ancestors of heads that are not ancestors of common.
2154 """Return the ancestors of heads that are not ancestors of common.
2155
2155
2156 More specifically, return a list of nodes N such that every N
2156 More specifically, return a list of nodes N such that every N
2157 satisfies the following constraints:
2157 satisfies the following constraints:
2158
2158
2159 1. N is an ancestor of some node in 'heads'
2159 1. N is an ancestor of some node in 'heads'
2160 2. N is not an ancestor of any node in 'common'
2160 2. N is not an ancestor of any node in 'common'
2161
2161
2162 The list is sorted by revision number, meaning it is
2162 The list is sorted by revision number, meaning it is
2163 topologically sorted.
2163 topologically sorted.
2164
2164
2165 'heads' and 'common' are both lists of node IDs. If heads is
2165 'heads' and 'common' are both lists of node IDs. If heads is
2166 not supplied, uses all of the revlog's heads. If common is not
2166 not supplied, uses all of the revlog's heads. If common is not
2167 supplied, uses nullid."""
2167 supplied, uses nullid."""
2168 if common is None:
2168 if common is None:
2169 common = [self.nullid]
2169 common = [self.nullid]
2170 if heads is None:
2170 if heads is None:
2171 heads = self.heads()
2171 heads = self.heads()
2172
2172
2173 common = [self.rev(n) for n in common]
2173 common = [self.rev(n) for n in common]
2174 heads = [self.rev(n) for n in heads]
2174 heads = [self.rev(n) for n in heads]
2175
2175
2176 inc = self.incrementalmissingrevs(common=common)
2176 inc = self.incrementalmissingrevs(common=common)
2177 return [self.node(r) for r in inc.missingancestors(heads)]
2177 return [self.node(r) for r in inc.missingancestors(heads)]
2178
2178
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'.  Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs.  If 'roots' is
        unspecified, uses nullid as the only root.  If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants.  (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.items() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

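The whole routine above boils down to intersecting two reachability sets and emitting them in revision order. A standalone sketch under the same toy-DAG assumption (`parents` maps node to parent list; `toposorted` stands in for revision order; illustration only):

def nodesbetween_sketch(roots, heads, parents, toposorted):
    # ancestors of heads, by walking upwards
    anc = set()
    stack = list(heads)
    while stack:
        n = stack.pop()
        if n not in anc:
            anc.add(n)
            stack.extend(parents.get(n, ()))
    # descendants of roots, by one oldest-first sweep
    desc = set(roots)
    for n in toposorted:
        if any(p in desc for p in parents.get(n, ())):
            desc.add(n)
    # nodes that are both, in topological order
    return [n for n in toposorted if n in anc and n in desc]

parents = {'b': ['a'], 'c': ['b'], 'd': ['b']}
assert nodesbetween_sketch(['b'], ['c'], parents, 'abcd') == ['b', 'c']
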
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

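`_headrevs` is a single linear pass: optimistically mark every revision a head, then unmark each revision's parents. A self-contained sketch of the same idea over a hypothetical parent-rev table, with -1 standing in for nullrev (the real code absorbs the -1 writes in a spare slot instead of guarding):

def headrevs_sketch(parentrevs):
    # parentrevs[r] is the pair of parent revisions of r, -1 meaning "none"
    count = len(parentrevs)
    ishead = [1] * count
    for r, (p1, p2) in enumerate(parentrevs):
        if p1 != -1:
            ishead[p1] = 0
        if p2 != -1:
            ishead[p2] = 0
    return [r for r, flag in enumerate(ishead) if flag]

# 0 <- 1 <- 2 and a branch 0 <- 3: heads are revs 2 and 3
assert headrevs_sketch([(-1, -1), (0, -1), (1, -1), (0, -1)]) == [2, 3]
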
    def _head_node_ids(self):
        try:
            return self.index.head_node_ids()
        except AttributeError:
            return [self.node(r) for r in self.headrevs()]

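The new `_head_node_ids` follows the usual revlog dispatch idiom: try the native (Rust- or C-backed) index fast path first, and fall back to pure Python when the attribute is missing. A hedged standalone illustration of that pattern (the class names here are invented for the demo):

class _IndexWithFastPath:
    def head_node_ids(self):
        return [b'node-of-head']

class _PlainIndex:
    pass

def head_node_ids_sketch(index, fallback):
    # prefer the native implementation, degrade gracefully otherwise
    try:
        return index.head_node_ids()
    except AttributeError:
        return fallback()

assert head_node_ids_sketch(_IndexWithFastPath(), list) == [b'node-of-head']
assert head_node_ids_sketch(_PlainIndex(), list) == []
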
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return self._head_node_ids()
        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

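The guards above exploit the fact that a revision number is never smaller than those of its ancestors: if a > b, a cannot be b's ancestor, and the walk below never needs to descend past a. A standalone sketch doing the same check by direct walking over a hypothetical parent-rev table:

def isancestor_sketch(a, b, parentrevs):
    # walk b's ancestry; revision numbers only decrease, so stop below a
    if a == -1 or a == b:
        return True
    stack = [b]
    seen = set()
    while stack:
        r = stack.pop()
        if r == a:
            return True
        if r > a and r not in seen:
            seen.add(r)
            stack.extend(p for p in parentrevs[r] if p != -1)
    return False

parentrevs = [(-1, -1), (0, -1), (1, -1), (0, -1)]
assert isancestor_sketch(1, 2, parentrevs)
assert not isancestor_sketch(1, 3, parentrevs)
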
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

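When a criss-cross merge yields several equally good ancestors, picking the smallest binary nodeid keeps the result deterministic across runs, e.g.:

candidates = [b'\x9a' * 20, b'\x42' * 20]  # two equally good merge bases
assert min(candidates) == b'\x42' * 20     # same winner every time
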
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (binascii.Error, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key is not hex
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            # hex(node)[:...]
            l = len(id) // 2 * 2  # grab an even number of digits
            try:
                # we're dropping the last digit, so let's check that it's hex,
                # to avoid the expensive computation below if it's not
                if len(id) % 2 > 0:
                    if not (id[-1] in hexdigits):
                        return None
                prefix = bin(id[:l])
            except binascii.Error:
                pass
            else:
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

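Taken together, `_match` and `_partialmatch` let `lookup` accept a revision number, its string form, a full nodeid, or a unique prefix, trying exact matches before prefixes. A simplified dict-backed sketch of that cascade (illustration only, using str nodes for brevity):

def lookup_order_sketch(id_, by_rev, by_node):
    # mirrors the cascade above: exact matches first, prefixes last
    if isinstance(id_, int):
        return by_rev[id_]                      # revision number
    if id_ in by_node:
        return id_                              # full nodeid
    if id_.isdigit():
        return by_rev[int(id_)]                 # str(revision number)
    matches = [n for n in by_node if n.startswith(id_)]
    if len(matches) == 1:
        return matches[0]                       # unambiguous prefix
    raise LookupError('ambiguous or unknown identifier: %r' % id_)

by_node = {'deadbeef': 0, 'decade00': 1}
by_rev = {v: k for k, v in by_node.items()}
assert lookup_order_sketch('dead', by_rev, by_node) == 'deadbeef'
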
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

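Stripped of the wdir special case and caching, the fallback path above is just "grow the prefix until exactly one known node matches". A standalone sketch:

def shortest_sketch(hexnode, all_hexnodes, minlength=1):
    # grow the prefix until it matches exactly one known node
    for length in range(minlength, len(hexnode) + 1):
        prefix = hexnode[:length]
        if sum(1 for h in all_hexnodes if h.startswith(prefix)) == 1:
            return prefix
    return hexnode

nodes = ['deadbeef', 'decade00', 'c0ffee00']
assert shortest_sketch('deadbeef', nodes) == 'dea'
assert shortest_sketch('c0ffee00', nodes) == 'c'
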
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self.delta_config.general_delta:
            return base
        else:
            return rev - 1

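The three branches mirror the two on-disk layouts: `base == rev` is the stored convention for a full snapshot, general-delta records an explicit base per index entry, and the legacy layout always deltas against the previous revision. A standalone sketch:

NULLREV = -1

def deltaparent_sketch(rev, base, general_delta):
    # base == rev is the stored convention for "full snapshot, no delta"
    if base == rev:
        return NULLREV
    if general_delta:
        return base       # explicit base recorded in the index entry
    return rev - 1        # legacy: always delta against the previous rev

assert deltaparent_sketch(5, 5, True) == NULLREV
assert deltaparent_sketch(5, 2, True) == 2
assert deltaparent_sketch(5, 2, False) == 4
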
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        ret = self._inner.issnapshot(rev)
        # cache the inner bound method to skip this wrapper on later calls
        self.issnapshot = self._inner.issnapshot
        return ret

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._inner._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly.  So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._inner._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def revision(self, nodeorrev):
        """return an uncompressed revision of a given node or revision
        number.
        """
        return self._revisiondata(nodeorrev)

    def sidedata(self, nodeorrev):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _rawtext(self, node, rev):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """
        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._inner._revisioncache:
            if self._inner._revisioncache[0] == node:
                return (rev, self._inner._revisioncache[2], True)

        if rev is None:
            rev = self.rev(node)

        return self._inner.raw_text(node, rev)

    def _revisiondata(self, nodeorrev, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev)

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._inner._revisioncache = (node, rev, rawtext)

        return text

    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        sidedata_end = None
        if self._docket is not None:
            sidedata_end = self._docket.sidedata_end
        return self._inner.sidedata(rev, sidedata_end)

    def rawdata(self, nodeorrev):
        """return the uncompressed raw data of a given node or revision number."""
        return self._revisiondata(nodeorrev, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

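For the default scheme this is the classic Mercurial SHA-1 node: the two parent nodeids concatenated in sorted order, followed by the revision text. A minimal re-implementation for illustration (not the code `storageutil` actually runs):

import hashlib

def hashrevision_sketch(text, p1, p2):
    # sort the parents so the hash is independent of their order
    s = hashlib.sha1(min(p1, p2) + max(p1, p2))
    s.update(text)
    return s.digest()

nullid = b'\0' * 20
root = hashrevision_sketch(b'hello\n', nullid, nullid)
assert len(root) == 20
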
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if (
                    self._inner._revisioncache
                    and self._inner._revisioncache[0] == node
                ):
                    self._inner._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self.feature_config.censorable and storageutil.iscensoredtext(
                text
            ):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    @property
    def _split_index_file(self):
        """the path where to expect the index of an ongoing splitting operation

        The file will only exist if a splitting operation is in progress, but
        it is always expected at the same location."""
        parts = self.radix.split(b'/')
        if len(parts) > 1:
            # adds an '-s' suffix to the ``data`` or ``meta`` base directory
            head = parts[0] + b'-s'
            mids = parts[1:-1]
            tail = parts[-1] + b'.i'
            pieces = [head] + mids + [tail]
            return b'/'.join(pieces)
        else:
            # the revlog is stored at the root of the store (changelog or
            # manifest), no risk of collision.
            return self.radix + b'.i.s'

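Concretely, a filelog with radix `data/some/file` gets its temporary split index at `data-s/some/file.i`, while store-root revlogs use a plain `.i.s` sibling. A standalone sketch of the same derivation:

def split_index_file_sketch(radix):
    parts = radix.split(b'/')
    if len(parts) > 1:
        # filelogs: 'data/x' -> 'data-s/x.i', keeping any middle directories
        return b'/'.join([parts[0] + b'-s'] + parts[1:-1] + [parts[-1] + b'.i'])
    # store-root revlogs (changelog, manifest): just a '.i.s' sibling
    return radix + b'.i.s'

assert split_index_file_sketch(b'data/some/file') == b'data-s/some/file.i'
assert split_index_file_sketch(b'00changelog') == b'00changelog.i.s'
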
    def _enforceinlinesize(self, tr, side_write=True):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or (self._may_inline and total_size < _maxinline):
            return

        if self._docket is not None:
            msg = b"inline revlog should not have a docket"
            raise error.ProgrammingError(msg)

        # In the common case, we enforce inline size because the revlog has
        # been appended to. And in such a case, it must have an initial offset
        # recorded in the transaction.
        troffset = tr.findoffset(self._inner.canonical_index_file)
        pre_touched = troffset is not None
        if not pre_touched and self.target[0] != KIND_CHANGELOG:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )

        tr.addbackup(self._inner.canonical_index_file, for_offset=pre_touched)
        tr.add(self._datafile, 0)

        new_index_file_path = None
        if side_write:
            old_index_file_path = self._indexfile
            new_index_file_path = self._split_index_file
            opener = self.opener
            weak_self = weakref.ref(self)

            # the "split" index replaces the real index when the transaction
            # is finalized
            def finalize_callback(tr):
                opener.rename(
                    new_index_file_path,
                    old_index_file_path,
                    checkambig=True,
                )
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path
                    maybe_self._inner.index_file = maybe_self._indexfile

            def abort_callback(tr):
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path
                    maybe_self._inner.inline = True
                    maybe_self._inner.index_file = old_index_file_path

            tr.registertmp(new_index_file_path)
            if self.target[1] is not None:
                callback_id = b'000-revlog-split-%d-%s' % self.target
            else:
                callback_id = b'000-revlog-split-%d' % self.target[0]
            tr.addfinalize(callback_id, finalize_callback)
            tr.addabort(callback_id, abort_callback)

        self._format_flags &= ~FLAG_INLINE_DATA
        self._inner.split_inline(
            tr,
            self._format_flags | self._format_version,
            new_index_file_path=new_index_file_path,
        )

        self._inline = False
        if new_index_file_path is not None:
            self._indexfile = new_index_file_path

        nodemaputil.setup_persistent_nodemap(tr, self)

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
        with self._inner.reading():
            yield

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._inner.is_writing:
            yield
        else:
            data_end = None
            sidedata_end = None
            if self._docket is not None:
                data_end = self._docket.data_end
                sidedata_end = self._docket.sidedata_end
            with self._inner.writing(
                transaction,
                data_end=data_end,
                sidedata_end=sidedata_end,
            ):
                yield
                if self._docket is not None:
                    self._write_docket(transaction)

    @property
    def is_delaying(self):
        return self._inner.is_delaying

    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement its transaction
        logic.

        We could also imagine using the same transaction logic for all
        revlogs, since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.feature_config.has_side_data:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that doesn't support it")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

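Note the early return above: adding content whose node already exists hands back the existing revision number, so identical (text, parents) pairs are deduplicated. A standalone sketch of that content-addressed insert pattern (toy store, illustration only):

import hashlib

def add_sketch(store, text, p1, p2):
    # store maps node -> rev; identical (text, parents) always dedupes
    node = hashlib.sha1(min(p1, p2) + max(p1, p2) + text).digest()
    if node in store:
        return store[node], node
    store[node] = len(store)
    return store[node], node

store = {}
nullid = b'\0' * 20
rev0, node0 = add_sketch(store, b'v1', nullid, nullid)
rev_again, _ = add_sketch(store, b'v1', nullid, nullid)
assert (rev0, rev_again) == (0, 0)
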
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        return self._inner.compress(data)

    def decompress(self, data):
        return self._inner.decompress(data)

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._inner._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._inner._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            write_debug = None
            if self.delta_config.debug_delta:
                write_debug = transaction._report
            deltacomputer = deltautil.deltacomputer(
                self, write_debug=write_debug
            )

        if cachedelta is not None and len(cachedelta) == 2:
3149 # If the cached delta has no information about how it should be
3154 # If the cached delta has no information about how it should be
3150 # reused, add the default reuse instruction according to the
3155 # reused, add the default reuse instruction according to the
3151 # revlog's configuration.
3156 # revlog's configuration.
3152 if (
3157 if (
3153 self.delta_config.general_delta
3158 self.delta_config.general_delta
3154 and self.delta_config.lazy_delta_base
3159 and self.delta_config.lazy_delta_base
3155 ):
3160 ):
3156 delta_base_reuse = DELTA_BASE_REUSE_TRY
3161 delta_base_reuse = DELTA_BASE_REUSE_TRY
3157 else:
3162 else:
3158 delta_base_reuse = DELTA_BASE_REUSE_NO
3163 delta_base_reuse = DELTA_BASE_REUSE_NO
3159 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
3164 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
3160
3165
3161 revinfo = revlogutils.revisioninfo(
3166 revinfo = revlogutils.revisioninfo(
3162 node,
3167 node,
3163 p1,
3168 p1,
3164 p2,
3169 p2,
3165 btext,
3170 btext,
3166 textlen,
3171 textlen,
3167 cachedelta,
3172 cachedelta,
3168 flags,
3173 flags,
3169 )
3174 )
3170
3175
3171 deltainfo = deltacomputer.finddeltainfo(revinfo)
3176 deltainfo = deltacomputer.finddeltainfo(revinfo)
3172
3177
3173 compression_mode = COMP_MODE_INLINE
3178 compression_mode = COMP_MODE_INLINE
3174 if self._docket is not None:
3179 if self._docket is not None:
3175 default_comp = self._docket.default_compression_header
3180 default_comp = self._docket.default_compression_header
3176 r = deltautil.delta_compression(default_comp, deltainfo)
3181 r = deltautil.delta_compression(default_comp, deltainfo)
3177 compression_mode, deltainfo = r
3182 compression_mode, deltainfo = r
3178
3183
3179 sidedata_compression_mode = COMP_MODE_INLINE
3184 sidedata_compression_mode = COMP_MODE_INLINE
3180 if sidedata and self.feature_config.has_side_data:
3185 if sidedata and self.feature_config.has_side_data:
3181 sidedata_compression_mode = COMP_MODE_PLAIN
3186 sidedata_compression_mode = COMP_MODE_PLAIN
3182 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3187 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3183 sidedata_offset = self._docket.sidedata_end
3188 sidedata_offset = self._docket.sidedata_end
3184 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3189 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3185 if (
3190 if (
3186 h != b'u'
3191 h != b'u'
3187 and comp_sidedata[0:1] != b'\0'
3192 and comp_sidedata[0:1] != b'\0'
3188 and len(comp_sidedata) < len(serialized_sidedata)
3193 and len(comp_sidedata) < len(serialized_sidedata)
3189 ):
3194 ):
3190 assert not h
3195 assert not h
3191 if (
3196 if (
3192 comp_sidedata[0:1]
3197 comp_sidedata[0:1]
3193 == self._docket.default_compression_header
3198 == self._docket.default_compression_header
3194 ):
3199 ):
3195 sidedata_compression_mode = COMP_MODE_DEFAULT
3200 sidedata_compression_mode = COMP_MODE_DEFAULT
3196 serialized_sidedata = comp_sidedata
3201 serialized_sidedata = comp_sidedata
3197 else:
3202 else:
3198 sidedata_compression_mode = COMP_MODE_INLINE
3203 sidedata_compression_mode = COMP_MODE_INLINE
3199 serialized_sidedata = comp_sidedata
3204 serialized_sidedata = comp_sidedata
3200 else:
3205 else:
3201 serialized_sidedata = b""
3206 serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, so that we can
            # easily detect empty sidedata; it is then no different from the
            # entries we add manually.
            sidedata_offset = 0

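        # A hedged reading of the sidedata branch above, not an authoritative
        # spec: the compressed blob is kept only when it is both smaller and
        # unambiguous. Roughly:
        #
        #   h == b'u'              -> engine refused to compress; keep the
        #                             plain bytes (COMP_MODE_PLAIN)
        #   blob starts with b'\0' -> would be misread as plain data when
        #                             stored inline; keep the plain bytes
        #   blob starts with the docket's default compression header
        #                          -> COMP_MODE_DEFAULT, the engine is implied
        #   anything else          -> COMP_MODE_INLINE, the blob's first byte
        #                             names its own engine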
        rank = RANK_UNKNOWN
        if self.feature_config.compute_rank:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else:  # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))

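        # Sketch of what "rank" appears to mean here (inferred from the cases
        # above, not a normative definition): the rank of a revision is the
        # size of its ancestor set, the revision itself included. For a
        # hypothetical graph:
        #
        #   r0 -- r1 --.
        #    \          \
        #     `-- r2 --- r3 (merge)
        #
        #   rank(r0) = 1; rank(r1) = rank(r2) = 2
        #   rank(r3) = 1 + rank(r2) + len({r1}) = 4
        #
        # which is what the merge branch computes: 1 + fast_rank(pmax), plus
        # the revisions only reachable from the other parent.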
        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._inner._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a docket
        file to store that information: since sidedata can be rewritten to the
        end of the data file within a transaction, you can have cases where, for
        example, rev `n` does not have sidedata while rev `n - 1` does, leading
        to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self,
        transaction,
        entry,
        data,
        link,
        offset,
        sidedata,
        sidedata_offset,
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        index_end = data_end = sidedata_end = None
        if self._docket is not None:
            index_end = self._docket.index_end
            data_end = self._docket.data_end
            sidedata_end = self._docket.sidedata_end

        files_end = self._inner.write_entry(
            transaction,
            entry,
            data,
            link,
            offset,
            sidedata,
            sidedata_offset,
            index_end,
            data_end,
            sidedata_end,
        )
        self._enforceinlinesize(transaction)
        if self._docket is not None:
            self._docket.index_end = files_end[0]
            self._docket.data_end = files_end[1]
            self._docket.sidedata_end = files_end[2]

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

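        # Shape of each item in ``deltas``, as unpacked by the loop below
        # (illustrative, mirroring the code rather than documenting a wire
        # format):
        #
        #   (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
        #
        # ``delta`` applies against the revision identified by ``deltabase``,
        # and ``linkmapper(linknode)`` resolves the linkrev to store.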
        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        # read the default delta-base reuse policy from revlog config if the
        # group did not specify one.
        if delta_base_reuse_policy is None:
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse_policy = DELTA_BASE_REUSE_NO

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self.delta_config.debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                    debug_info=debug_info,
                )
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta, delta_base_reuse_policy),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self.feature_config.censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self.feature_config.censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

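    # Hedged usage sketch for getstrippoint() (``rl`` and the linkrev value
    # are hypothetical):
    #
    #   rev, broken = rl.getstrippoint(42)
    #   # ``rev`` is where truncation would start; ``broken`` holds the revs
    #   # whose linkrevs this strip would break.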
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However, it is
            # not powerful enough at the time of this comment.
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._chaininfocache = util.lrucachedict(500)
        self._inner.clear_cache()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)

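    # Minimal sketch of a health check built on checksize() (``rl`` is a
    # hypothetical, already-open revlog):
    #
    #   dd, di = rl.checksize()
    #   if (dd, di) != (0, 0):
    #       print('%d stray data bytes, %d stray index bytes' % (dd, di))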
    def files(self):
        """return list of files that compose this revlog"""
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self.delta_config.general_delta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self._candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

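    # Hedged usage sketch for clone() below: copy into an empty destination
    # revlog while recomputing every delta (``src``, ``dst`` and ``tr`` are
    # hypothetical names, not part of this module):
    #
    #   src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)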
    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing revlog
        are preserved in the destination revlog. The argument can have the
        following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force computing deltas against both
        parents for merges. If ``None``, the destination revlog's current
        setting is kept.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        old_delta_config = destrevlog.delta_config
        destrevlog.delta_config = destrevlog.delta_config.copy()

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog.delta_config.lazy_delta_base = True
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = False

            delta_both_parents = (
                forcedeltabothparents or old_delta_config.delta_both_parents
            )
            destrevlog.delta_config.delta_both_parents = delta_both_parents

            with self.reading(), destrevlog._writing(tr):
                self._clone(
                    tr,
                    destrevlog,
                    addrevisioncb,
                    deltareuse,
                    forcedeltabothparents,
                    sidedata_helpers,
                )

        finally:
            destrevlog.delta_config = old_delta_config

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self.delta_config.debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog.delta_config.lazy_delta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._inner._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    try:
                        rawtext = self._revisiondata(rev)
                    except error.CensoredNodeError as censored:
                        assert flags & REVIDX_ISCENSORED
                        rawtext = censored.tombstone
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog._addrevision(
                    node,
                    rawtext,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    flags,
                    cachedelta,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            # 1. length check: L1 == L2, in all cases.
            # 2. hash check: depending on flag processor, we may need to
            #    use either "text" (external), or "rawtext" (in revlog).

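            # Worked example of the table above (hypothetical numbers): for a
            # renamed file whose copy-metadata header is 20 bytes (LM = 20)
            # and whose resolved content is 100 bytes, rawtext is 120 bytes,
            # so rawsize() == len(rawtext) == 120 (L1 == L2), while
            # len(read()) == 100 (L2 - LM). The check below only asserts
            # L1 == L2; L3 is left to the flag processors.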
            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

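    # Sketch of querying storage details (``rl`` is a hypothetical revlog;
    # the returned numbers are made up):
    #
    #   info = rl.storageinfo(revisionscount=True, trackedsize=True)
    #   # e.g. {b'revisionscount': 42, b'trackedsize': 123456}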
    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.feature_config.has_side_data:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._inner._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.feature_config.has_side_data:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self._inner.compress(serialized_sidedata)
                    if (
                        h != b'u'
4015 and comp_sidedata[0] != b'\0'
4020 and comp_sidedata[0] != b'\0'
4016 and len(comp_sidedata) < len(serialized_sidedata)
4021 and len(comp_sidedata) < len(serialized_sidedata)
4017 ):
4022 ):
4018 assert not h
4023 assert not h
4019 if (
4024 if (
4020 comp_sidedata[0]
4025 comp_sidedata[0]
4021 == self._docket.default_compression_header
4026 == self._docket.default_compression_header
4022 ):
4027 ):
4023 sidedata_compression_mode = COMP_MODE_DEFAULT
4028 sidedata_compression_mode = COMP_MODE_DEFAULT
4024 serialized_sidedata = comp_sidedata
4029 serialized_sidedata = comp_sidedata
4025 else:
4030 else:
4026 sidedata_compression_mode = COMP_MODE_INLINE
4031 sidedata_compression_mode = COMP_MODE_INLINE
4027 serialized_sidedata = comp_sidedata
4032 serialized_sidedata = comp_sidedata
4028 if entry[8] != 0 or entry[9] != 0:
4033 if entry[8] != 0 or entry[9] != 0:
4029 # rewriting entries that already have sidedata is not
4034 # rewriting entries that already have sidedata is not
4030 # supported yet, because it introduces garbage data in the
4035 # supported yet, because it introduces garbage data in the
4031 # revlog.
4036 # revlog.
4032 msg = b"rewriting existing sidedata is not supported yet"
4037 msg = b"rewriting existing sidedata is not supported yet"
4033 raise error.Abort(msg)
4038 raise error.Abort(msg)
4034
4039
4035 # Apply (potential) flags to add and to remove after running
4040 # Apply (potential) flags to add and to remove after running
4036 # the sidedata helpers
4041 # the sidedata helpers
4037 new_offset_flags = entry[0] | flags[0] & ~flags[1]
4042 new_offset_flags = entry[0] | flags[0] & ~flags[1]
4038 entry_update = (
4043 entry_update = (
4039 current_offset,
4044 current_offset,
4040 len(serialized_sidedata),
4045 len(serialized_sidedata),
4041 new_offset_flags,
4046 new_offset_flags,
4042 sidedata_compression_mode,
4047 sidedata_compression_mode,
4043 )
4048 )
4044
4049
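                # Worked example for the flag update above (illustrative
                # values): `flags` is a (to_add, to_remove) pair of bit
                # masks, so with flags = (0b0110, 0b0010) the expression
                # entry[0] | flags[0] & ~flags[1] ORs in only 0b0100 --
                # Python's `&` binds tighter than `|`, so bits scheduled
                # for removal are masked out of the additions before the OR.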
                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)
@@ -1,1157 +1,1189 @@
// revlog.rs
//
// Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.

use crate::{
    conversion::{rev_pyiter_collect, rev_pyiter_collect_or_else},
    utils::{node_from_py_bytes, node_from_py_object},
    PyRevision,
};
use cpython::{
    buffer::{Element, PyBuffer},
    exc::{IndexError, ValueError},
    ObjectProtocol, PyBool, PyBytes, PyClone, PyDict, PyErr, PyInt, PyList,
    PyModule, PyObject, PyResult, PySet, PyString, PyTuple, Python,
    PythonObject, ToPyObject, UnsafePyLeaked,
};
use hg::{
    errors::HgError,
    index::{
        IndexHeader, Phase, RevisionDataParams, SnapshotsCache,
        INDEX_ENTRY_SIZE,
    },
    nodemap::{Block, NodeMapError, NodeTree as CoreNodeTree},
    revlog::{nodemap::NodeMap, Graph, NodePrefix, RevlogError, RevlogIndex},
    BaseRevision, Node, Revision, UncheckedRevision, NULL_REVISION,
};
use std::{cell::RefCell, collections::HashMap};
use vcsgraph::graph::Graph as VCSGraph;

pub struct PySharedIndex {
    /// The underlying hg-core index
    pub(crate) inner: &'static hg::index::Index,
}

/// Return a struct implementing the Graph trait
pub(crate) fn py_rust_index_to_graph(
    py: Python,
    index: PyObject,
) -> PyResult<UnsafePyLeaked<PySharedIndex>> {
    let midx = index.extract::<Index>(py)?;
    let leaked = midx.index(py).leak_immutable();
    // Safety: we don't leak the "faked" reference out of the `UnsafePyLeaked`
    Ok(unsafe { leaked.map(py, |idx| PySharedIndex { inner: idx }) })
}

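// Illustrative borrow pattern (an assumption, not code from this changeset):
// a GIL-bound caller re-attaches the leaked value before using it, which is
// how `UnsafePyLeaked` values are consumed elsewhere in hg-cpython.
//
//     let leaked = py_rust_index_to_graph(py, index_obj)?;
//     let shared = unsafe { leaked.try_borrow(py)? };
//     let parents = shared.parents(Revision(0));
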
impl Clone for PySharedIndex {
    fn clone(&self) -> Self {
        Self { inner: self.inner }
    }
}

impl Graph for PySharedIndex {
    #[inline(always)]
    fn parents(&self, rev: Revision) -> Result<[Revision; 2], hg::GraphError> {
        self.inner.parents(rev)
    }
}

impl VCSGraph for PySharedIndex {
    #[inline(always)]
    fn parents(
        &self,
        rev: BaseRevision,
    ) -> Result<vcsgraph::graph::Parents, vcsgraph::graph::GraphReadError>
    {
        // FIXME This trait should be reworked to decide between Revision
        // and UncheckedRevision, get better error names, etc.
        match Graph::parents(self, Revision(rev)) {
            Ok(parents) => {
                Ok(vcsgraph::graph::Parents([parents[0].0, parents[1].0]))
            }
            Err(hg::GraphError::ParentOutOfRange(rev)) => {
                Err(vcsgraph::graph::GraphReadError::KeyedInvalidKey(rev.0))
            }
        }
    }
}

impl RevlogIndex for PySharedIndex {
    fn len(&self) -> usize {
        self.inner.len()
    }
    fn node(&self, rev: Revision) -> Option<&Node> {
        self.inner.node(rev)
    }
}

py_class!(pub class Index |py| {
    @shared data index: hg::index::Index;
    data nt: RefCell<Option<CoreNodeTree>>;
    data docket: RefCell<Option<PyObject>>;
    // Holds a reference to the mmap'ed persistent nodemap data
    data nodemap_mmap: RefCell<Option<PyBuffer>>;
    // Holds a reference to the mmap'ed persistent index data
    data index_mmap: RefCell<Option<PyBuffer>>;
    data head_revs_py_list: RefCell<Option<PyList>>;

    def __new__(
        _cls,
        data: PyObject,
        default_header: u32,
    ) -> PyResult<Self> {
        Self::new(py, data, default_header)
    }

    /// Compatibility layer used for Python consumers needing access to the C index
    ///
    /// Only use case so far is `scmutil.shortesthexnodeidprefix`,
    /// that may need to build a custom `nodetree`, based on a specified revset.
    /// With a Rust implementation of the nodemap, we will be able to get rid of
    /// this, by exposing our own standalone nodemap class,
    /// ready to accept `Index`.
    /* def get_cindex(&self) -> PyResult<PyObject> {
        Ok(self.cindex(py).borrow().inner().clone_ref(py))
    }
    */
    // Index API involving nodemap, as defined in mercurial/pure/parsers.py

    /// Return Revision if found, raises a bare `error.RevlogError`
    /// in case of ambiguity, same as C version does
    def get_rev(&self, node: PyBytes) -> PyResult<Option<PyRevision>> {
        let opt = self.get_nodetree(py)?.borrow();
        let nt = opt.as_ref().unwrap();
        let ridx = &*self.index(py).borrow();
        let node = node_from_py_bytes(py, &node)?;
        let rust_rev =
            nt.find_bin(ridx, node.into()).map_err(|e| nodemap_error(py, e))?;
        Ok(rust_rev.map(Into::into))
    }

    /// same as `get_rev()` but raises a bare `error.RevlogError` if node
    /// is not found.
    ///
    /// No need to repeat `node` in the exception, `mercurial/revlog.py`
    /// will catch and rewrap with it
    def rev(&self, node: PyBytes) -> PyResult<PyRevision> {
        self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
    }

    /// return True if the node exists in the index
    def has_node(&self, node: PyBytes) -> PyResult<bool> {
        // TODO OPTIM we could avoid a needless conversion here,
        // to do when scaffolding for pure Rust switch is removed,
        // as `get_rev()` currently does the necessary assertions
        self.get_rev(py, node).map(|opt| opt.is_some())
    }

    /// find length of shortest hex nodeid of a binary ID
    def shortest(&self, node: PyBytes) -> PyResult<usize> {
        let opt = self.get_nodetree(py)?.borrow();
        let nt = opt.as_ref().unwrap();
        let idx = &*self.index(py).borrow();
        match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
        {
            Ok(Some(l)) => Ok(l),
            Ok(None) => Err(revlog_error(py)),
            Err(e) => Err(nodemap_error(py, e)),
        }
    }

    def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
        let opt = self.get_nodetree(py)?.borrow();
        let nt = opt.as_ref().unwrap();
        let idx = &*self.index(py).borrow();

        let node_as_string = if cfg!(feature = "python3-sys") {
            node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
        }
        else {
            let node = node.extract::<PyBytes>(py)?;
            String::from_utf8_lossy(node.data(py)).to_string()
        };

        let prefix = NodePrefix::from_hex(&node_as_string)
            .map_err(|_| PyErr::new::<ValueError, _>(
                py, format!("Invalid node or prefix '{}'", node_as_string))
            )?;

        nt.find_bin(idx, prefix)
            // TODO make an inner API returning the node directly
            .map(|opt| opt.map(
                |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
            .map_err(|e| nodemap_error(py, e))
    }

    /// append an index entry
    def append(&self, tup: PyTuple) -> PyResult<PyObject> {
        if tup.len(py) < 8 {
            // this is better than the panic promised by tup.get_item()
            return Err(
                PyErr::new::<IndexError, _>(py, "tuple index out of range"))
        }
        let node_bytes = tup.get_item(py, 7).extract(py)?;
        let node = node_from_py_object(py, &node_bytes)?;

        let rev = self.len(py)? as BaseRevision;

        // This is ok since we will just add the revision to the index
        let rev = Revision(rev);
        self.index(py)
            .borrow_mut()
            .append(py_tuple_to_revision_data_params(py, tup)?)
            .unwrap();
        let idx = &*self.index(py).borrow();
        self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
            .insert(idx, &node, rev)
            .map_err(|e| nodemap_error(py, e))?;
        Ok(py.None())
    }

    def __delitem__(&self, key: PyObject) -> PyResult<()> {
        // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
        let start = if let Ok(rev) = key.extract(py) {
            UncheckedRevision(rev)
        } else {
            let start = key.getattr(py, "start")?;
            UncheckedRevision(start.extract(py)?)
        };
        let start = self.index(py)
            .borrow()
            .check_revision(start)
            .ok_or_else(|| {
                nodemap_error(py, NodeMapError::RevisionNotInIndex(start))
            })?;
        self.index(py).borrow_mut().remove(start).unwrap();
        let mut opt = self.get_nodetree(py)?.borrow_mut();
        let nt = opt.as_mut().unwrap();
        nt.invalidate_all();
        self.fill_nodemap(py, nt)?;
        Ok(())
    }

    //
    // Index methods previously reforwarded to C index (tp_methods)
    // Same ordering as in revlog.c
    //

    /// return the gca set of the given revs
    def ancestors(&self, *args, **_kw) -> PyResult<PyObject> {
        let rust_res = self.inner_ancestors(py, args)?;
        Ok(rust_res)
    }

    /// return the heads of the common ancestors of the given revs
    def commonancestorsheads(&self, *args, **_kw) -> PyResult<PyObject> {
        let rust_res = self.inner_commonancestorsheads(py, args)?;
        Ok(rust_res)
    }

    /// Clear the index caches and inner py_class data.
    /// It is Python's responsibility to call `update_nodemap_data` again.
    def clearcaches(&self) -> PyResult<PyObject> {
        self.nt(py).borrow_mut().take();
        self.docket(py).borrow_mut().take();
        self.nodemap_mmap(py).borrow_mut().take();
        self.head_revs_py_list(py).borrow_mut().take();
        self.index(py).borrow().clear_caches();
        Ok(py.None())
    }

    /// return the raw binary string representing a revision
    def entry_binary(&self, *args, **_kw) -> PyResult<PyObject> {
        let rindex = self.index(py).borrow();
        let rev = UncheckedRevision(args.get_item(py, 0).extract(py)?);
        let rust_bytes = rindex.check_revision(rev).and_then(
            |r| rindex.entry_binary(r))
            .ok_or_else(|| rev_not_in_index(py, rev))?;
        let rust_res = PyBytes::new(py, rust_bytes).into_object();
        Ok(rust_res)
    }

    /// return a binary packed version of the header
    def pack_header(&self, *args, **_kw) -> PyResult<PyObject> {
        let rindex = self.index(py).borrow();
        let packed = rindex.pack_header(args.get_item(py, 0).extract(py)?);
        let rust_res = PyBytes::new(py, &packed).into_object();
        Ok(rust_res)
    }

    /// compute phases
    def computephasesmapsets(&self, *args, **_kw) -> PyResult<PyObject> {
        let py_roots = args.get_item(py, 0).extract::<PyDict>(py)?;
        let rust_res = self.inner_computephasesmapsets(py, py_roots)?;
        Ok(rust_res)
    }

    /// reachableroots
    def reachableroots2(&self, *args, **_kw) -> PyResult<PyObject> {
        let rust_res = self.inner_reachableroots2(
            py,
            UncheckedRevision(args.get_item(py, 0).extract(py)?),
            args.get_item(py, 1),
            args.get_item(py, 2),
            args.get_item(py, 3).extract(py)?,
        )?;
        Ok(rust_res)
    }

    /// get head revisions
    def headrevs(&self) -> PyResult<PyObject> {
        let rust_res = self.inner_headrevs(py)?;
        Ok(rust_res)
    }

+    /// get head nodeids
+    def head_node_ids(&self) -> PyResult<PyObject> {
+        let rust_res = self.inner_head_node_ids(py)?;
+        Ok(rust_res)
+    }
+
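    // Annotation (not part of the source): `head_node_ids` is the fast path
    // this changeset adds -- a single FFI call returns the head nodes
    // directly, instead of Python fetching one node per head revision.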
    /// get filtered head revisions
    def headrevsfiltered(&self, *args, **_kw) -> PyResult<PyObject> {
        let rust_res = self.inner_headrevsfiltered(py, &args.get_item(py, 0))?;
        Ok(rust_res)
    }

    /// True if the object is a snapshot
    def issnapshot(&self, *args, **_kw) -> PyResult<bool> {
        let index = self.index(py).borrow();
        let result = index
            .is_snapshot(UncheckedRevision(args.get_item(py, 0).extract(py)?))
            .map_err(|e| {
                PyErr::new::<cpython::exc::ValueError, _>(py, e.to_string())
            })?;
        Ok(result)
    }

    /// Gather snapshot data in a cache dict
    def findsnapshots(&self, *args, **_kw) -> PyResult<PyObject> {
        let index = self.index(py).borrow();
        let cache: PyDict = args.get_item(py, 0).extract(py)?;
        // this method operates by setting new values in the cache,
        // hence we will compare results by letting the C implementation
        // operate over a deepcopy of the cache, and finally compare both
        // caches.
        let c_cache = PyDict::new(py);
        for (k, v) in cache.items(py) {
            c_cache.set_item(py, k, PySet::new(py, v)?)?;
        }

        let start_rev = UncheckedRevision(args.get_item(py, 1).extract(py)?);
        let end_rev = UncheckedRevision(args.get_item(py, 2).extract(py)?);
        let mut cache_wrapper = PySnapshotsCache{ py, dict: cache };
        index.find_snapshots(
            start_rev,
            end_rev,
            &mut cache_wrapper,
        ).map_err(|_| revlog_error(py))?;
        Ok(py.None())
    }

    /// determine revisions with deltas to reconstruct fulltext
    def deltachain(&self, *args, **_kw) -> PyResult<PyObject> {
        let index = self.index(py).borrow();
        let rev = args.get_item(py, 0).extract::<BaseRevision>(py)?.into();
        let stop_rev =
            args.get_item(py, 1).extract::<Option<BaseRevision>>(py)?;
        let rev = index.check_revision(rev).ok_or_else(|| {
            nodemap_error(py, NodeMapError::RevisionNotInIndex(rev))
        })?;
        let stop_rev = if let Some(stop_rev) = stop_rev {
            let stop_rev = UncheckedRevision(stop_rev);
            Some(index.check_revision(stop_rev).ok_or_else(|| {
                nodemap_error(py, NodeMapError::RevisionNotInIndex(stop_rev))
            })?)
        } else {None};
        let using_general_delta = args.get_item(py, 2)
            .extract::<Option<u32>>(py)?
            .map(|i| i != 0);
        let (chain, stopped) = index.delta_chain(
            rev, stop_rev, using_general_delta
        ).map_err(|e| {
            PyErr::new::<cpython::exc::ValueError, _>(py, e.to_string())
        })?;

        let chain: Vec<_> = chain.into_iter().map(|r| r.0).collect();
        Ok(
            PyTuple::new(
                py,
                &[
                    chain.into_py_object(py).into_object(),
                    stopped.into_py_object(py).into_object()
                ]
            ).into_object()
        )
    }

    /// slice planned chunk read to reach a density threshold
    def slicechunktodensity(&self, *args, **_kw) -> PyResult<PyObject> {
        let rust_res = self.inner_slicechunktodensity(
            py,
            args.get_item(py, 0),
            args.get_item(py, 1).extract(py)?,
            args.get_item(py, 2).extract(py)?
        )?;
        Ok(rust_res)
    }

    // index_sequence_methods and index_mapping_methods.
    //
    // Since we call back through the high level Python API,
    // there's no point making a distinction between index_get
    // and index_getitem.
    // gracinet 2023: this above is no longer true for the pure Rust impl

    def __len__(&self) -> PyResult<usize> {
        self.len(py)
    }

    def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
        let rust_res = self.inner_getitem(py, key.clone_ref(py))?;
        Ok(rust_res)
    }

    def __contains__(&self, item: PyObject) -> PyResult<bool> {
        // ObjectProtocol does not seem to provide contains(), so
        // this is an equivalent implementation of the index_contains()
        // defined in revlog.c
        match item.extract::<i32>(py) {
            Ok(rev) => {
                Ok(rev >= -1 && rev < self.len(py)? as BaseRevision)
            }
            Err(_) => {
                let item_bytes: PyBytes = item.extract(py)?;
                let rust_res = self.has_node(py, item_bytes)?;
                Ok(rust_res)
            }
        }
    }

    def nodemap_data_all(&self) -> PyResult<PyBytes> {
        self.inner_nodemap_data_all(py)
    }

    def nodemap_data_incremental(&self) -> PyResult<PyObject> {
        self.inner_nodemap_data_incremental(py)
    }
    def update_nodemap_data(
        &self,
        docket: PyObject,
        nm_data: PyObject
    ) -> PyResult<PyObject> {
        self.inner_update_nodemap_data(py, docket, nm_data)
    }

    @property
    def entry_size(&self) -> PyResult<PyInt> {
        let rust_res: PyInt = INDEX_ENTRY_SIZE.to_py_object(py);
        Ok(rust_res)
    }

    @property
    def rust_ext_compat(&self) -> PyResult<PyInt> {
        // will be entirely removed once the Rust index is fully in use;
        // still useful for now to detangle things while removing `self.cindex`
        let rust_res: PyInt = 1.to_py_object(py);
        Ok(rust_res)
    }

    @property
    def is_rust(&self) -> PyResult<PyBool> {
        Ok(false.to_py_object(py))
    }

});

/// Take a (potentially) mmap'ed buffer, and return the underlying Python
/// buffer along with the Rust slice into said buffer. We need to keep the
/// Python buffer around, otherwise we'd get a dangling pointer once the buffer
/// is freed from Python's side.
///
/// # Safety
///
/// The caller must make sure that the buffer is kept around for at least as
/// long as the slice.
#[deny(unsafe_op_in_unsafe_fn)]
unsafe fn mmap_keeparound(
    py: Python,
    data: PyObject,
) -> PyResult<(
    PyBuffer,
    Box<dyn std::ops::Deref<Target = [u8]> + Send + Sync + 'static>,
)> {
    let buf = PyBuffer::get(py, &data)?;
    let len = buf.item_count();

    // Build a slice from the mmap'ed buffer data
    let cbuf = buf.buf_ptr();
    let bytes = if std::mem::size_of::<u8>() == buf.item_size()
        && buf.is_c_contiguous()
        && u8::is_compatible_format(buf.format())
    {
        unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
    } else {
        return Err(PyErr::new::<ValueError, _>(
            py,
            "Nodemap data buffer has an invalid memory representation"
                .to_string(),
        ));
    };

    Ok((buf, Box::new(bytes)))
}

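// Annotation (not part of the source): callers uphold the Safety contract
// above by stashing the returned `PyBuffer` next to the slice, e.g. (sketch,
// mirroring `Index::new` below):
//
//     let (buf, bytes) = unsafe { mmap_keeparound(py, data)? };
//     self.index_mmap(py).borrow_mut().replace(buf); // keep buffer alive
//     // `bytes` stays valid for as long as `buf` is held
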
fn py_tuple_to_revision_data_params(
    py: Python,
    tuple: PyTuple,
) -> PyResult<RevisionDataParams> {
    if tuple.len(py) < 8 {
        // this is better than the panic promised by tup.get_item()
        return Err(PyErr::new::<IndexError, _>(
            py,
            "tuple index out of range",
        ));
    }
    let offset_or_flags: u64 = tuple.get_item(py, 0).extract(py)?;
    let node_id = tuple
        .get_item(py, 7)
        .extract::<PyBytes>(py)?
        .data(py)
        .try_into()
        .unwrap();
    let flags = (offset_or_flags & 0xFFFF) as u16;
    let data_offset = offset_or_flags >> 16;
    Ok(RevisionDataParams {
        flags,
        data_offset,
        data_compressed_length: tuple.get_item(py, 1).extract(py)?,
        data_uncompressed_length: tuple.get_item(py, 2).extract(py)?,
        data_delta_base: tuple.get_item(py, 3).extract(py)?,
        link_rev: tuple.get_item(py, 4).extract(py)?,
        parent_rev_1: tuple.get_item(py, 5).extract(py)?,
        parent_rev_2: tuple.get_item(py, 6).extract(py)?,
        node_id,
        ..Default::default()
    })
}
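// Worked example for the unpacking above (illustrative values): the first
// tuple item packs the data offset and the 16 low-order flag bits into one
// integer.
//
//     let offset_or_flags: u64 = (42 << 16) | 0x0001;
//     assert_eq!((offset_or_flags & 0xFFFF) as u16, 0x0001); // flags
//     assert_eq!(offset_or_flags >> 16, 42);                 // data_offset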

fn revision_data_params_to_py_tuple(
    py: Python,
    params: RevisionDataParams,
) -> PyTuple {
    PyTuple::new(
        py,
        &[
            params.data_offset.into_py_object(py).into_object(),
            params
                .data_compressed_length
                .into_py_object(py)
                .into_object(),
            params
                .data_uncompressed_length
                .into_py_object(py)
                .into_object(),
            params.data_delta_base.into_py_object(py).into_object(),
            params.link_rev.into_py_object(py).into_object(),
            params.parent_rev_1.into_py_object(py).into_object(),
            params.parent_rev_2.into_py_object(py).into_object(),
            PyBytes::new(py, &params.node_id)
                .into_py_object(py)
                .into_object(),
            params._sidedata_offset.into_py_object(py).into_object(),
            params
                ._sidedata_compressed_length
                .into_py_object(py)
                .into_object(),
            params
                .data_compression_mode
                .into_py_object(py)
                .into_object(),
            params
                ._sidedata_compression_mode
                .into_py_object(py)
                .into_object(),
            params._rank.into_py_object(py).into_object(),
        ],
    )
}

struct PySnapshotsCache<'p> {
    py: Python<'p>,
    dict: PyDict,
}

impl<'p> SnapshotsCache for PySnapshotsCache<'p> {
    fn insert_for(
        &mut self,
        rev: BaseRevision,
        value: BaseRevision,
    ) -> Result<(), RevlogError> {
        let pyvalue = value.into_py_object(self.py).into_object();
        match self.dict.get_item(self.py, rev) {
            Some(obj) => obj
                .extract::<PySet>(self.py)
                .and_then(|set| set.add(self.py, pyvalue)),
            None => PySet::new(self.py, vec![pyvalue])
                .and_then(|set| self.dict.set_item(self.py, rev, set)),
        }
        .map_err(|_| {
            RevlogError::Other(HgError::unsupported(
                "Error in Python caches handling",
            ))
        })
    }
}

607 fn new(py: Python, data: PyObject, header: u32) -> PyResult<Self> {
613 fn new(py: Python, data: PyObject, header: u32) -> PyResult<Self> {
608 // Safety: we keep the buffer around inside the class as `index_mmap`
614 // Safety: we keep the buffer around inside the class as `index_mmap`
609 let (buf, bytes) = unsafe { mmap_keeparound(py, data)? };
615 let (buf, bytes) = unsafe { mmap_keeparound(py, data)? };
610
616
611 Self::create_instance(
617 Self::create_instance(
612 py,
618 py,
613 hg::index::Index::new(
619 hg::index::Index::new(
614 bytes,
620 bytes,
615 IndexHeader::parse(&header.to_be_bytes())
621 IndexHeader::parse(&header.to_be_bytes())
616 .expect("default header is broken")
622 .expect("default header is broken")
617 .unwrap(),
623 .unwrap(),
618 )
624 )
619 .map_err(|e| {
625 .map_err(|e| {
620 revlog_error_with_msg(py, e.to_string().as_bytes())
626 revlog_error_with_msg(py, e.to_string().as_bytes())
621 })?,
627 })?,
622 RefCell::new(None),
628 RefCell::new(None),
623 RefCell::new(None),
629 RefCell::new(None),
624 RefCell::new(None),
630 RefCell::new(None),
625 RefCell::new(Some(buf)),
631 RefCell::new(Some(buf)),
626 RefCell::new(None),
632 RefCell::new(None),
627 )
633 )
628 }
634 }
629
635
630 fn len(&self, py: Python) -> PyResult<usize> {
636 fn len(&self, py: Python) -> PyResult<usize> {
631 let rust_index_len = self.index(py).borrow().len();
637 let rust_index_len = self.index(py).borrow().len();
632 Ok(rust_index_len)
638 Ok(rust_index_len)
633 }
639 }
634
640
635 /// This is scaffolding at this point, but it could also become
641 /// This is scaffolding at this point, but it could also become
636 /// a way to start a persistent nodemap or perform a
642 /// a way to start a persistent nodemap or perform a
637 /// vacuum / repack operation
643 /// vacuum / repack operation
638 fn fill_nodemap(
644 fn fill_nodemap(
639 &self,
645 &self,
640 py: Python,
646 py: Python,
641 nt: &mut CoreNodeTree,
647 nt: &mut CoreNodeTree,
642 ) -> PyResult<PyObject> {
648 ) -> PyResult<PyObject> {
643 let index = self.index(py).borrow();
649 let index = self.index(py).borrow();
644 for r in 0..self.len(py)? {
650 for r in 0..self.len(py)? {
645 let rev = Revision(r as BaseRevision);
651 let rev = Revision(r as BaseRevision);
646 // in this case node() won't ever return None
652 // in this case node() won't ever return None
647 nt.insert(&*index, index.node(rev).unwrap(), rev)
653 nt.insert(&*index, index.node(rev).unwrap(), rev)
648 .map_err(|e| nodemap_error(py, e))?
654 .map_err(|e| nodemap_error(py, e))?
649 }
655 }
650 Ok(py.None())
656 Ok(py.None())
651 }
657 }
652
658
653 fn get_nodetree<'a>(
659 fn get_nodetree<'a>(
654 &'a self,
660 &'a self,
655 py: Python<'a>,
661 py: Python<'a>,
656 ) -> PyResult<&'a RefCell<Option<CoreNodeTree>>> {
662 ) -> PyResult<&'a RefCell<Option<CoreNodeTree>>> {
657 if self.nt(py).borrow().is_none() {
663 if self.nt(py).borrow().is_none() {
658 let readonly = Box::<Vec<_>>::default();
664 let readonly = Box::<Vec<_>>::default();
659 let mut nt = CoreNodeTree::load_bytes(readonly, 0);
665 let mut nt = CoreNodeTree::load_bytes(readonly, 0);
660 self.fill_nodemap(py, &mut nt)?;
666 self.fill_nodemap(py, &mut nt)?;
661 self.nt(py).borrow_mut().replace(nt);
667 self.nt(py).borrow_mut().replace(nt);
662 }
668 }
663 Ok(self.nt(py))
669 Ok(self.nt(py))
664 }
670 }
665
671
666 /// Returns the full nodemap bytes to be written as-is to disk
672 /// Returns the full nodemap bytes to be written as-is to disk
667 fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
673 fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
668 let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
674 let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
669 let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
675 let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
670
676
671 // If there's anything readonly, we need to build the data again from
677 // If there's anything readonly, we need to build the data again from
672 // scratch
678 // scratch
673 let bytes = if readonly.len() > 0 {
679 let bytes = if readonly.len() > 0 {
674 let mut nt = CoreNodeTree::load_bytes(Box::<Vec<_>>::default(), 0);
680 let mut nt = CoreNodeTree::load_bytes(Box::<Vec<_>>::default(), 0);
675 self.fill_nodemap(py, &mut nt)?;
681 self.fill_nodemap(py, &mut nt)?;
676
682
677 let (readonly, bytes) = nt.into_readonly_and_added_bytes();
683 let (readonly, bytes) = nt.into_readonly_and_added_bytes();
678 assert_eq!(readonly.len(), 0);
684 assert_eq!(readonly.len(), 0);
679
685
680 bytes
686 bytes
681 } else {
687 } else {
682 bytes
688 bytes
683 };
689 };
684
690
685 let bytes = PyBytes::new(py, &bytes);
691 let bytes = PyBytes::new(py, &bytes);
686 Ok(bytes)
692 Ok(bytes)
687 }
693 }
688
694
689 /// Returns the last saved docket along with the size of any changed data
695 /// Returns the last saved docket along with the size of any changed data
690 /// (in number of blocks), and said data as bytes.
696 /// (in number of blocks), and said data as bytes.
691 fn inner_nodemap_data_incremental(
697 fn inner_nodemap_data_incremental(
692 &self,
698 &self,
693 py: Python,
699 py: Python,
694 ) -> PyResult<PyObject> {
700 ) -> PyResult<PyObject> {
695 let docket = self.docket(py).borrow();
701 let docket = self.docket(py).borrow();
696 let docket = match docket.as_ref() {
702 let docket = match docket.as_ref() {
697 Some(d) => d,
703 Some(d) => d,
698 None => return Ok(py.None()),
704 None => return Ok(py.None()),
699 };
705 };
700
706
701 let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
707 let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
702 let masked_blocks = node_tree.masked_readonly_blocks();
708 let masked_blocks = node_tree.masked_readonly_blocks();
703 let (_, data) = node_tree.into_readonly_and_added_bytes();
709 let (_, data) = node_tree.into_readonly_and_added_bytes();
704 let changed = masked_blocks * std::mem::size_of::<Block>();
710 let changed = masked_blocks * std::mem::size_of::<Block>();
705
711
706 Ok((docket, changed, PyBytes::new(py, &data))
712 Ok((docket, changed, PyBytes::new(py, &data))
707 .to_py_object(py)
713 .to_py_object(py)
708 .into_object())
714 .into_object())
709 }
715 }
710
716
711 /// Update the nodemap from the new (mmaped) data.
717 /// Update the nodemap from the new (mmaped) data.
712 /// The docket is kept as a reference for later incremental calls.
718 /// The docket is kept as a reference for later incremental calls.
713 fn inner_update_nodemap_data(
719 fn inner_update_nodemap_data(
714 &self,
720 &self,
715 py: Python,
721 py: Python,
716 docket: PyObject,
722 docket: PyObject,
717 nm_data: PyObject,
723 nm_data: PyObject,
718 ) -> PyResult<PyObject> {
724 ) -> PyResult<PyObject> {
719 // Safety: we keep the buffer around inside the class as `nodemap_mmap`
725 // Safety: we keep the buffer around inside the class as `nodemap_mmap`
720 let (buf, bytes) = unsafe { mmap_keeparound(py, nm_data)? };
726 let (buf, bytes) = unsafe { mmap_keeparound(py, nm_data)? };
721 let len = buf.item_count();
727 let len = buf.item_count();
722 self.nodemap_mmap(py).borrow_mut().replace(buf);
728 self.nodemap_mmap(py).borrow_mut().replace(buf);
723
729
724 let mut nt = CoreNodeTree::load_bytes(bytes, len);
730 let mut nt = CoreNodeTree::load_bytes(bytes, len);
725
731
726 let data_tip = docket
732 let data_tip = docket
727 .getattr(py, "tip_rev")?
733 .getattr(py, "tip_rev")?
728 .extract::<BaseRevision>(py)?
734 .extract::<BaseRevision>(py)?
729 .into();
735 .into();
730 self.docket(py).borrow_mut().replace(docket.clone_ref(py));
736 self.docket(py).borrow_mut().replace(docket.clone_ref(py));
731 let idx = self.index(py).borrow();
737 let idx = self.index(py).borrow();
732 let data_tip = idx.check_revision(data_tip).ok_or_else(|| {
738 let data_tip = idx.check_revision(data_tip).ok_or_else(|| {
733 nodemap_error(py, NodeMapError::RevisionNotInIndex(data_tip))
739 nodemap_error(py, NodeMapError::RevisionNotInIndex(data_tip))
734 })?;
740 })?;
735 let current_tip = idx.len();
741 let current_tip = idx.len();
736
742
737 for r in (data_tip.0 + 1)..current_tip as BaseRevision {
743 for r in (data_tip.0 + 1)..current_tip as BaseRevision {
738 let rev = Revision(r);
744 let rev = Revision(r);
739 // in this case node() won't ever return None
745 // in this case node() won't ever return None
740 nt.insert(&*idx, idx.node(rev).unwrap(), rev)
746 nt.insert(&*idx, idx.node(rev).unwrap(), rev)
741 .map_err(|e| nodemap_error(py, e))?
747 .map_err(|e| nodemap_error(py, e))?
742 }
748 }
743
749
744 *self.nt(py).borrow_mut() = Some(nt);
750 *self.nt(py).borrow_mut() = Some(nt);
745
751
746 Ok(py.None())
752 Ok(py.None())
747 }
753 }
748
754
749 fn inner_getitem(&self, py: Python, key: PyObject) -> PyResult<PyObject> {
755 fn inner_getitem(&self, py: Python, key: PyObject) -> PyResult<PyObject> {
750 let idx = self.index(py).borrow();
756 let idx = self.index(py).borrow();
751 Ok(match key.extract::<BaseRevision>(py) {
757 Ok(match key.extract::<BaseRevision>(py) {
752 Ok(key_as_int) => {
758 Ok(key_as_int) => {
753 let entry_params = if key_as_int == NULL_REVISION.0 {
759 let entry_params = if key_as_int == NULL_REVISION.0 {
754 RevisionDataParams::default()
760 RevisionDataParams::default()
755 } else {
761 } else {
756 let rev = UncheckedRevision(key_as_int);
762 let rev = UncheckedRevision(key_as_int);
757 match idx.entry_as_params(rev) {
763 match idx.entry_as_params(rev) {
758 Some(e) => e,
764 Some(e) => e,
759 None => {
765 None => {
760 return Err(PyErr::new::<IndexError, _>(
766 return Err(PyErr::new::<IndexError, _>(
761 py,
767 py,
762 "revlog index out of range",
768 "revlog index out of range",
763 ));
769 ));
764 }
770 }
765 }
771 }
766 };
772 };
767 revision_data_params_to_py_tuple(py, entry_params)
773 revision_data_params_to_py_tuple(py, entry_params)
768 .into_object()
774 .into_object()
769 }
775 }
770 _ => self.get_rev(py, key.extract::<PyBytes>(py)?)?.map_or_else(
776 _ => self.get_rev(py, key.extract::<PyBytes>(py)?)?.map_or_else(
771 || py.None(),
777 || py.None(),
772 |py_rev| py_rev.into_py_object(py).into_object(),
778 |py_rev| py_rev.into_py_object(py).into_object(),
773 ),
779 ),
774 })
780 })
775 }
781 }
776
782
+    fn inner_head_node_ids(&self, py: Python) -> PyResult<PyObject> {
+        let index = &*self.index(py).borrow();
+
+        // We don't use the shortcut here, as it's actually slower to loop
+        // through the cached `PyList` than to re-do the whole computation for
+        // large lists, which are the performance sensitive ones anyway.
+        let head_revs = index.head_revs().map_err(|e| graph_error(py, e))?;
+        let res: Vec<_> = head_revs
+            .iter()
+            .map(|r| {
+                PyBytes::new(
+                    py,
+                    index
+                        .node(*r)
+                        .expect("rev should have been in the index")
+                        .as_bytes(),
+                )
+                .into_object()
+            })
+            .collect();
+
+        self.cache_new_heads_py_list(head_revs, py);
+
+        Ok(PyList::new(py, &res).into_object())
+    }
+
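    // Annotation (not from the source): the Python side can now fetch all
    // head nodes in a single call, something like
    //
    //     heads = index.head_node_ids()  # list of binary node ids
    //
    // instead of calling back once per head revision, which is what makes
    // this a fast path for large repositories.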
    fn inner_headrevs(&self, py: Python) -> PyResult<PyObject> {
        let index = &*self.index(py).borrow();
        if let Some(new_heads) =
            index.head_revs_shortcut().map_err(|e| graph_error(py, e))?
        {
            self.cache_new_heads_py_list(new_heads, py);
        }

        Ok(self
            .head_revs_py_list(py)
            .borrow()
            .as_ref()
            .expect("head revs should be cached")
            .clone_ref(py)
            .into_object())
    }

    fn inner_headrevsfiltered(
        &self,
        py: Python,
        filtered_revs: &PyObject,
    ) -> PyResult<PyObject> {
        let index = &mut *self.index(py).borrow_mut();
        let filtered_revs = rev_pyiter_collect(py, filtered_revs, index)?;

        if let Some(new_heads) = index
            .head_revs_filtered(&filtered_revs, true)
            .map_err(|e| graph_error(py, e))?
        {
            self.cache_new_heads_py_list(new_heads, py);
        }

        Ok(self
            .head_revs_py_list(py)
            .borrow()
            .as_ref()
            .expect("head revs should be cached")
            .clone_ref(py)
            .into_object())
    }

    fn cache_new_heads_py_list(
        &self,
        new_heads: Vec<Revision>,
        py: Python<'_>,
    ) -> PyList {
        let as_vec: Vec<PyObject> = new_heads
            .iter()
            .map(|r| PyRevision::from(*r).into_py_object(py).into_object())
            .collect();
        let new_heads_py_list = PyList::new(py, &as_vec);
        *self.head_revs_py_list(py).borrow_mut() =
            Some(new_heads_py_list.clone_ref(py));
        new_heads_py_list
    }

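    // Annotation (not from the source): `head_revs_shortcut()` appears to
    // return `Some(new_heads)` only when the heads actually changed;
    // otherwise the previously built `PyList` in `head_revs_py_list` is
    // handed back as-is, so repeated `headrevs()` calls avoid rebuilding
    // the Python objects.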
    fn inner_ancestors(
        &self,
        py: Python,
        py_revs: &PyTuple,
    ) -> PyResult<PyObject> {
        let index = &*self.index(py).borrow();
        let revs: Vec<_> = rev_pyiter_collect(py, py_revs.as_object(), index)?;
        let as_vec: Vec<_> = index
            .ancestors(&revs)
            .map_err(|e| graph_error(py, e))?
            .iter()
            .map(|r| PyRevision::from(*r).into_py_object(py).into_object())
            .collect();
        Ok(PyList::new(py, &as_vec).into_object())
    }

    fn inner_commonancestorsheads(
        &self,
        py: Python,
        py_revs: &PyTuple,
    ) -> PyResult<PyObject> {
        let index = &*self.index(py).borrow();
        let revs: Vec<_> = rev_pyiter_collect(py, py_revs.as_object(), index)?;
        let as_vec: Vec<_> = index
            .common_ancestor_heads(&revs)
            .map_err(|e| graph_error(py, e))?
            .iter()
            .map(|r| PyRevision::from(*r).into_py_object(py).into_object())
            .collect();
        Ok(PyList::new(py, &as_vec).into_object())
    }

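    // A small worked example of the semantics assumed above: in the
    // diamond graph
    //
    //     0 --- 1 ---.
    //      \          `-- 3   (3 merges 1 and 2)
    //       `-- 2 -------'
    //
    // `common_ancestor_heads(&[1, 2])` yields `[0]`, while
    // `common_ancestor_heads(&[3, 1])` yields `[1]`: a revision counts as
    // an ancestor of itself, so 1 shadows 0.
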
    fn inner_computephasesmapsets(
        &self,
        py: Python,
        py_roots: PyDict,
    ) -> PyResult<PyObject> {
        let index = &*self.index(py).borrow();
        let opt = self.get_nodetree(py)?.borrow();
        let nt = opt.as_ref().unwrap();
        let roots: Result<HashMap<Phase, Vec<Revision>>, PyErr> = py_roots
            .items_list(py)
            .iter(py)
            .map(|r| {
                let phase = r.get_item(py, 0)?;
                let nodes = r.get_item(py, 1)?;
                // Transform the nodes from Python to revs here since we
                // have access to the nodemap
                let revs: Result<_, _> = nodes
                    .iter(py)?
                    .map(|node| match node?.extract::<PyBytes>(py) {
                        Ok(py_bytes) => {
                            let node = node_from_py_bytes(py, &py_bytes)?;
                            nt.find_bin(index, node.into())
                                .map_err(|e| nodemap_error(py, e))?
                                .ok_or_else(|| revlog_error(py))
                        }
                        Err(e) => Err(e),
                    })
                    .collect();
                let phase = Phase::try_from(phase.extract::<usize>(py)?)
                    .map_err(|_| revlog_error(py));
                Ok((phase?, revs?))
            })
            .collect();
        let (len, phase_maps) = index
            .compute_phases_map_sets(roots?)
            .map_err(|e| graph_error(py, e))?;

        // Ugly hack, but temporary
        const IDX_TO_PHASE_NUM: [usize; 4] = [1, 2, 32, 96];
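        // (Assumed mapping, for readability: the four entries of
        // `phase_maps` correspond to the tracked, non-public phases,
        // whose Mercurial phase numbers are draft = 1, secret = 2,
        // archived = 32 and internal = 96.)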
        let py_phase_maps = PyDict::new(py);
        for (idx, roots) in phase_maps.iter().enumerate() {
            let phase_num = IDX_TO_PHASE_NUM[idx].into_py_object(py);
            // OPTIM: too bad we have to collect here. At least, we could
            // reuse the same Vec and allocate it with capacity
            // max(len(phase_maps)).
            let roots_vec: Vec<PyInt> = roots
                .iter()
                .map(|r| PyRevision::from(*r).into_py_object(py))
                .collect();
            py_phase_maps.set_item(
                py,
                phase_num,
                PySet::new(py, roots_vec)?,
            )?;
        }
        Ok(PyTuple::new(
            py,
            &[
                len.into_py_object(py).into_object(),
                py_phase_maps.into_object(),
            ],
        )
        .into_object())
    }

    fn inner_slicechunktodensity(
        &self,
        py: Python,
        revs: PyObject,
        target_density: f64,
        min_gap_size: usize,
    ) -> PyResult<PyObject> {
        let index = &*self.index(py).borrow();
        let revs: Vec<_> = rev_pyiter_collect(py, &revs, index)?;
        let as_nested_vec =
            index.slice_chunk_to_density(&revs, target_density, min_gap_size);
        let mut res = Vec::with_capacity(as_nested_vec.len());
        let mut py_chunk = Vec::new();
        for chunk in as_nested_vec {
            py_chunk.clear();
            py_chunk.reserve_exact(chunk.len());
            for rev in chunk {
                py_chunk.push(
                    PyRevision::from(rev).into_py_object(py).into_object(),
                );
            }
            res.push(PyList::new(py, &py_chunk).into_object());
        }
        // This is just to do the same as C, not sure why it does this
        if res.len() == 1 {
            Ok(PyTuple::new(py, &res).into_object())
        } else {
            Ok(PyList::new(py, &res).into_object())
        }
    }

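    // A rough sketch of the density criterion, assuming the same semantics
    // as the C/Python `_slicechunktodensity`: if the given revs span bytes
    // [0, 100) on disk but only [0, 30) and [70, 100) hold wanted data, the
    // read density is 60/100 = 0.6; when that falls below `target_density`
    // and the 40-byte hole is larger than `min_gap_size`, the chunk is
    // sliced at the gap into [0, 30) and [70, 100).
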
    fn inner_reachableroots2(
        &self,
        py: Python,
        min_root: UncheckedRevision,
        heads: PyObject,
        roots: PyObject,
        include_path: bool,
    ) -> PyResult<PyObject> {
        let index = &*self.index(py).borrow();
        let heads = rev_pyiter_collect_or_else(py, &heads, index, |_rev| {
            PyErr::new::<IndexError, _>(py, "head out of range")
        })?;
        let roots: Result<_, _> = roots
            .iter(py)?
            .map(|r| {
                r.and_then(|o| match o.extract::<PyRevision>(py) {
                    Ok(r) => Ok(UncheckedRevision(r.0)),
                    Err(e) => Err(e),
                })
            })
            .collect();
        let as_set = index
            .reachable_roots(min_root, heads, roots?, include_path)
            .map_err(|e| graph_error(py, e))?;
        let as_vec: Vec<PyObject> = as_set
            .iter()
            .map(|r| PyRevision::from(*r).into_py_object(py).into_object())
            .collect();
        Ok(PyList::new(py, &as_vec).into_object())
    }
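
    // Assumed semantics, mirroring the C `reachableroots2`: the result is
    // the subset of `roots` that are ancestors of the given `heads`
    // (bounded below by `min_root`); with `include_path`, every revision
    // lying on a path from such a root to one of the `heads` is included
    // as well.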
}

py_class!(pub class NodeTree |py| {
    data nt: RefCell<CoreNodeTree>;
    data index: RefCell<UnsafePyLeaked<PySharedIndex>>;

    def __new__(_cls, index: PyObject) -> PyResult<NodeTree> {
        let index = py_rust_index_to_graph(py, index)?;
        let nt = CoreNodeTree::default(); // in-RAM, fully mutable
        Self::create_instance(py, RefCell::new(nt), RefCell::new(index))
    }

    /// Tell whether the NodeTree is still valid
    ///
    /// In case of mutation of the index, the given results are not
    /// guaranteed to be correct, and in fact, the methods borrowing
    /// the inner index would fail because of `PySharedRef` poisoning
    /// (generation-based guard), same as iterating on a `dict` that has
    /// been meanwhile mutated.
    def is_invalidated(&self) -> PyResult<bool> {
        let leaked = self.index(py).borrow();
        // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
        let result = unsafe { leaked.try_borrow(py) };
        // There are two cases for `result` to be an error:
        // - the index has previously been mutably borrowed;
        // - there is currently a mutable borrow.
        // In both cases, previous results related to the index are no
        // longer guaranteed to be valid, hence the tree is invalidated.
        Ok(result.is_err())
    }

    def insert(&self, rev: PyRevision) -> PyResult<PyObject> {
        let leaked = self.index(py).borrow();
        // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
        let index = &*unsafe { leaked.try_borrow(py)? };

        let rev = UncheckedRevision(rev.0);
        let rev = index
            .check_revision(rev)
            .ok_or_else(|| rev_not_in_index(py, rev))?;
        if rev == NULL_REVISION {
            return Err(rev_not_in_index(py, rev.into()))
        }

        let entry = index.inner.get_entry(rev).unwrap();
        let mut nt = self.nt(py).borrow_mut();
        nt.insert(index, entry.hash(), rev).map_err(|e| nodemap_error(py, e))?;

        Ok(py.None())
    }

    /// Lookup by node hex prefix in the NodeTree, returning revision number.
    ///
    /// This is not part of the classical NodeTree API, but is good enough
    /// for unit testing, as in `test-rust-revlog.py`.
    def prefix_rev_lookup(
        &self,
        node_prefix: PyBytes
    ) -> PyResult<Option<PyRevision>> {
        let prefix = NodePrefix::from_hex(node_prefix.data(py))
            .map_err(|_| PyErr::new::<ValueError, _>(
                py,
                format!("Invalid node or prefix {:?}",
                        node_prefix.as_object()))
            )?;

        let nt = self.nt(py).borrow();
        let leaked = self.index(py).borrow();
        // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
        let index = &*unsafe { leaked.try_borrow(py)? };

        Ok(nt.find_bin(index, prefix)
            .map_err(|e| nodemap_error(py, e))?
            .map(|r| r.into())
        )
    }

    def shortest(&self, node: PyBytes) -> PyResult<usize> {
        let nt = self.nt(py).borrow();
        let leaked = self.index(py).borrow();
        // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
        let idx = &*unsafe { leaked.try_borrow(py)? };
        match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
        {
            Ok(Some(l)) => Ok(l),
            Ok(None) => Err(revlog_error(py)),
            Err(e) => Err(nodemap_error(py, e)),
        }
    }
});

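// A hedged usage sketch of the node tree, from the Rust side, with call
// shapes taken from the methods above (not a verbatim test):
//
//     let mut nt = CoreNodeTree::default();
//     nt.insert(index, entry.hash(), rev)?;          // index one revision
//     let found = nt.find_bin(index, prefix)?;       // -> Option<Revision>
//     let l = nt.unique_prefix_len_node(index, &node)?; // -> Option<usize>
//
// On the Python side, `test-rust-revlog.py` presumably exercises the same
// surface through `NodeTree(index)`, `insert`, `prefix_rev_lookup` and
// `shortest`.
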
fn revlog_error(py: Python) -> PyErr {
    match py
        .import("mercurial.error")
        .and_then(|m| m.get(py, "RevlogError"))
    {
        Err(e) => e,
        Ok(cls) => PyErr::from_instance(
            py,
            cls.call(py, (py.None(),), None).ok().into_py_object(py),
        ),
    }
}

fn revlog_error_with_msg(py: Python, msg: &[u8]) -> PyErr {
    match py
        .import("mercurial.error")
        .and_then(|m| m.get(py, "RevlogError"))
    {
        Err(e) => e,
        Ok(cls) => PyErr::from_instance(
            py,
            cls.call(py, (PyBytes::new(py, msg),), None)
                .ok()
                .into_py_object(py),
        ),
    }
}

fn graph_error(py: Python, _err: hg::GraphError) -> PyErr {
    // ParentOutOfRange is currently the only alternative
    // in `hg::GraphError`. The C index always raises this simple ValueError.
    PyErr::new::<ValueError, _>(py, "parent out of range")
}

fn nodemap_rev_not_in_index(py: Python, rev: UncheckedRevision) -> PyErr {
    PyErr::new::<ValueError, _>(
        py,
        format!(
            "Inconsistency: Revision {} found in nodemap \
             is not in revlog index",
            rev
        ),
    )
}

fn rev_not_in_index(py: Python, rev: UncheckedRevision) -> PyErr {
    PyErr::new::<ValueError, _>(
        py,
        format!("revlog index out of range: {}", rev),
    )
}

1135 fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
1167 fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
1136 match err {
1168 match err {
1137 NodeMapError::MultipleResults => revlog_error(py),
1169 NodeMapError::MultipleResults => revlog_error(py),
1138 NodeMapError::RevisionNotInIndex(r) => nodemap_rev_not_in_index(py, r),
1170 NodeMapError::RevisionNotInIndex(r) => nodemap_rev_not_in_index(py, r),
1139 }
1171 }
1140 }
1172 }
1141
1173
/// Create the module, with __package__ given from parent
pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
    let dotted_name = &format!("{}.revlog", package);
    let m = PyModule::new(py, dotted_name)?;
    m.add(py, "__package__", package)?;
    m.add(py, "__doc__", "RevLog - Rust implementations")?;

    m.add_class::<Index>(py)?;
    m.add_class::<NodeTree>(py)?;

    let sys = PyModule::import(py, "sys")?;
    let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
    sys_modules.set_item(py, dotted_name, &m)?;

    Ok(m)
}
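
// A hedged sketch of how a parent module might hook this in under
// rust-cpython conventions (caller names assumed, not shown in this file):
//
//     parent.add(py, "revlog", init_module(py, "mercurial.rustext")?)?;
//
// after which Python code can do `from mercurial.rustext import revlog`.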