copies: Keep changelog sidedata file open during copy tracing...
Simon Sapin
r48256:5fa083a5 default
@@ -1,1305 +1,1306 b''
1 # coding: utf8
1 # coding: utf8
2 # copies.py - copy detection for Mercurial
2 # copies.py - copy detection for Mercurial
3 #
3 #
4 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 import collections
11 import collections
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15 from .node import nullrev
15 from .node import nullrev
16
16
17 from . import (
17 from . import (
18 match as matchmod,
18 match as matchmod,
19 pathutil,
19 pathutil,
20 policy,
20 policy,
21 pycompat,
21 pycompat,
22 util,
22 util,
23 )
23 )
24
24
25
25
26 from .utils import stringutil
26 from .utils import stringutil
27
27
28 from .revlogutils import (
28 from .revlogutils import (
29 flagutil,
29 flagutil,
30 sidedata as sidedatamod,
30 sidedata as sidedatamod,
31 )
31 )
32
32
33 rustmod = policy.importrust("copy_tracing")
33 rustmod = policy.importrust("copy_tracing")
34
34
35
35
36 def _filter(src, dst, t):
36 def _filter(src, dst, t):
37 """filters out invalid copies after chaining"""
37 """filters out invalid copies after chaining"""
38
38
39 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
39 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
40 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
40 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
41 # in the following table (not including trivial cases). For example, case 6
41 # in the following table (not including trivial cases). For example, case 6
42 # is where a file existed in 'src' and remained under that name in 'mid' and
42 # is where a file existed in 'src' and remained under that name in 'mid' and
43 # then was renamed between 'mid' and 'dst'.
43 # then was renamed between 'mid' and 'dst'.
44 #
44 #
45 # case src mid dst result
45 # case src mid dst result
46 # 1 x y - -
46 # 1 x y - -
47 # 2 x y y x->y
47 # 2 x y y x->y
48 # 3 x y x -
48 # 3 x y x -
49 # 4 x y z x->z
49 # 4 x y z x->z
50 # 5 - x y -
50 # 5 - x y -
51 # 6 x x y x->y
51 # 6 x x y x->y
52 #
52 #
53 # _chain() takes care of chaining the copies in 'a' and 'b', but it
53 # _chain() takes care of chaining the copies in 'a' and 'b', but it
54 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
54 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
55 # between 5 and 6, so it includes all cases in its result.
55 # between 5 and 6, so it includes all cases in its result.
56 # Cases 1, 3, and 5 are then removed by _filter().
56 # Cases 1, 3, and 5 are then removed by _filter().
57
57
58 for k, v in list(t.items()):
58 for k, v in list(t.items()):
59 if k == v: # case 3
59 if k == v: # case 3
60 del t[k]
60 del t[k]
61 elif v not in src: # case 5
61 elif v not in src: # case 5
62 # remove copies from files that didn't exist
62 # remove copies from files that didn't exist
63 del t[k]
63 del t[k]
64 elif k not in dst: # case 1
64 elif k not in dst: # case 1
65 # remove copies to files that were then removed
65 # remove copies to files that were then removed
66 del t[k]
66 del t[k]
67
67
68
68
69 def _chain(prefix, suffix):
69 def _chain(prefix, suffix):
70 """chain two sets of copies 'prefix' and 'suffix'"""
70 """chain two sets of copies 'prefix' and 'suffix'"""
71 result = prefix.copy()
71 result = prefix.copy()
72 for key, value in pycompat.iteritems(suffix):
72 for key, value in pycompat.iteritems(suffix):
73 result[key] = prefix.get(value, value)
73 result[key] = prefix.get(value, value)
74 return result
74 return result
75
75
76
76
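# ---------------------------------------------------------------------------
# Editor's sketch (illustrative, not part of copies.py): how _chain() and
# _filter() work together on toy data.  'src' and 'dst' stand in for the
# manifests and only need to support membership tests, matching how
# _filter() uses them.
def _chain_and_filter_sketch():
    prefix = {b'mid': b'old'}   # old@src was renamed to mid
    suffix = {b'new': b'mid',   # mid was then renamed to new@dst (case 2/4)
              b'gone': b'mid'}  # ...and copied to a file later removed (case 1)
    chained = _chain(prefix, suffix)
    # chained == {b'mid': b'old', b'new': b'old', b'gone': b'old'}
    src = {b'old'}
    dst = {b'new'}
    _filter(src, dst, chained)
    return chained              # {b'new': b'old'} -- only the real rename survives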
77 def _tracefile(fctx, am, basemf):
77 def _tracefile(fctx, am, basemf):
78 """return file context that is the ancestor of fctx present in ancestor
78 """return file context that is the ancestor of fctx present in ancestor
79 manifest am
79 manifest am
80
80
81 Note: we used to try to stop after a given limit, however checking if that
81 Note: we used to try to stop after a given limit, however checking if that
82 limit is reached turned out to be very expensive, so we are better off
82 limit is reached turned out to be very expensive, so we are better off
83 disabling that feature."""
83 disabling that feature."""
84
84
85 for f in fctx.ancestors():
85 for f in fctx.ancestors():
86 path = f.path()
86 path = f.path()
87 if am.get(path, None) == f.filenode():
87 if am.get(path, None) == f.filenode():
88 return path
88 return path
89 if basemf and basemf.get(path, None) == f.filenode():
89 if basemf and basemf.get(path, None) == f.filenode():
90 return path
90 return path
91
91
92
92
93 def _dirstatecopies(repo, match=None):
93 def _dirstatecopies(repo, match=None):
94 ds = repo.dirstate
94 ds = repo.dirstate
95 c = ds.copies().copy()
95 c = ds.copies().copy()
96 for k in list(c):
96 for k in list(c):
97 if ds[k] not in b'anm' or (match and not match(k)):
97 if ds[k] not in b'anm' or (match and not match(k)):
98 del c[k]
98 del c[k]
99 return c
99 return c
100
100
101
101
102 def _computeforwardmissing(a, b, match=None):
102 def _computeforwardmissing(a, b, match=None):
103 """Computes which files are in b but not a.
103 """Computes which files are in b but not a.
104 This is its own function so extensions can easily wrap this call to see what
104 This is its own function so extensions can easily wrap this call to see what
105 files _forwardcopies is about to process.
105 files _forwardcopies is about to process.
106 """
106 """
107 ma = a.manifest()
107 ma = a.manifest()
108 mb = b.manifest()
108 mb = b.manifest()
109 return mb.filesnotin(ma, match=match)
109 return mb.filesnotin(ma, match=match)
110
110
111
111
112 def usechangesetcentricalgo(repo):
112 def usechangesetcentricalgo(repo):
113 """Checks if we should use changeset-centric copy algorithms"""
113 """Checks if we should use changeset-centric copy algorithms"""
114 if repo.filecopiesmode == b'changeset-sidedata':
114 if repo.filecopiesmode == b'changeset-sidedata':
115 return True
115 return True
116 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
116 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
117 changesetsource = (b'changeset-only', b'compatibility')
117 changesetsource = (b'changeset-only', b'compatibility')
118 return readfrom in changesetsource
118 return readfrom in changesetsource
119
119
120
120
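# Editor's note (illustrative, not part of copies.py): besides the sidedata
# storage mode checked above, the changeset-centric algorithms are selected
# by the `experimental.copies.read-from` configuration, e.g.:
#
#   [experimental]
#   copies.read-from = changeset-only
#
# ("compatibility" also selects them; any other value keeps the
# filelog-based tracing).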
121 def _committedforwardcopies(a, b, base, match):
121 def _committedforwardcopies(a, b, base, match):
122 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
122 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
123 # files might have to be traced back to the fctx parent of the last
123 # files might have to be traced back to the fctx parent of the last
124 # one-side-only changeset, but not further back than that
124 # one-side-only changeset, but not further back than that
125 repo = a._repo
125 repo = a._repo
126
126
127 if usechangesetcentricalgo(repo):
127 if usechangesetcentricalgo(repo):
128 return _changesetforwardcopies(a, b, match)
128 return _changesetforwardcopies(a, b, match)
129
129
130 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
130 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
131 dbg = repo.ui.debug
131 dbg = repo.ui.debug
132 if debug:
132 if debug:
133 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
133 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
134 am = a.manifest()
134 am = a.manifest()
135 basemf = None if base is None else base.manifest()
135 basemf = None if base is None else base.manifest()
136
136
137 # find where new files came from
137 # find where new files came from
138 # we currently don't try to find where old files went, too expensive
138 # we currently don't try to find where old files went, too expensive
139 # this means we can miss a case like 'hg rm b; hg cp a b'
139 # this means we can miss a case like 'hg rm b; hg cp a b'
140 cm = {}
140 cm = {}
141
141
142 # Computing the forward missing is quite expensive on large manifests, since
142 # Computing the forward missing is quite expensive on large manifests, since
143 # it compares the entire manifests. We can optimize it in the common use
143 # it compares the entire manifests. We can optimize it in the common use
144 # case of computing what copies are in a commit versus its parent (like
144 # case of computing what copies are in a commit versus its parent (like
145 # during a rebase or histedit). Note, we exclude merge commits from this
145 # during a rebase or histedit). Note, we exclude merge commits from this
146 # optimization, since the ctx.files() for a merge commit is not correct for
146 # optimization, since the ctx.files() for a merge commit is not correct for
147 # this comparison.
147 # this comparison.
148 forwardmissingmatch = match
148 forwardmissingmatch = match
149 if b.p1() == a and b.p2().rev() == nullrev:
149 if b.p1() == a and b.p2().rev() == nullrev:
150 filesmatcher = matchmod.exact(b.files())
150 filesmatcher = matchmod.exact(b.files())
151 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
151 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
152 if repo.ui.configbool(b'devel', b'copy-tracing.trace-all-files'):
152 if repo.ui.configbool(b'devel', b'copy-tracing.trace-all-files'):
153 missing = list(b.walk(match))
153 missing = list(b.walk(match))
154 # _computeforwardmissing(a, b, match=forwardmissingmatch)
154 # _computeforwardmissing(a, b, match=forwardmissingmatch)
155 if debug:
155 if debug:
156 dbg(b'debug.copies: searching all files: %d\n' % len(missing))
156 dbg(b'debug.copies: searching all files: %d\n' % len(missing))
157 else:
157 else:
158 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
158 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
159 if debug:
159 if debug:
160 dbg(
160 dbg(
161 b'debug.copies: missing files to search: %d\n'
161 b'debug.copies: missing files to search: %d\n'
162 % len(missing)
162 % len(missing)
163 )
163 )
164
164
165 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
165 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
166
166
167 for f in sorted(missing):
167 for f in sorted(missing):
168 if debug:
168 if debug:
169 dbg(b'debug.copies: tracing file: %s\n' % f)
169 dbg(b'debug.copies: tracing file: %s\n' % f)
170 fctx = b[f]
170 fctx = b[f]
171 fctx._ancestrycontext = ancestrycontext
171 fctx._ancestrycontext = ancestrycontext
172
172
173 if debug:
173 if debug:
174 start = util.timer()
174 start = util.timer()
175 opath = _tracefile(fctx, am, basemf)
175 opath = _tracefile(fctx, am, basemf)
176 if opath:
176 if opath:
177 if debug:
177 if debug:
178 dbg(b'debug.copies: rename of: %s\n' % opath)
178 dbg(b'debug.copies: rename of: %s\n' % opath)
179 cm[f] = opath
179 cm[f] = opath
180 if debug:
180 if debug:
181 dbg(
181 dbg(
182 b'debug.copies: time: %f seconds\n'
182 b'debug.copies: time: %f seconds\n'
183 % (util.timer() - start)
183 % (util.timer() - start)
184 )
184 )
185 return cm
185 return cm
186
186
187
187
188 def _revinfo_getter(repo, match):
188 def _revinfo_getter(repo, match):
189 """returns a function that returns the following data given a <rev>"
189 """returns a function that returns the following data given a <rev>"
190
190
191 * p1: revision number of first parent
191 * p1: revision number of first parent
192 * p2: revision number of first parent
192 * p2: revision number of first parent
193 * changes: a ChangingFiles object
193 * changes: a ChangingFiles object
194 """
194 """
195 cl = repo.changelog
195 cl = repo.changelog
196 parents = cl.parentrevs
196 parents = cl.parentrevs
197 flags = cl.flags
197 flags = cl.flags
198
198
199 HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO
199 HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO
200
200
201 changelogrevision = cl.changelogrevision
201 changelogrevision = cl.changelogrevision
202
202
203 if rustmod is not None:
203 if rustmod is not None:
204
204
205 def revinfo(rev):
205 def revinfo(rev):
206 p1, p2 = parents(rev)
206 p1, p2 = parents(rev)
207 if flags(rev) & HASCOPIESINFO:
207 if flags(rev) & HASCOPIESINFO:
208 raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
208 raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
209 else:
209 else:
210 raw = None
210 raw = None
211 return (p1, p2, raw)
211 return (p1, p2, raw)
212
212
213 else:
213 else:
214
214
215 def revinfo(rev):
215 def revinfo(rev):
216 p1, p2 = parents(rev)
216 p1, p2 = parents(rev)
217 if flags(rev) & HASCOPIESINFO:
217 if flags(rev) & HASCOPIESINFO:
218 changes = changelogrevision(rev).changes
218 changes = changelogrevision(rev).changes
219 else:
219 else:
220 changes = None
220 changes = None
221 return (p1, p2, changes)
221 return (p1, p2, changes)
222
222
223 return revinfo
223 return revinfo
224
224
225
225
226 def cached_is_ancestor(is_ancestor):
226 def cached_is_ancestor(is_ancestor):
227 """return a cached version of is_ancestor"""
227 """return a cached version of is_ancestor"""
228 cache = {}
228 cache = {}
229
229
230 def _is_ancestor(anc, desc):
230 def _is_ancestor(anc, desc):
231 if anc > desc:
231 if anc > desc:
232 return False
232 return False
233 elif anc == desc:
233 elif anc == desc:
234 return True
234 return True
235 key = (anc, desc)
235 key = (anc, desc)
236 ret = cache.get(key)
236 ret = cache.get(key)
237 if ret is None:
237 if ret is None:
238 ret = cache[key] = is_ancestor(anc, desc)
238 ret = cache[key] = is_ancestor(anc, desc)
239 return ret
239 return ret
240
240
241 return _is_ancestor
241 return _is_ancestor
242
242
243
243
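# Editor's sketch (illustrative, not part of copies.py): cached_is_ancestor()
# simply memoizes the underlying ancestry query, e.g.
#
#   isancestor = cached_is_ancestor(repo.changelog.isancestorrev)
#   isancestor(2, 7)   # computed and cached
#   isancestor(2, 7)   # answered from the cache
#
# The `anc > desc` shortcut relies on revision numbers growing towards
# descendants, so a higher revision can never be an ancestor of a lower one.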
244 def _changesetforwardcopies(a, b, match):
244 def _changesetforwardcopies(a, b, match):
245 if a.rev() in (nullrev, b.rev()):
245 if a.rev() in (nullrev, b.rev()):
246 return {}
246 return {}
247
247
248 repo = a.repo().unfiltered()
248 repo = a.repo().unfiltered()
249 children = {}
249 children = {}
250
250
251 cl = repo.changelog
251 cl = repo.changelog
252 isancestor = cl.isancestorrev
252 isancestor = cl.isancestorrev
253
253
254 # To track a rename from "A" to "B", we need to gather all parent → child
254 # To track a rename from "A" to "B", we need to gather all parent → child
255 # edges that are contained in `::B` but not in `::A`.
255 # edges that are contained in `::B` but not in `::A`.
256 #
256 #
257 #
257 #
258 # To do so, we need to gather all revisions exclusive¹ to "B" (ie¹: `::b -
258 # To do so, we need to gather all revisions exclusive¹ to "B" (ie¹: `::b -
259 # ::a`) and also all the "root points", ie the parents of the exclusive set
259 # ::a`) and also all the "root points", ie the parents of the exclusive set
260 # that belong to ::a. These are exactly all the revisions needed to express
260 # that belong to ::a. These are exactly all the revisions needed to express
261 # the parent → child edges we need to combine.
261 # the parent → child edges we need to combine.
262 #
262 #
263 # [1] actually, we need to gather all the edges within `(::a)::b`, ie:
263 # [1] actually, we need to gather all the edges within `(::a)::b`, ie:
264 # excluding paths that lead to roots that are not ancestors of `a`. We
264 # excluding paths that lead to roots that are not ancestors of `a`. We
265 # keep this out of the explanation because it is hard enough without this special case.
265 # keep this out of the explanation because it is hard enough without this special case.
266
266
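# Editor's illustration (not part of copies.py): with a graph such as
#
#     a --- r1 --- r2 --- b
#            \           /
#             --- r3 ----
#
# the exclusive set `::b - ::a` is {r1, r2, r3, b}, the only "root point"
# is `a` itself, and the parent → child edges to combine are a→r1, r1→r2,
# r1→r3, r2→b and r3→b.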
267 parents = cl._uncheckedparentrevs
267 parents = cl._uncheckedparentrevs
268 graph_roots = (nullrev, nullrev)
268 graph_roots = (nullrev, nullrev)
269
269
270 ancestors = cl.ancestors([a.rev()], inclusive=True)
270 ancestors = cl.ancestors([a.rev()], inclusive=True)
271 revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
271 revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
272 roots = set()
272 roots = set()
273 has_graph_roots = False
273 has_graph_roots = False
274 multi_thread = repo.ui.configbool(b'devel', b'copy-tracing.multi-thread')
274 multi_thread = repo.ui.configbool(b'devel', b'copy-tracing.multi-thread')
275
275
276 # iterate over `only(B, A)`
276 # iterate over `only(B, A)`
277 for r in revs:
277 for r in revs:
278 ps = parents(r)
278 ps = parents(r)
279 if ps == graph_roots:
279 if ps == graph_roots:
280 has_graph_roots = True
280 has_graph_roots = True
281 else:
281 else:
282 p1, p2 = ps
282 p1, p2 = ps
283
283
284 # find all the "root points" (see larger comment above)
284 # find all the "root points" (see larger comment above)
285 if p1 != nullrev and p1 in ancestors:
285 if p1 != nullrev and p1 in ancestors:
286 roots.add(p1)
286 roots.add(p1)
287 if p2 != nullrev and p2 in ancestors:
287 if p2 != nullrev and p2 in ancestors:
288 roots.add(p2)
288 roots.add(p2)
289 if not roots:
289 if not roots:
290 # no common revision to track copies from
290 # no common revision to track copies from
291 return {}
291 return {}
292 if has_graph_roots:
292 if has_graph_roots:
293 # this deals with the special case mentioned in the [1] footnote. We
293 # this deals with the special case mentioned in the [1] footnote. We
294 # must filter out revisions that lead to non-common graph roots.
294 # must filter out revisions that lead to non-common graph roots.
295 roots = list(roots)
295 roots = list(roots)
296 m = min(roots)
296 m = min(roots)
297 h = [b.rev()]
297 h = [b.rev()]
298 roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
298 roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
299 roots_to_head = set(roots_to_head)
299 roots_to_head = set(roots_to_head)
300 revs = [r for r in revs if r in roots_to_head]
300 revs = [r for r in revs if r in roots_to_head]
301
301
302 if repo.filecopiesmode == b'changeset-sidedata':
302 if repo.filecopiesmode == b'changeset-sidedata':
303 # When using side-data, we will process the edges "from" the children.
303 # When using side-data, we will process the edges "from" the children.
304 # We iterate over the children, gathering previously collected data for
304 # We iterate over the children, gathering previously collected data for
305 # the parents. To know when a parent's data is no longer necessary, we
305 # the parents. To know when a parent's data is no longer necessary, we
306 # keep a counter of how many children each revision has.
306 # keep a counter of how many children each revision has.
307 #
307 #
308 # An interesting property of `children_count` is that it only contains
308 # An interesting property of `children_count` is that it only contains
309 # revisions that will be relevant for an edge of the graph. So if a
309 # revisions that will be relevant for an edge of the graph. So if a
310 # child has a parent not in `children_count`, that edge should not be
310 # child has a parent not in `children_count`, that edge should not be
311 # processed.
311 # processed.
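# Editor's illustration (not part of copies.py): for a small linear stack
# root -> r1 -> r2 (with `roots = {root}` and `revs = [r1, r2]`), the loop
# below yields children_count == {root: 1, r1: 1, r2: 0}; once the r1 -> r2
# edge has been processed, r1's counter drops to zero and its intermediate
# copy data can be released (the `all_copies.pop()` branch in
# _combine_changeset_copies() below).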
312 children_count = dict((r, 0) for r in roots)
312 children_count = dict((r, 0) for r in roots)
313 for r in revs:
313 for r in revs:
314 for p in cl.parentrevs(r):
314 for p in cl.parentrevs(r):
315 if p == nullrev:
315 if p == nullrev:
316 continue
316 continue
317 children_count[r] = 0
317 children_count[r] = 0
318 if p in children_count:
318 if p in children_count:
319 children_count[p] += 1
319 children_count[p] += 1
320 revinfo = _revinfo_getter(repo, match)
320 revinfo = _revinfo_getter(repo, match)
321 return _combine_changeset_copies(
321 with repo.changelog.reading():
322 revs,
322 return _combine_changeset_copies(
323 children_count,
323 revs,
324 b.rev(),
324 children_count,
325 revinfo,
325 b.rev(),
326 match,
326 revinfo,
327 isancestor,
327 match,
328 multi_thread,
328 isancestor,
329 )
329 multi_thread,
330 )
330 else:
331 else:
331 # When not using side-data, we will process the edges "from" the parent.
332 # When not using side-data, we will process the edges "from" the parent.
332 # so we need a full mapping of the parent -> children relation.
333 # so we need a full mapping of the parent -> children relation.
333 children = dict((r, []) for r in roots)
334 children = dict((r, []) for r in roots)
334 for r in revs:
335 for r in revs:
335 for p in cl.parentrevs(r):
336 for p in cl.parentrevs(r):
336 if p == nullrev:
337 if p == nullrev:
337 continue
338 continue
338 children[r] = []
339 children[r] = []
339 if p in children:
340 if p in children:
340 children[p].append(r)
341 children[p].append(r)
341 x = revs.pop()
342 x = revs.pop()
342 assert x == b.rev()
343 assert x == b.rev()
343 revs.extend(roots)
344 revs.extend(roots)
344 revs.sort()
345 revs.sort()
345
346
346 revinfo = _revinfo_getter_extra(repo)
347 revinfo = _revinfo_getter_extra(repo)
347 return _combine_changeset_copies_extra(
348 return _combine_changeset_copies_extra(
348 revs, children, b.rev(), revinfo, match, isancestor
349 revs, children, b.rev(), revinfo, match, isancestor
349 )
350 )
350
351
351
352
352 def _combine_changeset_copies(
353 def _combine_changeset_copies(
353 revs, children_count, targetrev, revinfo, match, isancestor, multi_thread
354 revs, children_count, targetrev, revinfo, match, isancestor, multi_thread
354 ):
355 ):
355 """combine the copies information for each item of iterrevs
356 """combine the copies information for each item of iterrevs
356
357
357 revs: sorted iterable of revision to visit
358 revs: sorted iterable of revision to visit
358 children_count: a {parent: <number-of-relevant-children>} mapping.
359 children_count: a {parent: <number-of-relevant-children>} mapping.
359 targetrev: the final copies destination revision (not in iterrevs)
360 targetrev: the final copies destination revision (not in iterrevs)
360 revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
361 revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
361 match: a matcher
362 match: a matcher
362
363
363 It returns the aggregated copies information for `targetrev`.
364 It returns the aggregated copies information for `targetrev`.
364 """
365 """
365
366
366 alwaysmatch = match.always()
367 alwaysmatch = match.always()
367
368
368 if rustmod is not None:
369 if rustmod is not None:
369 final_copies = rustmod.combine_changeset_copies(
370 final_copies = rustmod.combine_changeset_copies(
370 list(revs), children_count, targetrev, revinfo, multi_thread
371 list(revs), children_count, targetrev, revinfo, multi_thread
371 )
372 )
372 else:
373 else:
373 isancestor = cached_is_ancestor(isancestor)
374 isancestor = cached_is_ancestor(isancestor)
374
375
375 all_copies = {}
376 all_copies = {}
376 # iterate over all the "children" side of copy tracing "edge"
377 # iterate over all the "children" side of copy tracing "edge"
377 for current_rev in revs:
378 for current_rev in revs:
378 p1, p2, changes = revinfo(current_rev)
379 p1, p2, changes = revinfo(current_rev)
379 current_copies = None
380 current_copies = None
380 # iterate over all parents to chain the existing data with the
381 # iterate over all parents to chain the existing data with the
381 # data from the parent β†’ child edge.
382 # data from the parent β†’ child edge.
382 for parent, parent_rev in ((1, p1), (2, p2)):
383 for parent, parent_rev in ((1, p1), (2, p2)):
383 if parent_rev == nullrev:
384 if parent_rev == nullrev:
384 continue
385 continue
385 remaining_children = children_count.get(parent_rev)
386 remaining_children = children_count.get(parent_rev)
386 if remaining_children is None:
387 if remaining_children is None:
387 continue
388 continue
388 remaining_children -= 1
389 remaining_children -= 1
389 children_count[parent_rev] = remaining_children
390 children_count[parent_rev] = remaining_children
390 if remaining_children:
391 if remaining_children:
391 copies = all_copies.get(parent_rev, None)
392 copies = all_copies.get(parent_rev, None)
392 else:
393 else:
393 copies = all_copies.pop(parent_rev, None)
394 copies = all_copies.pop(parent_rev, None)
394
395
395 if copies is None:
396 if copies is None:
396 # this is a root
397 # this is a root
397 newcopies = copies = {}
398 newcopies = copies = {}
398 elif remaining_children:
399 elif remaining_children:
399 newcopies = copies.copy()
400 newcopies = copies.copy()
400 else:
401 else:
401 newcopies = copies
402 newcopies = copies
402 # chain the data in the edge with the existing data
403 # chain the data in the edge with the existing data
403 if changes is not None:
404 if changes is not None:
404 childcopies = {}
405 childcopies = {}
405 if parent == 1:
406 if parent == 1:
406 childcopies = changes.copied_from_p1
407 childcopies = changes.copied_from_p1
407 elif parent == 2:
408 elif parent == 2:
408 childcopies = changes.copied_from_p2
409 childcopies = changes.copied_from_p2
409
410
410 if childcopies:
411 if childcopies:
411 newcopies = copies.copy()
412 newcopies = copies.copy()
412 for dest, source in pycompat.iteritems(childcopies):
413 for dest, source in pycompat.iteritems(childcopies):
413 prev = copies.get(source)
414 prev = copies.get(source)
414 if prev is not None and prev[1] is not None:
415 if prev is not None and prev[1] is not None:
415 source = prev[1]
416 source = prev[1]
416 newcopies[dest] = (current_rev, source)
417 newcopies[dest] = (current_rev, source)
417 assert newcopies is not copies
418 assert newcopies is not copies
418 if changes.removed:
419 if changes.removed:
419 for f in changes.removed:
420 for f in changes.removed:
420 if f in newcopies:
421 if f in newcopies:
421 if newcopies is copies:
422 if newcopies is copies:
422 # copy on write to avoid affecting potential other
423 # copy on write to avoid affecting potential other
423 # branches. when there are no other branches, this
424 # branches. when there are no other branches, this
424 # could be avoided.
425 # could be avoided.
425 newcopies = copies.copy()
426 newcopies = copies.copy()
426 newcopies[f] = (current_rev, None)
427 newcopies[f] = (current_rev, None)
427 # check potential need to combine the data from another parent (for
428 # check potential need to combine the data from another parent (for
428 # that child). See comment below for details.
429 # that child). See comment below for details.
429 if current_copies is None:
430 if current_copies is None:
430 current_copies = newcopies
431 current_copies = newcopies
431 else:
432 else:
432 # we are the second parent to work on c, we need to merge our
433 # we are the second parent to work on c, we need to merge our
433 # work with the other.
434 # work with the other.
434 #
435 #
435 # In case of conflict, parent 1 takes precedence over parent 2.
436 # In case of conflict, parent 1 takes precedence over parent 2.
436 # This is an arbitrary choice made anew when implementing
437 # This is an arbitrary choice made anew when implementing
437 # changeset-based copies. It was made without regard to
438 # changeset-based copies. It was made without regard to
438 # potential filelog-related behavior.
439 # potential filelog-related behavior.
439 assert parent == 2
440 assert parent == 2
440 current_copies = _merge_copies_dict(
441 current_copies = _merge_copies_dict(
441 newcopies,
442 newcopies,
442 current_copies,
443 current_copies,
443 isancestor,
444 isancestor,
444 changes,
445 changes,
445 current_rev,
446 current_rev,
446 )
447 )
447 all_copies[current_rev] = current_copies
448 all_copies[current_rev] = current_copies
448
449
449 # filter out internal details and return a {dest: source mapping}
450 # filter out internal details and return a {dest: source mapping}
450 final_copies = {}
451 final_copies = {}
451 for dest, (tt, source) in all_copies[targetrev].items():
452 for dest, (tt, source) in all_copies[targetrev].items():
452 if source is not None:
453 if source is not None:
453 final_copies[dest] = source
454 final_copies[dest] = source
454 if not alwaysmatch:
455 if not alwaysmatch:
455 for filename in list(final_copies.keys()):
456 for filename in list(final_copies.keys()):
456 if not match(filename):
457 if not match(filename):
457 del final_copies[filename]
458 del final_copies[filename]
458 return final_copies
459 return final_copies
459
460
460
461
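# Editor's illustration (not part of copies.py): for a linear graph
# 0 -> 1 -> 2 where revision 1 records a copy a -> b and revision 2 removes
# b, _combine_changeset_copies() starts from an empty mapping at the root,
# turns it into {b'b': (1, b'a')} along the 0 -> 1 edge, then into
# {b'b': (2, None)} along the 1 -> 2 edge; the final filtering step skips
# entries whose source is None, so the aggregated result for revision 2 is {}.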
461 # constant to decide which side to pick with _merge_copies_dict
462 # constant to decide which side to pick with _merge_copies_dict
462 PICK_MINOR = 0
463 PICK_MINOR = 0
463 PICK_MAJOR = 1
464 PICK_MAJOR = 1
464 PICK_EITHER = 2
465 PICK_EITHER = 2
465
466
466
467
467 def _merge_copies_dict(minor, major, isancestor, changes, current_merge):
468 def _merge_copies_dict(minor, major, isancestor, changes, current_merge):
468 """merge two copies-mapping together, minor and major
469 """merge two copies-mapping together, minor and major
469
470
470 In case of conflict, value from "major" will be picked.
471 In case of conflict, value from "major" will be picked.
471
472
472 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
473 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
473 ancestors of `high_rev`,
474 ancestors of `high_rev`,
474
475
475 - `ismerged(path)`: callable return True if `path` have been merged in the
476 - `ismerged(path)`: callable return True if `path` have been merged in the
476 current revision,
477 current revision,
477
478
478 return the resulting dict (in practice, the "minor" object, updated)
479 return the resulting dict (in practice, the "minor" object, updated)
479 """
480 """
480 for dest, value in major.items():
481 for dest, value in major.items():
481 other = minor.get(dest)
482 other = minor.get(dest)
482 if other is None:
483 if other is None:
483 minor[dest] = value
484 minor[dest] = value
484 else:
485 else:
485 pick, overwrite = _compare_values(
486 pick, overwrite = _compare_values(
486 changes, isancestor, dest, other, value
487 changes, isancestor, dest, other, value
487 )
488 )
488 if overwrite:
489 if overwrite:
489 if pick == PICK_MAJOR:
490 if pick == PICK_MAJOR:
490 minor[dest] = (current_merge, value[1])
491 minor[dest] = (current_merge, value[1])
491 else:
492 else:
492 minor[dest] = (current_merge, other[1])
493 minor[dest] = (current_merge, other[1])
493 elif pick == PICK_MAJOR:
494 elif pick == PICK_MAJOR:
494 minor[dest] = value
495 minor[dest] = value
495 return minor
496 return minor
496
497
497
498
498 def _compare_values(changes, isancestor, dest, minor, major):
499 def _compare_values(changes, isancestor, dest, minor, major):
499 """compare two value within a _merge_copies_dict loop iteration
500 """compare two value within a _merge_copies_dict loop iteration
500
501
501 return (pick, overwrite).
502 return (pick, overwrite).
502
503
503 - pick is one of PICK_MINOR, PICK_MAJOR or PICK_EITHER
504 - pick is one of PICK_MINOR, PICK_MAJOR or PICK_EITHER
504 - overwrite is True if pick is a return of an ambiguity that needs resolution.
505 - overwrite is True if pick is a return of an ambiguity that needs resolution.
505 """
506 """
506 major_tt, major_value = major
507 major_tt, major_value = major
507 minor_tt, minor_value = minor
508 minor_tt, minor_value = minor
508
509
509 if major_tt == minor_tt:
510 if major_tt == minor_tt:
510 # if it comes from the same revision it must be the same value
511 # if it comes from the same revision it must be the same value
511 assert major_value == minor_value
512 assert major_value == minor_value
512 return PICK_EITHER, False
513 return PICK_EITHER, False
513 elif (
514 elif (
514 changes is not None
515 changes is not None
515 and minor_value is not None
516 and minor_value is not None
516 and major_value is None
517 and major_value is None
517 and dest in changes.salvaged
518 and dest in changes.salvaged
518 ):
519 ):
519 # In this case, a deletion was reverted, the "alive" value overwrites
520 # In this case, a deletion was reverted, the "alive" value overwrites
520 # the deleted one.
521 # the deleted one.
521 return PICK_MINOR, True
522 return PICK_MINOR, True
522 elif (
523 elif (
523 changes is not None
524 changes is not None
524 and major_value is not None
525 and major_value is not None
525 and minor_value is None
526 and minor_value is None
526 and dest in changes.salvaged
527 and dest in changes.salvaged
527 ):
528 ):
528 # In this case, a deletion was reverted, the "alive" value overwrites
529 # In this case, a deletion was reverted, the "alive" value overwrites
529 # the deleted one.
530 # the deleted one.
530 return PICK_MAJOR, True
531 return PICK_MAJOR, True
531 elif isancestor(minor_tt, major_tt):
532 elif isancestor(minor_tt, major_tt):
532 if changes is not None and dest in changes.merged:
533 if changes is not None and dest in changes.merged:
533 # change to dest happened on the branch without copy-source change,
534 # change to dest happened on the branch without copy-source change,
534 # so both sources are valid and "major" wins.
535 # so both sources are valid and "major" wins.
535 return PICK_MAJOR, True
536 return PICK_MAJOR, True
536 else:
537 else:
537 return PICK_MAJOR, False
538 return PICK_MAJOR, False
538 elif isancestor(major_tt, minor_tt):
539 elif isancestor(major_tt, minor_tt):
539 if changes is not None and dest in changes.merged:
540 if changes is not None and dest in changes.merged:
540 # change to dest happened on the branch without copy-source change,
541 # change to dest happened on the branch without copy-source change,
541 # so both sources are valid and "major" wins.
542 # so both sources are valid and "major" wins.
542 return PICK_MAJOR, True
543 return PICK_MAJOR, True
543 else:
544 else:
544 return PICK_MINOR, False
545 return PICK_MINOR, False
545 elif minor_value is None:
546 elif minor_value is None:
546 # in case of conflict, the "alive" side wins.
547 # in case of conflict, the "alive" side wins.
547 return PICK_MAJOR, True
548 return PICK_MAJOR, True
548 elif major_value is None:
549 elif major_value is None:
549 # in case of conflict, the "alive" side wins.
550 # in case of conflict, the "alive" side wins.
550 return PICK_MINOR, True
551 return PICK_MINOR, True
551 else:
552 else:
552 # in case of conflict where both sides are alive, major wins.
553 # in case of conflict where both sides are alive, major wins.
553 return PICK_MAJOR, True
554 return PICK_MAJOR, True
554
555
555
556
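# Editor's illustration (not part of copies.py): when the two sides of a merge
# disagree about the same destination, _compare_values() above returns
# (PICK_MAJOR, False) if the "minor" side is an ancestor of the "major" side
# (newer information simply wins), but (PICK_MAJOR, True) when neither side
# descends from the other and both sources are alive; in that second case
# _merge_copies_dict() re-stamps the entry with the merge revision itself.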
556 def _revinfo_getter_extra(repo):
557 def _revinfo_getter_extra(repo):
557 """return a function that return multiple data given a <rev>"i
558 """return a function that return multiple data given a <rev>"i
558
559
559 * p1: revision number of first parent
560 * p1: revision number of first parent
560 * p2: revision number of first parent
561 * p2: revision number of first parent
561 * p1copies: mapping of copies from p1
562 * p1copies: mapping of copies from p1
562 * p2copies: mapping of copies from p2
563 * p2copies: mapping of copies from p2
563 * removed: a list of removed files
564 * removed: a list of removed files
564 * ismerged: a callback to know if file was merged in that revision
565 * ismerged: a callback to know if file was merged in that revision
565 """
566 """
566 cl = repo.changelog
567 cl = repo.changelog
567 parents = cl.parentrevs
568 parents = cl.parentrevs
568
569
569 def get_ismerged(rev):
570 def get_ismerged(rev):
570 ctx = repo[rev]
571 ctx = repo[rev]
571
572
572 def ismerged(path):
573 def ismerged(path):
573 if path not in ctx.files():
574 if path not in ctx.files():
574 return False
575 return False
575 fctx = ctx[path]
576 fctx = ctx[path]
576 parents = fctx._filelog.parents(fctx._filenode)
577 parents = fctx._filelog.parents(fctx._filenode)
577 nb_parents = 0
578 nb_parents = 0
578 for n in parents:
579 for n in parents:
579 if n != repo.nullid:
580 if n != repo.nullid:
580 nb_parents += 1
581 nb_parents += 1
581 return nb_parents >= 2
582 return nb_parents >= 2
582
583
583 return ismerged
584 return ismerged
584
585
585 def revinfo(rev):
586 def revinfo(rev):
586 p1, p2 = parents(rev)
587 p1, p2 = parents(rev)
587 ctx = repo[rev]
588 ctx = repo[rev]
588 p1copies, p2copies = ctx._copies
589 p1copies, p2copies = ctx._copies
589 removed = ctx.filesremoved()
590 removed = ctx.filesremoved()
590 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
591 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
591
592
592 return revinfo
593 return revinfo
593
594
594
595
595 def _combine_changeset_copies_extra(
596 def _combine_changeset_copies_extra(
596 revs, children, targetrev, revinfo, match, isancestor
597 revs, children, targetrev, revinfo, match, isancestor
597 ):
598 ):
598 """version of `_combine_changeset_copies` that works with the Google
599 """version of `_combine_changeset_copies` that works with the Google
599 specific "extra" based storage for copy information"""
600 specific "extra" based storage for copy information"""
600 all_copies = {}
601 all_copies = {}
601 alwaysmatch = match.always()
602 alwaysmatch = match.always()
602 for r in revs:
603 for r in revs:
603 copies = all_copies.pop(r, None)
604 copies = all_copies.pop(r, None)
604 if copies is None:
605 if copies is None:
605 # this is a root
606 # this is a root
606 copies = {}
607 copies = {}
607 for i, c in enumerate(children[r]):
608 for i, c in enumerate(children[r]):
608 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
609 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
609 if r == p1:
610 if r == p1:
610 parent = 1
611 parent = 1
611 childcopies = p1copies
612 childcopies = p1copies
612 else:
613 else:
613 assert r == p2
614 assert r == p2
614 parent = 2
615 parent = 2
615 childcopies = p2copies
616 childcopies = p2copies
616 if not alwaysmatch:
617 if not alwaysmatch:
617 childcopies = {
618 childcopies = {
618 dst: src for dst, src in childcopies.items() if match(dst)
619 dst: src for dst, src in childcopies.items() if match(dst)
619 }
620 }
620 newcopies = copies
621 newcopies = copies
621 if childcopies:
622 if childcopies:
622 newcopies = copies.copy()
623 newcopies = copies.copy()
623 for dest, source in pycompat.iteritems(childcopies):
624 for dest, source in pycompat.iteritems(childcopies):
624 prev = copies.get(source)
625 prev = copies.get(source)
625 if prev is not None and prev[1] is not None:
626 if prev is not None and prev[1] is not None:
626 source = prev[1]
627 source = prev[1]
627 newcopies[dest] = (c, source)
628 newcopies[dest] = (c, source)
628 assert newcopies is not copies
629 assert newcopies is not copies
629 for f in removed:
630 for f in removed:
630 if f in newcopies:
631 if f in newcopies:
631 if newcopies is copies:
632 if newcopies is copies:
632 # copy on write to avoid affecting potential other
633 # copy on write to avoid affecting potential other
633 # branches. when there are no other branches, this
634 # branches. when there are no other branches, this
634 # could be avoided.
635 # could be avoided.
635 newcopies = copies.copy()
636 newcopies = copies.copy()
636 newcopies[f] = (c, None)
637 newcopies[f] = (c, None)
637 othercopies = all_copies.get(c)
638 othercopies = all_copies.get(c)
638 if othercopies is None:
639 if othercopies is None:
639 all_copies[c] = newcopies
640 all_copies[c] = newcopies
640 else:
641 else:
641 # we are the second parent to work on c, we need to merge our
642 # we are the second parent to work on c, we need to merge our
642 # work with the other.
643 # work with the other.
643 #
644 #
644 # In case of conflict, parent 1 takes precedence over parent 2.
645 # In case of conflict, parent 1 takes precedence over parent 2.
645 # This is an arbitrary choice made anew when implementing
646 # This is an arbitrary choice made anew when implementing
646 # changeset-based copies. It was made without regard to
647 # changeset-based copies. It was made without regard to
647 # potential filelog-related behavior.
648 # potential filelog-related behavior.
648 if parent == 1:
649 if parent == 1:
649 _merge_copies_dict_extra(
650 _merge_copies_dict_extra(
650 othercopies, newcopies, isancestor, ismerged
651 othercopies, newcopies, isancestor, ismerged
651 )
652 )
652 else:
653 else:
653 _merge_copies_dict_extra(
654 _merge_copies_dict_extra(
654 newcopies, othercopies, isancestor, ismerged
655 newcopies, othercopies, isancestor, ismerged
655 )
656 )
656 all_copies[c] = newcopies
657 all_copies[c] = newcopies
657
658
658 final_copies = {}
659 final_copies = {}
659 for dest, (tt, source) in all_copies[targetrev].items():
660 for dest, (tt, source) in all_copies[targetrev].items():
660 if source is not None:
661 if source is not None:
661 final_copies[dest] = source
662 final_copies[dest] = source
662 return final_copies
663 return final_copies
663
664
664
665
665 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
666 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
666 """version of `_merge_copies_dict` that works with the Google
667 """version of `_merge_copies_dict` that works with the Google
667 specific "extra" based storage for copy information"""
668 specific "extra" based storage for copy information"""
668 for dest, value in major.items():
669 for dest, value in major.items():
669 other = minor.get(dest)
670 other = minor.get(dest)
670 if other is None:
671 if other is None:
671 minor[dest] = value
672 minor[dest] = value
672 else:
673 else:
673 new_tt = value[0]
674 new_tt = value[0]
674 other_tt = other[0]
675 other_tt = other[0]
675 if value[1] == other[1]:
676 if value[1] == other[1]:
676 continue
677 continue
677 # content from "major" wins, unless it is older
678 # content from "major" wins, unless it is older
678 # than the branch point or there is a merge
679 # than the branch point or there is a merge
679 if (
680 if (
680 new_tt == other_tt
681 new_tt == other_tt
681 or not isancestor(new_tt, other_tt)
682 or not isancestor(new_tt, other_tt)
682 or ismerged(dest)
683 or ismerged(dest)
683 ):
684 ):
684 minor[dest] = value
685 minor[dest] = value
685
686
686
687
687 def _forwardcopies(a, b, base=None, match=None):
688 def _forwardcopies(a, b, base=None, match=None):
688 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
689 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
689
690
690 if base is None:
691 if base is None:
691 base = a
692 base = a
692 match = a.repo().narrowmatch(match)
693 match = a.repo().narrowmatch(match)
693 # check for working copy
694 # check for working copy
694 if b.rev() is None:
695 if b.rev() is None:
695 cm = _committedforwardcopies(a, b.p1(), base, match)
696 cm = _committedforwardcopies(a, b.p1(), base, match)
696 # combine copies from dirstate if necessary
697 # combine copies from dirstate if necessary
697 copies = _chain(cm, _dirstatecopies(b._repo, match))
698 copies = _chain(cm, _dirstatecopies(b._repo, match))
698 else:
699 else:
699 copies = _committedforwardcopies(a, b, base, match)
700 copies = _committedforwardcopies(a, b, base, match)
700 return copies
701 return copies
701
702
702
703
703 def _backwardrenames(a, b, match):
704 def _backwardrenames(a, b, match):
704 """find renames from a to b"""
705 """find renames from a to b"""
705 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
706 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
706 return {}
707 return {}
707
708
708 # We don't want to pass in "match" here, since that would filter
709 # We don't want to pass in "match" here, since that would filter
709 # the destination by it. Since we're reversing the copies, we want
710 # the destination by it. Since we're reversing the copies, we want
710 # to filter the source instead.
711 # to filter the source instead.
711 copies = _forwardcopies(b, a)
712 copies = _forwardcopies(b, a)
712 return _reverse_renames(copies, a, match)
713 return _reverse_renames(copies, a, match)
713
714
714
715
715 def _reverse_renames(copies, dst, match):
716 def _reverse_renames(copies, dst, match):
716 """given copies to context 'dst', finds renames from that context"""
717 """given copies to context 'dst', finds renames from that context"""
717 # Even though we're not taking copies into account, 1:n rename situations
718 # Even though we're not taking copies into account, 1:n rename situations
718 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
719 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
719 # arbitrarily pick one of the renames.
720 # arbitrarily pick one of the renames.
720 r = {}
721 r = {}
721 for k, v in sorted(pycompat.iteritems(copies)):
722 for k, v in sorted(pycompat.iteritems(copies)):
722 if match and not match(v):
723 if match and not match(v):
723 continue
724 continue
724 # remove copies
725 # remove copies
725 if v in dst:
726 if v in dst:
726 continue
727 continue
727 r[v] = k
728 r[v] = k
728 return r
729 return r
729
730
730
731
731 def pathcopies(x, y, match=None):
732 def pathcopies(x, y, match=None):
732 """find {dst@y: src@x} copy mapping for directed compare"""
733 """find {dst@y: src@x} copy mapping for directed compare"""
733 repo = x._repo
734 repo = x._repo
734 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
735 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
735 if debug:
736 if debug:
736 repo.ui.debug(
737 repo.ui.debug(
737 b'debug.copies: searching copies from %s to %s\n' % (x, y)
738 b'debug.copies: searching copies from %s to %s\n' % (x, y)
738 )
739 )
739 if x == y or not x or not y:
740 if x == y or not x or not y:
740 return {}
741 return {}
741 if y.rev() is None and x == y.p1():
742 if y.rev() is None and x == y.p1():
742 if debug:
743 if debug:
743 repo.ui.debug(b'debug.copies: search mode: dirstate\n')
744 repo.ui.debug(b'debug.copies: search mode: dirstate\n')
744 # short-circuit to avoid issues with merge states
745 # short-circuit to avoid issues with merge states
745 return _dirstatecopies(repo, match)
746 return _dirstatecopies(repo, match)
746 a = y.ancestor(x)
747 a = y.ancestor(x)
747 if a == x:
748 if a == x:
748 if debug:
749 if debug:
749 repo.ui.debug(b'debug.copies: search mode: forward\n')
750 repo.ui.debug(b'debug.copies: search mode: forward\n')
750 copies = _forwardcopies(x, y, match=match)
751 copies = _forwardcopies(x, y, match=match)
751 elif a == y:
752 elif a == y:
752 if debug:
753 if debug:
753 repo.ui.debug(b'debug.copies: search mode: backward\n')
754 repo.ui.debug(b'debug.copies: search mode: backward\n')
754 copies = _backwardrenames(x, y, match=match)
755 copies = _backwardrenames(x, y, match=match)
755 else:
756 else:
756 if debug:
757 if debug:
757 repo.ui.debug(b'debug.copies: search mode: combined\n')
758 repo.ui.debug(b'debug.copies: search mode: combined\n')
758 base = None
759 base = None
759 if a.rev() != nullrev:
760 if a.rev() != nullrev:
760 base = x
761 base = x
761 x_copies = _forwardcopies(a, x)
762 x_copies = _forwardcopies(a, x)
762 y_copies = _forwardcopies(a, y, base, match=match)
763 y_copies = _forwardcopies(a, y, base, match=match)
763 same_keys = set(x_copies) & set(y_copies)
764 same_keys = set(x_copies) & set(y_copies)
764 for k in same_keys:
765 for k in same_keys:
765 if x_copies.get(k) == y_copies.get(k):
766 if x_copies.get(k) == y_copies.get(k):
766 del x_copies[k]
767 del x_copies[k]
767 del y_copies[k]
768 del y_copies[k]
768 x_backward_renames = _reverse_renames(x_copies, x, match)
769 x_backward_renames = _reverse_renames(x_copies, x, match)
769 copies = _chain(
770 copies = _chain(
770 x_backward_renames,
771 x_backward_renames,
771 y_copies,
772 y_copies,
772 )
773 )
773 _filter(x, y, copies)
774 _filter(x, y, copies)
774 return copies
775 return copies
775
776
776
777
777 def mergecopies(repo, c1, c2, base):
778 def mergecopies(repo, c1, c2, base):
778 """
779 """
779 Finds moves and copies between context c1 and c2 that are relevant for
780 Finds moves and copies between context c1 and c2 that are relevant for
780 merging. 'base' will be used as the merge base.
781 merging. 'base' will be used as the merge base.
781
782
782 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
783 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
783 files that were moved/copied in one merge parent and modified in another.
784 files that were moved/copied in one merge parent and modified in another.
784 For example:
785 For example:
785
786
786 o ---> 4 another commit
787 o ---> 4 another commit
787 |
788 |
788 | o ---> 3 commit that modifies a.txt
789 | o ---> 3 commit that modifies a.txt
789 | /
790 | /
790 o / ---> 2 commit that moves a.txt to b.txt
791 o / ---> 2 commit that moves a.txt to b.txt
791 |/
792 |/
792 o ---> 1 merge base
793 o ---> 1 merge base
793
794
794 If we try to rebase revision 3 on revision 4, since there is no a.txt in
795 If we try to rebase revision 3 on revision 4, since there is no a.txt in
795 revision 4, and if the user has copytrace disabled, we print the following
796 revision 4, and if the user has copytrace disabled, we print the following
796 message:
797 message:
797
798
798 ```other changed <file> which local deleted```
799 ```other changed <file> which local deleted```
799
800
800 Returns a tuple where:
801 Returns a tuple where:
801
802
802 "branch_copies" an instance of branch_copies.
803 "branch_copies" an instance of branch_copies.
803
804
804 "diverge" is a mapping of source name -> list of destination names
805 "diverge" is a mapping of source name -> list of destination names
805 for divergent renames.
806 for divergent renames.
806
807
807 This function calls different copytracing algorithms based on config.
808 This function calls different copytracing algorithms based on config.
808 """
809 """
809 # avoid silly behavior for update from empty dir
810 # avoid silly behavior for update from empty dir
810 if not c1 or not c2 or c1 == c2:
811 if not c1 or not c2 or c1 == c2:
811 return branch_copies(), branch_copies(), {}
812 return branch_copies(), branch_copies(), {}
812
813
813 narrowmatch = c1.repo().narrowmatch()
814 narrowmatch = c1.repo().narrowmatch()
814
815
815 # avoid silly behavior for parent -> working dir
816 # avoid silly behavior for parent -> working dir
816 if c2.node() is None and c1.node() == repo.dirstate.p1():
817 if c2.node() is None and c1.node() == repo.dirstate.p1():
817 return (
818 return (
818 branch_copies(_dirstatecopies(repo, narrowmatch)),
819 branch_copies(_dirstatecopies(repo, narrowmatch)),
819 branch_copies(),
820 branch_copies(),
820 {},
821 {},
821 )
822 )
822
823
823 copytracing = repo.ui.config(b'experimental', b'copytrace')
824 copytracing = repo.ui.config(b'experimental', b'copytrace')
824 if stringutil.parsebool(copytracing) is False:
825 if stringutil.parsebool(copytracing) is False:
825 # stringutil.parsebool() returns None when it is unable to parse the
826 # stringutil.parsebool() returns None when it is unable to parse the
826 # value, so we should rely on keeping copytracing on in such cases
827 # value, so we should rely on keeping copytracing on in such cases
827 return branch_copies(), branch_copies(), {}
828 return branch_copies(), branch_copies(), {}
828
829
829 if usechangesetcentricalgo(repo):
830 if usechangesetcentricalgo(repo):
830 # The heuristics don't make sense when we need changeset-centric algos
831 # The heuristics don't make sense when we need changeset-centric algos
831 return _fullcopytracing(repo, c1, c2, base)
832 return _fullcopytracing(repo, c1, c2, base)
832
833
833 # Copy trace disabling is explicitly below the node == p1 logic above
834 # Copy trace disabling is explicitly below the node == p1 logic above
834 # because the logic above is required for a simple copy to be kept across a
835 # because the logic above is required for a simple copy to be kept across a
835 # rebase.
836 # rebase.
836 if copytracing == b'heuristics':
837 if copytracing == b'heuristics':
837 # Do full copytracing if only non-public revisions are involved as
838 # Do full copytracing if only non-public revisions are involved as
838 # that will be fast enough and will also cover the copies which could
839 # that will be fast enough and will also cover the copies which could
839 # be missed by heuristics
840 # be missed by heuristics
840 if _isfullcopytraceable(repo, c1, base):
841 if _isfullcopytraceable(repo, c1, base):
841 return _fullcopytracing(repo, c1, c2, base)
842 return _fullcopytracing(repo, c1, c2, base)
842 return _heuristicscopytracing(repo, c1, c2, base)
843 return _heuristicscopytracing(repo, c1, c2, base)
843 else:
844 else:
844 return _fullcopytracing(repo, c1, c2, base)
845 return _fullcopytracing(repo, c1, c2, base)
845
846
846
847
847 def _isfullcopytraceable(repo, c1, base):
848 def _isfullcopytraceable(repo, c1, base):
848 """Checks that if base, source and destination are all no-public branches,
849 """Checks that if base, source and destination are all no-public branches,
849 if yes let's use the full copytrace algorithm for increased capabilities
850 if yes let's use the full copytrace algorithm for increased capabilities
850 since it will be fast enough.
851 since it will be fast enough.
851
852
852 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
853 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
853 number of changesets from c1 to base such that if number of changesets are
854 number of changesets from c1 to base such that if number of changesets are
854 more than the limit, full copytracing algorithm won't be used.
855 more than the limit, full copytracing algorithm won't be used.
855 """
856 """
856 if c1.rev() is None:
857 if c1.rev() is None:
857 c1 = c1.p1()
858 c1 = c1.p1()
858 if c1.mutable() and base.mutable():
859 if c1.mutable() and base.mutable():
859 sourcecommitlimit = repo.ui.configint(
860 sourcecommitlimit = repo.ui.configint(
860 b'experimental', b'copytrace.sourcecommitlimit'
861 b'experimental', b'copytrace.sourcecommitlimit'
861 )
862 )
862 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
863 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
863 return commits < sourcecommitlimit
864 return commits < sourcecommitlimit
864 return False
865 return False
865
866
866
867
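# Editor's note (illustrative, not part of copies.py): the dispatch in
# mergecopies() and the check above are driven by configuration such as
#
#   [experimental]
#   copytrace = heuristics
#   copytrace.sourcecommitlimit = 50
#
# with "heuristics", full copytracing is still used as long as the mutable
# (non-public) stack from base to c1 stays below `copytrace.sourcecommitlimit`.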
867 def _checksinglesidecopies(
868 def _checksinglesidecopies(
868 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
869 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
869 ):
870 ):
870 if src not in m2:
871 if src not in m2:
871 # deleted on side 2
872 # deleted on side 2
872 if src not in m1:
873 if src not in m1:
873 # renamed on side 1, deleted on side 2
874 # renamed on side 1, deleted on side 2
874 renamedelete[src] = dsts1
875 renamedelete[src] = dsts1
875 elif src not in mb:
876 elif src not in mb:
876 # Work around the "short-circuit to avoid issues with merge states"
877 # Work around the "short-circuit to avoid issues with merge states"
877 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
878 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
878 # destination doesn't exist in y.
879 # destination doesn't exist in y.
879 pass
880 pass
880 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
881 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
881 return
882 return
882 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
883 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
883 # modified on side 2
884 # modified on side 2
884 for dst in dsts1:
885 for dst in dsts1:
885 copy[dst] = src
886 copy[dst] = src
886
887
887
888
888 class branch_copies(object):
889 class branch_copies(object):
889 """Information about copies made on one side of a merge/graft.
890 """Information about copies made on one side of a merge/graft.
890
891
891 "copy" is a mapping from destination name -> source name,
892 "copy" is a mapping from destination name -> source name,
892 where source is in c1 and destination is in c2 or vice-versa.
893 where source is in c1 and destination is in c2 or vice-versa.
893
894
894 "movewithdir" is a mapping from source name -> destination name,
895 "movewithdir" is a mapping from source name -> destination name,
895 where the file at source, present in one context but not the other,
896 where the file at source, present in one context but not the other,
896 needs to be moved to destination by the merge process, because the
897 needs to be moved to destination by the merge process, because the
897 other context moved the directory it is in.
898 other context moved the directory it is in.
898
899
899 "renamedelete" is a mapping of source name -> list of destination
900 "renamedelete" is a mapping of source name -> list of destination
900 names for files deleted in c1 that were renamed in c2 or vice-versa.
901 names for files deleted in c1 that were renamed in c2 or vice-versa.
901
902
902 "dirmove" is a mapping of detected source dir -> destination dir renames.
903 "dirmove" is a mapping of detected source dir -> destination dir renames.
903 This is needed for handling changes to new files previously grafted into
904 This is needed for handling changes to new files previously grafted into
904 renamed directories.
905 renamed directories.
905 """
906 """
906
907
907 def __init__(
908 def __init__(
908 self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
909 self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
909 ):
910 ):
910 self.copy = {} if copy is None else copy
911 self.copy = {} if copy is None else copy
911 self.renamedelete = {} if renamedelete is None else renamedelete
912 self.renamedelete = {} if renamedelete is None else renamedelete
912 self.dirmove = {} if dirmove is None else dirmove
913 self.dirmove = {} if dirmove is None else dirmove
913 self.movewithdir = {} if movewithdir is None else movewithdir
914 self.movewithdir = {} if movewithdir is None else movewithdir
914
915
915 def __repr__(self):
916 def __repr__(self):
916 return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (
917 return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (
917 self.copy,
918 self.copy,
918 self.renamedelete,
919 self.renamedelete,
919 self.dirmove,
920 self.dirmove,
920 self.movewithdir,
921 self.movewithdir,
921 )
922 )
922
923
923
924
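
A short, hypothetical usage sketch of the class above (the file names are invented; only the `copy` mapping is populated, the other mappings fall back to empty dicts):

    bc = branch_copies(copy={b'new/name.c': b'old/name.c'})
    # "copy" maps destination -> source; unspecified mappings default to {}
    assert bc.copy[b'new/name.c'] == b'old/name.c'
    assert bc.renamedelete == {} and bc.dirmove == {} and bc.movewithdir == {}
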
924 def _fullcopytracing(repo, c1, c2, base):
925 def _fullcopytracing(repo, c1, c2, base):
925 """The full copytracing algorithm which finds all the new files that were
926 """The full copytracing algorithm which finds all the new files that were
926 added from merge base up to the top commit and for each file it checks if
927 added from merge base up to the top commit and for each file it checks if
927 this file was copied from another file.
928 this file was copied from another file.
928
929
929 This is pretty slow when a lot of changesets are involved but will track all
930 This is pretty slow when a lot of changesets are involved but will track all
930 the copies.
931 the copies.
931 """
932 """
932 m1 = c1.manifest()
933 m1 = c1.manifest()
933 m2 = c2.manifest()
934 m2 = c2.manifest()
934 mb = base.manifest()
935 mb = base.manifest()
935
936
936 copies1 = pathcopies(base, c1)
937 copies1 = pathcopies(base, c1)
937 copies2 = pathcopies(base, c2)
938 copies2 = pathcopies(base, c2)
938
939
939 if not (copies1 or copies2):
940 if not (copies1 or copies2):
940 return branch_copies(), branch_copies(), {}
941 return branch_copies(), branch_copies(), {}
941
942
942 inversecopies1 = {}
943 inversecopies1 = {}
943 inversecopies2 = {}
944 inversecopies2 = {}
944 for dst, src in copies1.items():
945 for dst, src in copies1.items():
945 inversecopies1.setdefault(src, []).append(dst)
946 inversecopies1.setdefault(src, []).append(dst)
946 for dst, src in copies2.items():
947 for dst, src in copies2.items():
947 inversecopies2.setdefault(src, []).append(dst)
948 inversecopies2.setdefault(src, []).append(dst)
948
949
949 copy1 = {}
950 copy1 = {}
950 copy2 = {}
951 copy2 = {}
951 diverge = {}
952 diverge = {}
952 renamedelete1 = {}
953 renamedelete1 = {}
953 renamedelete2 = {}
954 renamedelete2 = {}
954 allsources = set(inversecopies1) | set(inversecopies2)
955 allsources = set(inversecopies1) | set(inversecopies2)
955 for src in allsources:
956 for src in allsources:
956 dsts1 = inversecopies1.get(src)
957 dsts1 = inversecopies1.get(src)
957 dsts2 = inversecopies2.get(src)
958 dsts2 = inversecopies2.get(src)
958 if dsts1 and dsts2:
959 if dsts1 and dsts2:
959 # copied/renamed on both sides
960 # copied/renamed on both sides
960 if src not in m1 and src not in m2:
961 if src not in m1 and src not in m2:
961 # renamed on both sides
962 # renamed on both sides
962 dsts1 = set(dsts1)
963 dsts1 = set(dsts1)
963 dsts2 = set(dsts2)
964 dsts2 = set(dsts2)
964 # If there's some overlap in the rename destinations, we
965 # If there's some overlap in the rename destinations, we
965 # consider it not divergent. For example, if side 1 copies 'a'
966 # consider it not divergent. For example, if side 1 copies 'a'
966 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
967 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
967 # and 'd' and deletes 'a', 'c' is recorded as a copy on both sides.
968 # and 'd' and deletes 'a', 'c' is recorded as a copy on both sides.
968 if dsts1 & dsts2:
969 if dsts1 & dsts2:
969 for dst in dsts1 & dsts2:
970 for dst in dsts1 & dsts2:
970 copy1[dst] = src
971 copy1[dst] = src
971 copy2[dst] = src
972 copy2[dst] = src
972 else:
973 else:
973 diverge[src] = sorted(dsts1 | dsts2)
974 diverge[src] = sorted(dsts1 | dsts2)
974 elif src in m1 and src in m2:
975 elif src in m1 and src in m2:
975 # copied on both sides
976 # copied on both sides
976 dsts1 = set(dsts1)
977 dsts1 = set(dsts1)
977 dsts2 = set(dsts2)
978 dsts2 = set(dsts2)
978 for dst in dsts1 & dsts2:
979 for dst in dsts1 & dsts2:
979 copy1[dst] = src
980 copy1[dst] = src
980 copy2[dst] = src
981 copy2[dst] = src
981 # TODO: Handle cases where it was renamed on one side and copied
982 # TODO: Handle cases where it was renamed on one side and copied
982 # on the other side
983 # on the other side
983 elif dsts1:
984 elif dsts1:
984 # copied/renamed only on side 1
985 # copied/renamed only on side 1
985 _checksinglesidecopies(
986 _checksinglesidecopies(
986 src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
987 src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
987 )
988 )
988 elif dsts2:
989 elif dsts2:
989 # copied/renamed only on side 2
990 # copied/renamed only on side 2
990 _checksinglesidecopies(
991 _checksinglesidecopies(
991 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
992 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
992 )
993 )
993
994
994 # find interesting file sets from manifests
995 # find interesting file sets from manifests
995 cache = []
996 cache = []
996
997
997 def _get_addedfiles(idx):
998 def _get_addedfiles(idx):
998 if not cache:
999 if not cache:
999 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
1000 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
1000 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
1001 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
1001 u1 = sorted(addedinm1 - addedinm2)
1002 u1 = sorted(addedinm1 - addedinm2)
1002 u2 = sorted(addedinm2 - addedinm1)
1003 u2 = sorted(addedinm2 - addedinm1)
1003 cache.extend((u1, u2))
1004 cache.extend((u1, u2))
1004 return cache[idx]
1005 return cache[idx]
1005
1006
1006 u1fn = lambda: _get_addedfiles(0)
1007 u1fn = lambda: _get_addedfiles(0)
1007 u2fn = lambda: _get_addedfiles(1)
1008 u2fn = lambda: _get_addedfiles(1)
1008 if repo.ui.debugflag:
1009 if repo.ui.debugflag:
1009 u1 = u1fn()
1010 u1 = u1fn()
1010 u2 = u2fn()
1011 u2 = u2fn()
1011
1012
1012 header = b" unmatched files in %s"
1013 header = b" unmatched files in %s"
1013 if u1:
1014 if u1:
1014 repo.ui.debug(
1015 repo.ui.debug(
1015 b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))
1016 b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))
1016 )
1017 )
1017 if u2:
1018 if u2:
1018 repo.ui.debug(
1019 repo.ui.debug(
1019 b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))
1020 b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))
1020 )
1021 )
1021
1022
1022 renamedeleteset = set()
1023 renamedeleteset = set()
1023 divergeset = set()
1024 divergeset = set()
1024 for dsts in diverge.values():
1025 for dsts in diverge.values():
1025 divergeset.update(dsts)
1026 divergeset.update(dsts)
1026 for dsts in renamedelete1.values():
1027 for dsts in renamedelete1.values():
1027 renamedeleteset.update(dsts)
1028 renamedeleteset.update(dsts)
1028 for dsts in renamedelete2.values():
1029 for dsts in renamedelete2.values():
1029 renamedeleteset.update(dsts)
1030 renamedeleteset.update(dsts)
1030
1031
1031 repo.ui.debug(
1032 repo.ui.debug(
1032 b" all copies found (* = to merge, ! = divergent, "
1033 b" all copies found (* = to merge, ! = divergent, "
1033 b"% = renamed and deleted):\n"
1034 b"% = renamed and deleted):\n"
1034 )
1035 )
1035 for side, copies in ((b"local", copies1), (b"remote", copies2)):
1036 for side, copies in ((b"local", copies1), (b"remote", copies2)):
1036 if not copies:
1037 if not copies:
1037 continue
1038 continue
1038 repo.ui.debug(b" on %s side:\n" % side)
1039 repo.ui.debug(b" on %s side:\n" % side)
1039 for f in sorted(copies):
1040 for f in sorted(copies):
1040 note = b""
1041 note = b""
1041 if f in copy1 or f in copy2:
1042 if f in copy1 or f in copy2:
1042 note += b"*"
1043 note += b"*"
1043 if f in divergeset:
1044 if f in divergeset:
1044 note += b"!"
1045 note += b"!"
1045 if f in renamedeleteset:
1046 if f in renamedeleteset:
1046 note += b"%"
1047 note += b"%"
1047 repo.ui.debug(
1048 repo.ui.debug(
1048 b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
1049 b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
1049 )
1050 )
1050 del renamedeleteset
1051 del renamedeleteset
1051 del divergeset
1052 del divergeset
1052
1053
1053 repo.ui.debug(b" checking for directory renames\n")
1054 repo.ui.debug(b" checking for directory renames\n")
1054
1055
1055 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
1056 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
1056 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)
1057 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)
1057
1058
1058 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
1059 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
1059 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
1060 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
1060
1061
1061 return branch_copies1, branch_copies2, diverge
1062 return branch_copies1, branch_copies2, diverge
1062
1063
1063
1064
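
To make the inversion step above concrete, here is a small standalone sketch (with invented paths) of how the dst -> src mappings returned by pathcopies() are turned into src -> [dsts] before the per-source analysis:

    copies1 = {b'b.txt': b'a.txt', b'c.txt': b'a.txt'}  # dst -> src
    inversecopies1 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    # inversecopies1 == {b'a.txt': [b'b.txt', b'c.txt']}: one source with two
    # destinations, the "copied/renamed on one side" shape handled above
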
1064 def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
1065 def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
1065 """Finds moved directories and files that should move with them.
1066 """Finds moved directories and files that should move with them.
1066
1067
1067 ctx: the context for one of the sides
1068 ctx: the context for one of the sides
1068 copy: files copied on the same side (as ctx)
1069 copy: files copied on the same side (as ctx)
1069 fullcopy: files copied on the same side (as ctx), including those that
1070 fullcopy: files copied on the same side (as ctx), including those that
1070 merge.manifestmerge() won't care about
1071 merge.manifestmerge() won't care about
1071 addedfilesfn: function returning added files on the other side (compared to
1072 addedfilesfn: function returning added files on the other side (compared to
1072 ctx)
1073 ctx)
1073 """
1074 """
1074 # generate a directory move map
1075 # generate a directory move map
1075 invalid = set()
1076 invalid = set()
1076 dirmove = {}
1077 dirmove = {}
1077
1078
1078 # examine each file copy for a potential directory move, which is
1079 # examine each file copy for a potential directory move, which is
1079 # when all the files in a directory are moved to a new directory
1080 # when all the files in a directory are moved to a new directory
1080 for dst, src in pycompat.iteritems(fullcopy):
1081 for dst, src in pycompat.iteritems(fullcopy):
1081 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
1082 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
1082 if dsrc in invalid:
1083 if dsrc in invalid:
1083 # already seen to be uninteresting
1084 # already seen to be uninteresting
1084 continue
1085 continue
1085 elif ctx.hasdir(dsrc) and ctx.hasdir(ddst):
1086 elif ctx.hasdir(dsrc) and ctx.hasdir(ddst):
1086 # directory wasn't entirely moved locally
1087 # directory wasn't entirely moved locally
1087 invalid.add(dsrc)
1088 invalid.add(dsrc)
1088 elif dsrc in dirmove and dirmove[dsrc] != ddst:
1089 elif dsrc in dirmove and dirmove[dsrc] != ddst:
1089 # files from the same directory moved to two different places
1090 # files from the same directory moved to two different places
1090 invalid.add(dsrc)
1091 invalid.add(dsrc)
1091 else:
1092 else:
1092 # looks good so far
1093 # looks good so far
1093 dirmove[dsrc] = ddst
1094 dirmove[dsrc] = ddst
1094
1095
1095 for i in invalid:
1096 for i in invalid:
1096 if i in dirmove:
1097 if i in dirmove:
1097 del dirmove[i]
1098 del dirmove[i]
1098 del invalid
1099 del invalid
1099
1100
1100 if not dirmove:
1101 if not dirmove:
1101 return {}, {}
1102 return {}, {}
1102
1103
1103 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
1104 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
1104
1105
1105 for d in dirmove:
1106 for d in dirmove:
1106 repo.ui.debug(
1107 repo.ui.debug(
1107 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
1108 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
1108 )
1109 )
1109
1110
1110 # Sort the directories in reverse order, so we find children first
1111 # Sort the directories in reverse order, so we find children first
1111 # For example, if dir1/ was renamed to dir2/, and dir1/subdir1/
1112 # For example, if dir1/ was renamed to dir2/, and dir1/subdir1/
1112 # was renamed to dir2/subdir2/, we want to move dir1/subdir1/file
1113 # was renamed to dir2/subdir2/, we want to move dir1/subdir1/file
1113 # to dir2/subdir2/file (not dir2/subdir1/file)
1114 # to dir2/subdir2/file (not dir2/subdir1/file)
1114 dirmove_children_first = sorted(dirmove, reverse=True)
1115 dirmove_children_first = sorted(dirmove, reverse=True)
1115
1116
1116 movewithdir = {}
1117 movewithdir = {}
1117 # check unaccounted nonoverlapping files against directory moves
1118 # check unaccounted nonoverlapping files against directory moves
1118 for f in addedfilesfn():
1119 for f in addedfilesfn():
1119 if f not in fullcopy:
1120 if f not in fullcopy:
1120 for d in dirmove_children_first:
1121 for d in dirmove_children_first:
1121 if f.startswith(d):
1122 if f.startswith(d):
1122 # new file added in a directory that was moved, move it
1123 # new file added in a directory that was moved, move it
1123 df = dirmove[d] + f[len(d) :]
1124 df = dirmove[d] + f[len(d) :]
1124 if df not in copy:
1125 if df not in copy:
1125 movewithdir[f] = df
1126 movewithdir[f] = df
1126 repo.ui.debug(
1127 repo.ui.debug(
1127 b" pending file src: '%s' -> dst: '%s'\n"
1128 b" pending file src: '%s' -> dst: '%s'\n"
1128 % (f, df)
1129 % (f, df)
1129 )
1130 )
1130 break
1131 break
1131
1132
1132 return dirmove, movewithdir
1133 return dirmove, movewithdir
1133
1134
1134
1135
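
An illustrative sketch (invented paths) of how a detected directory rename from the `dirmove` mapping above is applied to a newly added file, including the children-first ordering that prefers the deepest matching directory:

    dirmove = {b'dir1/': b'dir2/', b'dir1/subdir1/': b'dir2/subdir2/'}
    f = b'dir1/subdir1/file'
    for d in sorted(dirmove, reverse=True):  # children first
        if f.startswith(d):
            df = dirmove[d] + f[len(d):]
            break
    # df == b'dir2/subdir2/file' (not b'dir2/subdir1/file')
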
1135 def _heuristicscopytracing(repo, c1, c2, base):
1136 def _heuristicscopytracing(repo, c1, c2, base):
1136 """Fast copytracing using filename heuristics
1137 """Fast copytracing using filename heuristics
1137
1138
1138 Assumes that moves or renames are of the following two types:
1139 Assumes that moves or renames are of the following two types:
1139
1140
1140 1) Inside a directory only (same directory name but different filenames)
1141 1) Inside a directory only (same directory name but different filenames)
1141 2) Move from one directory to another
1142 2) Move from one directory to another
1142 (same filenames but different directory names)
1143 (same filenames but different directory names)
1143
1144
1144 Works only when there are no merge commits in the "source branch".
1145 Works only when there are no merge commits in the "source branch".
1145 The source branch is the set of commits from base up to c2, not including base.
1146 The source branch is the set of commits from base up to c2, not including base.
1146
1147
1147 If a merge is involved, it falls back to _fullcopytracing().
1148 If a merge is involved, it falls back to _fullcopytracing().
1148
1149
1149 Can be used by setting the following config:
1150 Can be used by setting the following config:
1150
1151
1151 [experimental]
1152 [experimental]
1152 copytrace = heuristics
1153 copytrace = heuristics
1153
1154
1154 In some cases the copy/move candidates found by the heuristics can be very
1155 In some cases the copy/move candidates found by the heuristics can be very
1155 numerous, which makes the algorithm slow. The number of possible
1156 numerous, which makes the algorithm slow. The number of possible
1156 candidates to check can be limited by using the config
1157 candidates to check can be limited by using the config
1157 `experimental.copytrace.movecandidateslimit` which defaults to 100.
1158 `experimental.copytrace.movecandidateslimit` which defaults to 100.
1158 """
1159 """
1159
1160
1160 if c1.rev() is None:
1161 if c1.rev() is None:
1161 c1 = c1.p1()
1162 c1 = c1.p1()
1162 if c2.rev() is None:
1163 if c2.rev() is None:
1163 c2 = c2.p1()
1164 c2 = c2.p1()
1164
1165
1165 changedfiles = set()
1166 changedfiles = set()
1166 m1 = c1.manifest()
1167 m1 = c1.manifest()
1167 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
1168 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
1168 # If base is not in c2 branch, we switch to fullcopytracing
1169 # If base is not in c2 branch, we switch to fullcopytracing
1169 repo.ui.debug(
1170 repo.ui.debug(
1170 b"switching to full copytracing as base is not "
1171 b"switching to full copytracing as base is not "
1171 b"an ancestor of c2\n"
1172 b"an ancestor of c2\n"
1172 )
1173 )
1173 return _fullcopytracing(repo, c1, c2, base)
1174 return _fullcopytracing(repo, c1, c2, base)
1174
1175
1175 ctx = c2
1176 ctx = c2
1176 while ctx != base:
1177 while ctx != base:
1177 if len(ctx.parents()) == 2:
1178 if len(ctx.parents()) == 2:
1178 # To keep things simple let's not handle merges
1179 # To keep things simple let's not handle merges
1179 repo.ui.debug(b"switching to full copytracing because of merges\n")
1180 repo.ui.debug(b"switching to full copytracing because of merges\n")
1180 return _fullcopytracing(repo, c1, c2, base)
1181 return _fullcopytracing(repo, c1, c2, base)
1181 changedfiles.update(ctx.files())
1182 changedfiles.update(ctx.files())
1182 ctx = ctx.p1()
1183 ctx = ctx.p1()
1183
1184
1184 copies2 = {}
1185 copies2 = {}
1185 cp = _forwardcopies(base, c2)
1186 cp = _forwardcopies(base, c2)
1186 for dst, src in pycompat.iteritems(cp):
1187 for dst, src in pycompat.iteritems(cp):
1187 if src in m1:
1188 if src in m1:
1188 copies2[dst] = src
1189 copies2[dst] = src
1189
1190
1190 # file is missing if it isn't present in the destination, but is present in
1191 # file is missing if it isn't present in the destination, but is present in
1191 # the base and present in the source.
1192 # the base and present in the source.
1192 # Presence in the base is important to exclude added files, presence in the
1193 # Presence in the base is important to exclude added files, presence in the
1193 # source is important to exclude removed files.
1194 # source is important to exclude removed files.
1194 filt = lambda f: f not in m1 and f in base and f in c2
1195 filt = lambda f: f not in m1 and f in base and f in c2
1195 missingfiles = [f for f in changedfiles if filt(f)]
1196 missingfiles = [f for f in changedfiles if filt(f)]
1196
1197
1197 copies1 = {}
1198 copies1 = {}
1198 if missingfiles:
1199 if missingfiles:
1199 basenametofilename = collections.defaultdict(list)
1200 basenametofilename = collections.defaultdict(list)
1200 dirnametofilename = collections.defaultdict(list)
1201 dirnametofilename = collections.defaultdict(list)
1201
1202
1202 for f in m1.filesnotin(base.manifest()):
1203 for f in m1.filesnotin(base.manifest()):
1203 basename = os.path.basename(f)
1204 basename = os.path.basename(f)
1204 dirname = os.path.dirname(f)
1205 dirname = os.path.dirname(f)
1205 basenametofilename[basename].append(f)
1206 basenametofilename[basename].append(f)
1206 dirnametofilename[dirname].append(f)
1207 dirnametofilename[dirname].append(f)
1207
1208
1208 for f in missingfiles:
1209 for f in missingfiles:
1209 basename = os.path.basename(f)
1210 basename = os.path.basename(f)
1210 dirname = os.path.dirname(f)
1211 dirname = os.path.dirname(f)
1211 samebasename = basenametofilename[basename]
1212 samebasename = basenametofilename[basename]
1212 samedirname = dirnametofilename[dirname]
1213 samedirname = dirnametofilename[dirname]
1213 movecandidates = samebasename + samedirname
1214 movecandidates = samebasename + samedirname
1214 # f is guaranteed to be present in c2, that's why
1215 # f is guaranteed to be present in c2, that's why
1215 # c2.filectx(f) won't fail
1216 # c2.filectx(f) won't fail
1216 f2 = c2.filectx(f)
1217 f2 = c2.filectx(f)
1217 # we can have a lot of candidates which can slow down the heuristics
1218 # we can have a lot of candidates which can slow down the heuristics
1218 # config value to limit the number of candidate moves to check
1219 # config value to limit the number of candidate moves to check
1219 maxcandidates = repo.ui.configint(
1220 maxcandidates = repo.ui.configint(
1220 b'experimental', b'copytrace.movecandidateslimit'
1221 b'experimental', b'copytrace.movecandidateslimit'
1221 )
1222 )
1222
1223
1223 if len(movecandidates) > maxcandidates:
1224 if len(movecandidates) > maxcandidates:
1224 repo.ui.status(
1225 repo.ui.status(
1225 _(
1226 _(
1226 b"skipping copytracing for '%s', more "
1227 b"skipping copytracing for '%s', more "
1227 b"candidates than the limit: %d\n"
1228 b"candidates than the limit: %d\n"
1228 )
1229 )
1229 % (f, len(movecandidates))
1230 % (f, len(movecandidates))
1230 )
1231 )
1231 continue
1232 continue
1232
1233
1233 for candidate in movecandidates:
1234 for candidate in movecandidates:
1234 f1 = c1.filectx(candidate)
1235 f1 = c1.filectx(candidate)
1235 if _related(f1, f2):
1236 if _related(f1, f2):
1236 # if there are a few related copies then we'll merge
1237 # if there are a few related copies then we'll merge
1237 # changes into all of them. This matches the behaviour
1238 # changes into all of them. This matches the behaviour
1238 # of upstream copytracing
1239 # of upstream copytracing
1239 copies1[candidate] = f
1240 copies1[candidate] = f
1240
1241
1241 return branch_copies(copies1), branch_copies(copies2), {}
1242 return branch_copies(copies1), branch_copies(copies2), {}
1242
1243
1243
1244
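
For clarity, a standalone sketch (with invented paths) of the candidate generation used by the heuristics above: files added on the local side are indexed by basename and by dirname, and a file missing locally collects as candidates every added file that shares either of the two.

    import collections
    import os

    added_locally = [b'src/new/util.py', b'docs/readme.txt']
    basenametofilename = collections.defaultdict(list)
    dirnametofilename = collections.defaultdict(list)
    for f in added_locally:
        basenametofilename[os.path.basename(f)].append(f)
        dirnametofilename[os.path.dirname(f)].append(f)

    missing = b'src/old/util.py'  # changed on the other side, gone locally
    candidates = (
        basenametofilename[os.path.basename(missing)]
        + dirnametofilename[os.path.dirname(missing)]
    )
    # candidates == [b'src/new/util.py']: same basename, so it is checked
    # with _related() as a possible rename destination
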
1244 def _related(f1, f2):
1245 def _related(f1, f2):
1245 """return True if f1 and f2 filectx have a common ancestor
1246 """return True if f1 and f2 filectx have a common ancestor
1246
1247
1247 Walk back to common ancestor to see if the two files originate
1248 Walk back to common ancestor to see if the two files originate
1248 from the same file. Since workingfilectx's rev() is None it messes
1249 from the same file. Since workingfilectx's rev() is None it messes
1249 up the integer comparison logic, hence the pre-step check for
1250 up the integer comparison logic, hence the pre-step check for
1250 None (f1 and f2 can only be workingfilectx's initially).
1251 None (f1 and f2 can only be workingfilectx's initially).
1251 """
1252 """
1252
1253
1253 if f1 == f2:
1254 if f1 == f2:
1254 return True # a match
1255 return True # a match
1255
1256
1256 g1, g2 = f1.ancestors(), f2.ancestors()
1257 g1, g2 = f1.ancestors(), f2.ancestors()
1257 try:
1258 try:
1258 f1r, f2r = f1.linkrev(), f2.linkrev()
1259 f1r, f2r = f1.linkrev(), f2.linkrev()
1259
1260
1260 if f1r is None:
1261 if f1r is None:
1261 f1 = next(g1)
1262 f1 = next(g1)
1262 if f2r is None:
1263 if f2r is None:
1263 f2 = next(g2)
1264 f2 = next(g2)
1264
1265
1265 while True:
1266 while True:
1266 f1r, f2r = f1.linkrev(), f2.linkrev()
1267 f1r, f2r = f1.linkrev(), f2.linkrev()
1267 if f1r > f2r:
1268 if f1r > f2r:
1268 f1 = next(g1)
1269 f1 = next(g1)
1269 elif f2r > f1r:
1270 elif f2r > f1r:
1270 f2 = next(g2)
1271 f2 = next(g2)
1271 else: # f1 and f2 point to files in the same linkrev
1272 else: # f1 and f2 point to files in the same linkrev
1272 return f1 == f2 # true if they point to the same file
1273 return f1 == f2 # true if they point to the same file
1273 except StopIteration:
1274 except StopIteration:
1274 return False
1275 return False
1275
1276
1276
1277
1277 def graftcopies(wctx, ctx, base):
1278 def graftcopies(wctx, ctx, base):
1278 """reproduce copies between base and ctx in the wctx
1279 """reproduce copies between base and ctx in the wctx
1279
1280
1280 Unlike mergecopies(), this function will only consider copies between base
1281 Unlike mergecopies(), this function will only consider copies between base
1281 and ctx; it will ignore copies between base and wctx. Also unlike
1282 and ctx; it will ignore copies between base and wctx. Also unlike
1282 mergecopies(), this function will apply copies to the working copy (instead
1283 mergecopies(), this function will apply copies to the working copy (instead
1283 of just returning information about the copies). That makes it cheaper
1284 of just returning information about the copies). That makes it cheaper
1284 (especially in the common case of base==ctx.p1()) and useful also when
1285 (especially in the common case of base==ctx.p1()) and useful also when
1285 experimental.copytrace=off.
1286 experimental.copytrace=off.
1286
1287
1287 merge.update() will have already marked most copies, but it will only
1288 merge.update() will have already marked most copies, but it will only
1288 mark copies if it thinks the source files are related (see
1289 mark copies if it thinks the source files are related (see
1289 merge._related()). It will also not mark copies if the file wasn't modified
1290 merge._related()). It will also not mark copies if the file wasn't modified
1290 on the local side. This function adds the copies that were "missed"
1291 on the local side. This function adds the copies that were "missed"
1291 by merge.update().
1292 by merge.update().
1292 """
1293 """
1293 new_copies = pathcopies(base, ctx)
1294 new_copies = pathcopies(base, ctx)
1294 parent = wctx.p1()
1295 parent = wctx.p1()
1295 _filter(parent, wctx, new_copies)
1296 _filter(parent, wctx, new_copies)
1296 # Extra filtering to drop copy information for files that existed before
1297 # Extra filtering to drop copy information for files that existed before
1297 # the graft. This is to handle the case of grafting a rename onto a commit
1298 # the graft. This is to handle the case of grafting a rename onto a commit
1298 # that already has the rename. Otherwise the presence of copy information
1299 # that already has the rename. Otherwise the presence of copy information
1299 # would result in the creation of an empty commit where we would prefer to
1300 # would result in the creation of an empty commit where we would prefer to
1300 # not create one.
1301 # not create one.
1301 for dest, __ in list(new_copies.items()):
1302 for dest, __ in list(new_copies.items()):
1302 if dest in parent:
1303 if dest in parent:
1303 del new_copies[dest]
1304 del new_copies[dest]
1304 for dst, src in pycompat.iteritems(new_copies):
1305 for dst, src in pycompat.iteritems(new_copies):
1305 wctx[dst].markcopied(src)
1306 wctx[dst].markcopied(src)
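
A small sketch (hypothetical file names, a plain set standing in for the "dest in parent" manifest lookup) of the extra filtering step above, which drops copy records whose destination already exists in the graft parent:

    new_copies = {b'renamed.txt': b'original.txt', b'existing.txt': b'old.txt'}
    parent_files = {b'existing.txt'}  # stands in for "dest in parent"
    for dest in list(new_copies):
        if dest in parent_files:
            del new_copies[dest]
    # new_copies == {b'renamed.txt': b'original.txt'}; only the genuinely new
    # rename is then marked with markcopied()
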
@@ -1,3293 +1,3300 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 # coding: utf8
2 # coding: utf8
3 #
3 #
4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 """Storage back-end for Mercurial.
9 """Storage back-end for Mercurial.
10
10
11 This provides efficient delta storage with O(1) retrieve and append
11 This provides efficient delta storage with O(1) retrieve and append
12 and O(changes) merge between branches.
12 and O(changes) merge between branches.
13 """
13 """
14
14
15 from __future__ import absolute_import
15 from __future__ import absolute_import
16
16
17 import binascii
17 import binascii
18 import collections
18 import collections
19 import contextlib
19 import contextlib
20 import errno
20 import errno
21 import io
21 import io
22 import os
22 import os
23 import struct
23 import struct
24 import zlib
24 import zlib
25
25
26 # import stuff from node for others to import from revlog
26 # import stuff from node for others to import from revlog
27 from .node import (
27 from .node import (
28 bin,
28 bin,
29 hex,
29 hex,
30 nullrev,
30 nullrev,
31 sha1nodeconstants,
31 sha1nodeconstants,
32 short,
32 short,
33 wdirrev,
33 wdirrev,
34 )
34 )
35 from .i18n import _
35 from .i18n import _
36 from .pycompat import getattr
36 from .pycompat import getattr
37 from .revlogutils.constants import (
37 from .revlogutils.constants import (
38 ALL_KINDS,
38 ALL_KINDS,
39 CHANGELOGV2,
39 CHANGELOGV2,
40 COMP_MODE_DEFAULT,
40 COMP_MODE_DEFAULT,
41 COMP_MODE_INLINE,
41 COMP_MODE_INLINE,
42 COMP_MODE_PLAIN,
42 COMP_MODE_PLAIN,
43 FEATURES_BY_VERSION,
43 FEATURES_BY_VERSION,
44 FLAG_GENERALDELTA,
44 FLAG_GENERALDELTA,
45 FLAG_INLINE_DATA,
45 FLAG_INLINE_DATA,
46 INDEX_HEADER,
46 INDEX_HEADER,
47 KIND_CHANGELOG,
47 KIND_CHANGELOG,
48 REVLOGV0,
48 REVLOGV0,
49 REVLOGV1,
49 REVLOGV1,
50 REVLOGV1_FLAGS,
50 REVLOGV1_FLAGS,
51 REVLOGV2,
51 REVLOGV2,
52 REVLOGV2_FLAGS,
52 REVLOGV2_FLAGS,
53 REVLOG_DEFAULT_FLAGS,
53 REVLOG_DEFAULT_FLAGS,
54 REVLOG_DEFAULT_FORMAT,
54 REVLOG_DEFAULT_FORMAT,
55 REVLOG_DEFAULT_VERSION,
55 REVLOG_DEFAULT_VERSION,
56 SUPPORTED_FLAGS,
56 SUPPORTED_FLAGS,
57 )
57 )
58 from .revlogutils.flagutil import (
58 from .revlogutils.flagutil import (
59 REVIDX_DEFAULT_FLAGS,
59 REVIDX_DEFAULT_FLAGS,
60 REVIDX_ELLIPSIS,
60 REVIDX_ELLIPSIS,
61 REVIDX_EXTSTORED,
61 REVIDX_EXTSTORED,
62 REVIDX_FLAGS_ORDER,
62 REVIDX_FLAGS_ORDER,
63 REVIDX_HASCOPIESINFO,
63 REVIDX_HASCOPIESINFO,
64 REVIDX_ISCENSORED,
64 REVIDX_ISCENSORED,
65 REVIDX_RAWTEXT_CHANGING_FLAGS,
65 REVIDX_RAWTEXT_CHANGING_FLAGS,
66 )
66 )
67 from .thirdparty import attr
67 from .thirdparty import attr
68 from . import (
68 from . import (
69 ancestor,
69 ancestor,
70 dagop,
70 dagop,
71 error,
71 error,
72 mdiff,
72 mdiff,
73 policy,
73 policy,
74 pycompat,
74 pycompat,
75 revlogutils,
75 revlogutils,
76 templatefilters,
76 templatefilters,
77 util,
77 util,
78 )
78 )
79 from .interfaces import (
79 from .interfaces import (
80 repository,
80 repository,
81 util as interfaceutil,
81 util as interfaceutil,
82 )
82 )
83 from .revlogutils import (
83 from .revlogutils import (
84 censor,
84 censor,
85 deltas as deltautil,
85 deltas as deltautil,
86 docket as docketutil,
86 docket as docketutil,
87 flagutil,
87 flagutil,
88 nodemap as nodemaputil,
88 nodemap as nodemaputil,
89 randomaccessfile,
89 randomaccessfile,
90 revlogv0,
90 revlogv0,
91 sidedata as sidedatautil,
91 sidedata as sidedatautil,
92 )
92 )
93 from .utils import (
93 from .utils import (
94 storageutil,
94 storageutil,
95 stringutil,
95 stringutil,
96 )
96 )
97
97
98 # blanked usage of all the names to prevent pyflakes warnings
98 # blanked usage of all the names to prevent pyflakes warnings
99 # We need these names available in the module for extensions.
99 # We need these names available in the module for extensions.
100
100
101 REVLOGV0
101 REVLOGV0
102 REVLOGV1
102 REVLOGV1
103 REVLOGV2
103 REVLOGV2
104 FLAG_INLINE_DATA
104 FLAG_INLINE_DATA
105 FLAG_GENERALDELTA
105 FLAG_GENERALDELTA
106 REVLOG_DEFAULT_FLAGS
106 REVLOG_DEFAULT_FLAGS
107 REVLOG_DEFAULT_FORMAT
107 REVLOG_DEFAULT_FORMAT
108 REVLOG_DEFAULT_VERSION
108 REVLOG_DEFAULT_VERSION
109 REVLOGV1_FLAGS
109 REVLOGV1_FLAGS
110 REVLOGV2_FLAGS
110 REVLOGV2_FLAGS
111 REVIDX_ISCENSORED
111 REVIDX_ISCENSORED
112 REVIDX_ELLIPSIS
112 REVIDX_ELLIPSIS
113 REVIDX_HASCOPIESINFO
113 REVIDX_HASCOPIESINFO
114 REVIDX_EXTSTORED
114 REVIDX_EXTSTORED
115 REVIDX_DEFAULT_FLAGS
115 REVIDX_DEFAULT_FLAGS
116 REVIDX_FLAGS_ORDER
116 REVIDX_FLAGS_ORDER
117 REVIDX_RAWTEXT_CHANGING_FLAGS
117 REVIDX_RAWTEXT_CHANGING_FLAGS
118
118
119 parsers = policy.importmod('parsers')
119 parsers = policy.importmod('parsers')
120 rustancestor = policy.importrust('ancestor')
120 rustancestor = policy.importrust('ancestor')
121 rustdagop = policy.importrust('dagop')
121 rustdagop = policy.importrust('dagop')
122 rustrevlog = policy.importrust('revlog')
122 rustrevlog = policy.importrust('revlog')
123
123
124 # Aliased for performance.
124 # Aliased for performance.
125 _zlibdecompress = zlib.decompress
125 _zlibdecompress = zlib.decompress
126
126
127 # max size of revlog with inline data
127 # max size of revlog with inline data
128 _maxinline = 131072
128 _maxinline = 131072
129
129
130 # Flag processors for REVIDX_ELLIPSIS.
130 # Flag processors for REVIDX_ELLIPSIS.
131 def ellipsisreadprocessor(rl, text):
131 def ellipsisreadprocessor(rl, text):
132 return text, False
132 return text, False
133
133
134
134
135 def ellipsiswriteprocessor(rl, text):
135 def ellipsiswriteprocessor(rl, text):
136 return text, False
136 return text, False
137
137
138
138
139 def ellipsisrawprocessor(rl, text):
139 def ellipsisrawprocessor(rl, text):
140 return False
140 return False
141
141
142
142
143 ellipsisprocessor = (
143 ellipsisprocessor = (
144 ellipsisreadprocessor,
144 ellipsisreadprocessor,
145 ellipsiswriteprocessor,
145 ellipsiswriteprocessor,
146 ellipsisrawprocessor,
146 ellipsisrawprocessor,
147 )
147 )
148
148
149
149
150 def _verify_revision(rl, skipflags, state, node):
150 def _verify_revision(rl, skipflags, state, node):
151 """Verify the integrity of the given revlog ``node`` while providing a hook
151 """Verify the integrity of the given revlog ``node`` while providing a hook
152 point for extensions to influence the operation."""
152 point for extensions to influence the operation."""
153 if skipflags:
153 if skipflags:
154 state[b'skipread'].add(node)
154 state[b'skipread'].add(node)
155 else:
155 else:
156 # Side-effect: read content and verify hash.
156 # Side-effect: read content and verify hash.
157 rl.revision(node)
157 rl.revision(node)
158
158
159
159
160 # True if a fast implementation for persistent-nodemap is available
160 # True if a fast implementation for persistent-nodemap is available
161 #
161 #
162 # We also consider the "pure" python implementation to be "fast" because
162 # We also consider the "pure" python implementation to be "fast" because
163 # people using pure don't really have performance considerations (and a
163 # people using pure don't really have performance considerations (and a
164 # wheelbarrow of other slowness sources)
164 # wheelbarrow of other slowness sources)
165 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
165 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
166 parsers, 'BaseIndexObject'
166 parsers, 'BaseIndexObject'
167 )
167 )
168
168
169
169
170 @interfaceutil.implementer(repository.irevisiondelta)
170 @interfaceutil.implementer(repository.irevisiondelta)
171 @attr.s(slots=True)
171 @attr.s(slots=True)
172 class revlogrevisiondelta(object):
172 class revlogrevisiondelta(object):
173 node = attr.ib()
173 node = attr.ib()
174 p1node = attr.ib()
174 p1node = attr.ib()
175 p2node = attr.ib()
175 p2node = attr.ib()
176 basenode = attr.ib()
176 basenode = attr.ib()
177 flags = attr.ib()
177 flags = attr.ib()
178 baserevisionsize = attr.ib()
178 baserevisionsize = attr.ib()
179 revision = attr.ib()
179 revision = attr.ib()
180 delta = attr.ib()
180 delta = attr.ib()
181 sidedata = attr.ib()
181 sidedata = attr.ib()
182 protocol_flags = attr.ib()
182 protocol_flags = attr.ib()
183 linknode = attr.ib(default=None)
183 linknode = attr.ib(default=None)
184
184
185
185
186 @interfaceutil.implementer(repository.iverifyproblem)
186 @interfaceutil.implementer(repository.iverifyproblem)
187 @attr.s(frozen=True)
187 @attr.s(frozen=True)
188 class revlogproblem(object):
188 class revlogproblem(object):
189 warning = attr.ib(default=None)
189 warning = attr.ib(default=None)
190 error = attr.ib(default=None)
190 error = attr.ib(default=None)
191 node = attr.ib(default=None)
191 node = attr.ib(default=None)
192
192
193
193
194 def parse_index_v1(data, inline):
194 def parse_index_v1(data, inline):
195 # call the C implementation to parse the index data
195 # call the C implementation to parse the index data
196 index, cache = parsers.parse_index2(data, inline)
196 index, cache = parsers.parse_index2(data, inline)
197 return index, cache
197 return index, cache
198
198
199
199
200 def parse_index_v2(data, inline):
200 def parse_index_v2(data, inline):
201 # call the C implementation to parse the index data
201 # call the C implementation to parse the index data
202 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
202 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
203 return index, cache
203 return index, cache
204
204
205
205
206 def parse_index_cl_v2(data, inline):
206 def parse_index_cl_v2(data, inline):
207 # call the C implementation to parse the index data
207 # call the C implementation to parse the index data
208 assert not inline
208 assert not inline
209 from .pure.parsers import parse_index_cl_v2
209 from .pure.parsers import parse_index_cl_v2
210
210
211 index, cache = parse_index_cl_v2(data)
211 index, cache = parse_index_cl_v2(data)
212 return index, cache
212 return index, cache
213
213
214
214
215 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
215 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
216
216
217 def parse_index_v1_nodemap(data, inline):
217 def parse_index_v1_nodemap(data, inline):
218 index, cache = parsers.parse_index_devel_nodemap(data, inline)
218 index, cache = parsers.parse_index_devel_nodemap(data, inline)
219 return index, cache
219 return index, cache
220
220
221
221
222 else:
222 else:
223 parse_index_v1_nodemap = None
223 parse_index_v1_nodemap = None
224
224
225
225
226 def parse_index_v1_mixed(data, inline):
226 def parse_index_v1_mixed(data, inline):
227 index, cache = parse_index_v1(data, inline)
227 index, cache = parse_index_v1(data, inline)
228 return rustrevlog.MixedIndex(index), cache
228 return rustrevlog.MixedIndex(index), cache
229
229
230
230
231 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
231 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
232 # signed integer)
232 # signed integer)
233 _maxentrysize = 0x7FFFFFFF
233 _maxentrysize = 0x7FFFFFFF
234
234
235 FILE_TOO_SHORT_MSG = _(
235 FILE_TOO_SHORT_MSG = _(
236 b'cannot read from revlog %s;'
236 b'cannot read from revlog %s;'
237 b' expected %d bytes from offset %d, data size is %d'
237 b' expected %d bytes from offset %d, data size is %d'
238 )
238 )
239
239
240
240
241 class revlog(object):
241 class revlog(object):
242 """
242 """
243 the underlying revision storage object
243 the underlying revision storage object
244
244
245 A revlog consists of two parts, an index and the revision data.
245 A revlog consists of two parts, an index and the revision data.
246
246
247 The index is a file with a fixed record size containing
247 The index is a file with a fixed record size containing
248 information on each revision, including its nodeid (hash), the
248 information on each revision, including its nodeid (hash), the
249 nodeids of its parents, the position and offset of its data within
249 nodeids of its parents, the position and offset of its data within
250 the data file, and the revision it's based on. Finally, each entry
250 the data file, and the revision it's based on. Finally, each entry
251 contains a linkrev entry that can serve as a pointer to external
251 contains a linkrev entry that can serve as a pointer to external
252 data.
252 data.
253
253
254 The revision data itself is a linear collection of data chunks.
254 The revision data itself is a linear collection of data chunks.
255 Each chunk represents a revision and is usually represented as a
255 Each chunk represents a revision and is usually represented as a
256 delta against the previous chunk. To bound lookup time, runs of
256 delta against the previous chunk. To bound lookup time, runs of
257 deltas are limited to about 2 times the length of the original
257 deltas are limited to about 2 times the length of the original
258 version data. This makes retrieval of a version proportional to
258 version data. This makes retrieval of a version proportional to
259 its size, or O(1) relative to the number of revisions.
259 its size, or O(1) relative to the number of revisions.
260
260
261 Both pieces of the revlog are written to in an append-only
261 Both pieces of the revlog are written to in an append-only
262 fashion, which means we never need to rewrite a file to insert or
262 fashion, which means we never need to rewrite a file to insert or
263 remove data, and can use some simple techniques to avoid the need
263 remove data, and can use some simple techniques to avoid the need
264 for locking while reading.
264 for locking while reading.
265
265
266 If checkambig, indexfile is opened with checkambig=True at
266 If checkambig, indexfile is opened with checkambig=True at
267 writing, to avoid file stat ambiguity.
267 writing, to avoid file stat ambiguity.
268
268
269 If mmaplargeindex is True, and an mmapindexthreshold is set, the
269 If mmaplargeindex is True, and an mmapindexthreshold is set, the
270 index will be mmapped rather than read if it is larger than the
270 index will be mmapped rather than read if it is larger than the
271 configured threshold.
271 configured threshold.
272
272
273 If censorable is True, the revlog can have censored revisions.
273 If censorable is True, the revlog can have censored revisions.
274
274
275 If `upperboundcomp` is not None, this is the expected maximal gain from
275 If `upperboundcomp` is not None, this is the expected maximal gain from
276 compression for the data content.
276 compression for the data content.
277
277
278 `concurrencychecker` is an optional function that receives 3 arguments: a
278 `concurrencychecker` is an optional function that receives 3 arguments: a
279 file handle, a filename, and an expected position. It should check whether
279 file handle, a filename, and an expected position. It should check whether
280 the current position in the file handle is valid, and log/warn/fail (by
280 the current position in the file handle is valid, and log/warn/fail (by
281 raising).
281 raising).
282
282
283 See mercurial/revlogutils/constants.py for details about the content of an
283 See mercurial/revlogutils/constants.py for details about the content of an
284 index entry.
284 index entry.
285 """
285 """
286
286
287 _flagserrorclass = error.RevlogError
287 _flagserrorclass = error.RevlogError
288
288
289 def __init__(
289 def __init__(
290 self,
290 self,
291 opener,
291 opener,
292 target,
292 target,
293 radix,
293 radix,
294 postfix=None, # only exist for `tmpcensored` now
294 postfix=None, # only exist for `tmpcensored` now
295 checkambig=False,
295 checkambig=False,
296 mmaplargeindex=False,
296 mmaplargeindex=False,
297 censorable=False,
297 censorable=False,
298 upperboundcomp=None,
298 upperboundcomp=None,
299 persistentnodemap=False,
299 persistentnodemap=False,
300 concurrencychecker=None,
300 concurrencychecker=None,
301 trypending=False,
301 trypending=False,
302 ):
302 ):
303 """
303 """
304 create a revlog object
304 create a revlog object
305
305
306 opener is a function that abstracts the file opening operation
306 opener is a function that abstracts the file opening operation
307 and can be used to implement COW semantics or the like.
307 and can be used to implement COW semantics or the like.
308
308
309 `target`: a (KIND, ID) tuple that identifies the content stored in
309 `target`: a (KIND, ID) tuple that identifies the content stored in
310 this revlog. It helps the rest of the code understand what the revlog
310 this revlog. It helps the rest of the code understand what the revlog
311 is about without having to resort to heuristics and index filename
311 is about without having to resort to heuristics and index filename
312 analysis. Note that this must reliably be set by normal code, but
312 analysis. Note that this must reliably be set by normal code, but
313 test, debug, or performance measurement code might not set it to an
313 test, debug, or performance measurement code might not set it to an
314 accurate value.
314 accurate value.
315 """
315 """
316 self.upperboundcomp = upperboundcomp
316 self.upperboundcomp = upperboundcomp
317
317
318 self.radix = radix
318 self.radix = radix
319
319
320 self._docket_file = None
320 self._docket_file = None
321 self._indexfile = None
321 self._indexfile = None
322 self._datafile = None
322 self._datafile = None
323 self._sidedatafile = None
323 self._sidedatafile = None
324 self._nodemap_file = None
324 self._nodemap_file = None
325 self.postfix = postfix
325 self.postfix = postfix
326 self._trypending = trypending
326 self._trypending = trypending
327 self.opener = opener
327 self.opener = opener
328 if persistentnodemap:
328 if persistentnodemap:
329 self._nodemap_file = nodemaputil.get_nodemap_file(self)
329 self._nodemap_file = nodemaputil.get_nodemap_file(self)
330
330
331 assert target[0] in ALL_KINDS
331 assert target[0] in ALL_KINDS
332 assert len(target) == 2
332 assert len(target) == 2
333 self.target = target
333 self.target = target
334 # When True, indexfile is opened with checkambig=True at writing, to
334 # When True, indexfile is opened with checkambig=True at writing, to
335 # avoid file stat ambiguity.
335 # avoid file stat ambiguity.
336 self._checkambig = checkambig
336 self._checkambig = checkambig
337 self._mmaplargeindex = mmaplargeindex
337 self._mmaplargeindex = mmaplargeindex
338 self._censorable = censorable
338 self._censorable = censorable
339 # 3-tuple of (node, rev, text) for a raw revision.
339 # 3-tuple of (node, rev, text) for a raw revision.
340 self._revisioncache = None
340 self._revisioncache = None
341 # Maps rev to chain base rev.
341 # Maps rev to chain base rev.
342 self._chainbasecache = util.lrucachedict(100)
342 self._chainbasecache = util.lrucachedict(100)
343 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
343 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
344 self._chunkcache = (0, b'')
344 self._chunkcache = (0, b'')
345 # How much data to read and cache into the raw revlog data cache.
345 # How much data to read and cache into the raw revlog data cache.
346 self._chunkcachesize = 65536
346 self._chunkcachesize = 65536
347 self._maxchainlen = None
347 self._maxchainlen = None
348 self._deltabothparents = True
348 self._deltabothparents = True
349 self.index = None
349 self.index = None
350 self._docket = None
350 self._docket = None
351 self._nodemap_docket = None
351 self._nodemap_docket = None
352 # Mapping of partial identifiers to full nodes.
352 # Mapping of partial identifiers to full nodes.
353 self._pcache = {}
353 self._pcache = {}
354 # Mapping of revision integer to full node.
354 # Mapping of revision integer to full node.
355 self._compengine = b'zlib'
355 self._compengine = b'zlib'
356 self._compengineopts = {}
356 self._compengineopts = {}
357 self._maxdeltachainspan = -1
357 self._maxdeltachainspan = -1
358 self._withsparseread = False
358 self._withsparseread = False
359 self._sparserevlog = False
359 self._sparserevlog = False
360 self.hassidedata = False
360 self.hassidedata = False
361 self._srdensitythreshold = 0.50
361 self._srdensitythreshold = 0.50
362 self._srmingapsize = 262144
362 self._srmingapsize = 262144
363
363
364 # Make copy of flag processors so each revlog instance can support
364 # Make copy of flag processors so each revlog instance can support
365 # custom flags.
365 # custom flags.
366 self._flagprocessors = dict(flagutil.flagprocessors)
366 self._flagprocessors = dict(flagutil.flagprocessors)
367
367
368 # 3-tuple of file handles being used for active writing.
368 # 3-tuple of file handles being used for active writing.
369 self._writinghandles = None
369 self._writinghandles = None
370 # prevent nesting of addgroup
370 # prevent nesting of addgroup
371 self._adding_group = None
371 self._adding_group = None
372
372
373 self._loadindex()
373 self._loadindex()
374
374
375 self._concurrencychecker = concurrencychecker
375 self._concurrencychecker = concurrencychecker
376
376
377 def _init_opts(self):
377 def _init_opts(self):
378 """process options (from above/config) to setup associated default revlog mode
378 """process options (from above/config) to setup associated default revlog mode
379
379
380 These values might be affected when actually reading on disk information.
380 These values might be affected when actually reading on disk information.
381
381
382 The relevant values are returned for use in _loadindex().
382 The relevant values are returned for use in _loadindex().
383
383
384 * newversionflags:
384 * newversionflags:
385 version header to use if we need to create a new revlog
385 version header to use if we need to create a new revlog
386
386
387 * mmapindexthreshold:
387 * mmapindexthreshold:
388 minimal index size at which to start using mmap
388 minimal index size at which to start using mmap
389
389
390 * force_nodemap:
390 * force_nodemap:
391 force the usage of a "development" version of the nodemap code
391 force the usage of a "development" version of the nodemap code
392 """
392 """
393 mmapindexthreshold = None
393 mmapindexthreshold = None
394 opts = self.opener.options
394 opts = self.opener.options
395
395
396 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
396 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
397 new_header = CHANGELOGV2
397 new_header = CHANGELOGV2
398 elif b'revlogv2' in opts:
398 elif b'revlogv2' in opts:
399 new_header = REVLOGV2
399 new_header = REVLOGV2
400 elif b'revlogv1' in opts:
400 elif b'revlogv1' in opts:
401 new_header = REVLOGV1 | FLAG_INLINE_DATA
401 new_header = REVLOGV1 | FLAG_INLINE_DATA
402 if b'generaldelta' in opts:
402 if b'generaldelta' in opts:
403 new_header |= FLAG_GENERALDELTA
403 new_header |= FLAG_GENERALDELTA
404 elif b'revlogv0' in self.opener.options:
404 elif b'revlogv0' in self.opener.options:
405 new_header = REVLOGV0
405 new_header = REVLOGV0
406 else:
406 else:
407 new_header = REVLOG_DEFAULT_VERSION
407 new_header = REVLOG_DEFAULT_VERSION
408
408
409 if b'chunkcachesize' in opts:
409 if b'chunkcachesize' in opts:
410 self._chunkcachesize = opts[b'chunkcachesize']
410 self._chunkcachesize = opts[b'chunkcachesize']
411 if b'maxchainlen' in opts:
411 if b'maxchainlen' in opts:
412 self._maxchainlen = opts[b'maxchainlen']
412 self._maxchainlen = opts[b'maxchainlen']
413 if b'deltabothparents' in opts:
413 if b'deltabothparents' in opts:
414 self._deltabothparents = opts[b'deltabothparents']
414 self._deltabothparents = opts[b'deltabothparents']
415 self._lazydelta = bool(opts.get(b'lazydelta', True))
415 self._lazydelta = bool(opts.get(b'lazydelta', True))
416 self._lazydeltabase = False
416 self._lazydeltabase = False
417 if self._lazydelta:
417 if self._lazydelta:
418 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
418 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
419 if b'compengine' in opts:
419 if b'compengine' in opts:
420 self._compengine = opts[b'compengine']
420 self._compengine = opts[b'compengine']
421 if b'zlib.level' in opts:
421 if b'zlib.level' in opts:
422 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
422 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
423 if b'zstd.level' in opts:
423 if b'zstd.level' in opts:
424 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
424 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
425 if b'maxdeltachainspan' in opts:
425 if b'maxdeltachainspan' in opts:
426 self._maxdeltachainspan = opts[b'maxdeltachainspan']
426 self._maxdeltachainspan = opts[b'maxdeltachainspan']
427 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
427 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
428 mmapindexthreshold = opts[b'mmapindexthreshold']
428 mmapindexthreshold = opts[b'mmapindexthreshold']
429 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
429 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
430 withsparseread = bool(opts.get(b'with-sparse-read', False))
430 withsparseread = bool(opts.get(b'with-sparse-read', False))
431 # sparse-revlog forces sparse-read
431 # sparse-revlog forces sparse-read
432 self._withsparseread = self._sparserevlog or withsparseread
432 self._withsparseread = self._sparserevlog or withsparseread
433 if b'sparse-read-density-threshold' in opts:
433 if b'sparse-read-density-threshold' in opts:
434 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
434 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
435 if b'sparse-read-min-gap-size' in opts:
435 if b'sparse-read-min-gap-size' in opts:
436 self._srmingapsize = opts[b'sparse-read-min-gap-size']
436 self._srmingapsize = opts[b'sparse-read-min-gap-size']
437 if opts.get(b'enableellipsis'):
437 if opts.get(b'enableellipsis'):
438 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
438 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
439
439
440 # revlog v0 doesn't have flag processors
440 # revlog v0 doesn't have flag processors
441 for flag, processor in pycompat.iteritems(
441 for flag, processor in pycompat.iteritems(
442 opts.get(b'flagprocessors', {})
442 opts.get(b'flagprocessors', {})
443 ):
443 ):
444 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
444 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
445
445
446 if self._chunkcachesize <= 0:
446 if self._chunkcachesize <= 0:
447 raise error.RevlogError(
447 raise error.RevlogError(
448 _(b'revlog chunk cache size %r is not greater than 0')
448 _(b'revlog chunk cache size %r is not greater than 0')
449 % self._chunkcachesize
449 % self._chunkcachesize
450 )
450 )
451 elif self._chunkcachesize & (self._chunkcachesize - 1):
451 elif self._chunkcachesize & (self._chunkcachesize - 1):
452 raise error.RevlogError(
452 raise error.RevlogError(
453 _(b'revlog chunk cache size %r is not a power of 2')
453 _(b'revlog chunk cache size %r is not a power of 2')
454 % self._chunkcachesize
454 % self._chunkcachesize
455 )
455 )
456 force_nodemap = opts.get(b'devel-force-nodemap', False)
456 force_nodemap = opts.get(b'devel-force-nodemap', False)
457 return new_header, mmapindexthreshold, force_nodemap
457 return new_header, mmapindexthreshold, force_nodemap
458
458
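
As a worked illustration of the header handling above (using only the REVLOGV1/FLAG_* constants imported at the top of this file, and assuming the flags occupy the high bits as the masking in _loadindex() below implies), the format version and the feature flags share one 32-bit header and are split back apart with simple masks. The chunk cache size validation relies on the classic power-of-two bit test.

    header = REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA
    format_version = header & 0xFFFF   # low 16 bits -> REVLOGV1
    format_flags = header & ~0xFFFF    # high bits -> INLINE_DATA | GENERALDELTA

    # n & (n - 1) == 0 only when n has a single bit set, i.e. is a power of two
    assert 65536 & (65536 - 1) == 0
    assert 65537 & (65537 - 1) != 0
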
459 def _get_data(self, filepath, mmap_threshold, size=None):
459 def _get_data(self, filepath, mmap_threshold, size=None):
460 """return a file content with or without mmap
460 """return a file content with or without mmap
461
461
462 If the file is missing return the empty string"""
462 If the file is missing return the empty string"""
463 try:
463 try:
464 with self.opener(filepath) as fp:
464 with self.opener(filepath) as fp:
465 if mmap_threshold is not None:
465 if mmap_threshold is not None:
466 file_size = self.opener.fstat(fp).st_size
466 file_size = self.opener.fstat(fp).st_size
467 if file_size >= mmap_threshold:
467 if file_size >= mmap_threshold:
468 if size is not None:
468 if size is not None:
469 # avoid potential mmap crash
469 # avoid potential mmap crash
470 size = min(file_size, size)
470 size = min(file_size, size)
471 # TODO: should .close() to release resources without
471 # TODO: should .close() to release resources without
472 # relying on Python GC
472 # relying on Python GC
473 if size is None:
473 if size is None:
474 return util.buffer(util.mmapread(fp))
474 return util.buffer(util.mmapread(fp))
475 else:
475 else:
476 return util.buffer(util.mmapread(fp, size))
476 return util.buffer(util.mmapread(fp, size))
477 if size is None:
477 if size is None:
478 return fp.read()
478 return fp.read()
479 else:
479 else:
480 return fp.read(size)
480 return fp.read(size)
481 except IOError as inst:
481 except IOError as inst:
482 if inst.errno != errno.ENOENT:
482 if inst.errno != errno.ENOENT:
483 raise
483 raise
484 return b''
484 return b''
485
485
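
A minimal sketch of the gating logic in _get_data() above, with made-up sizes and a hypothetical helper name; whether the file is mmapped or read normally depends only on the configured threshold and the on-disk size:

    def _should_mmap(file_size, mmap_threshold):
        return mmap_threshold is not None and file_size >= mmap_threshold

    assert _should_mmap(1024 * 1024, 65536)      # large index -> mmap
    assert not _should_mmap(4096, 65536)         # small index -> plain read
    assert not _should_mmap(1024 * 1024, None)   # no threshold -> plain read
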
486 def _loadindex(self, docket=None):
486 def _loadindex(self, docket=None):
487
487
488 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
488 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
489
489
490 if self.postfix is not None:
490 if self.postfix is not None:
491 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
491 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
492 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
492 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
493 entry_point = b'%s.i.a' % self.radix
493 entry_point = b'%s.i.a' % self.radix
494 else:
494 else:
495 entry_point = b'%s.i' % self.radix
495 entry_point = b'%s.i' % self.radix
496
496
497 if docket is not None:
497 if docket is not None:
498 self._docket = docket
498 self._docket = docket
499 self._docket_file = entry_point
499 self._docket_file = entry_point
500 else:
500 else:
501 entry_data = b''
501 entry_data = b''
502 self._initempty = True
502 self._initempty = True
503 entry_data = self._get_data(entry_point, mmapindexthreshold)
503 entry_data = self._get_data(entry_point, mmapindexthreshold)
504 if len(entry_data) > 0:
504 if len(entry_data) > 0:
505 header = INDEX_HEADER.unpack(entry_data[:4])[0]
505 header = INDEX_HEADER.unpack(entry_data[:4])[0]
506 self._initempty = False
506 self._initempty = False
507 else:
507 else:
508 header = new_header
508 header = new_header
509
509
510 self._format_flags = header & ~0xFFFF
510 self._format_flags = header & ~0xFFFF
511 self._format_version = header & 0xFFFF
511 self._format_version = header & 0xFFFF
512
512
513 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
513 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
514 if supported_flags is None:
514 if supported_flags is None:
515 msg = _(b'unknown version (%d) in revlog %s')
515 msg = _(b'unknown version (%d) in revlog %s')
516 msg %= (self._format_version, self.display_id)
516 msg %= (self._format_version, self.display_id)
517 raise error.RevlogError(msg)
517 raise error.RevlogError(msg)
518 elif self._format_flags & ~supported_flags:
518 elif self._format_flags & ~supported_flags:
519 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
519 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
520 display_flag = self._format_flags >> 16
520 display_flag = self._format_flags >> 16
521 msg %= (display_flag, self._format_version, self.display_id)
521 msg %= (display_flag, self._format_version, self.display_id)
522 raise error.RevlogError(msg)
522 raise error.RevlogError(msg)
523
523
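# Illustrative sketch (not part of the revlog code): the 32-bit header parsed
# above packs the format flags in the high 16 bits and the version in the low
# 16 bits. For instance, an inline, general-delta revlog-v1 header would be
# 0x00030001 (FLAG_INLINE_DATA | FLAG_GENERALDELTA | REVLOGV1):
#
#     header = 0x00030001
#     format_flags = header & ~0xFFFF      # 0x00030000
#     format_version = header & 0xFFFF     # 1 (REVLOGV1)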
524 features = FEATURES_BY_VERSION[self._format_version]
524 features = FEATURES_BY_VERSION[self._format_version]
525 self._inline = features[b'inline'](self._format_flags)
525 self._inline = features[b'inline'](self._format_flags)
526 self._generaldelta = features[b'generaldelta'](self._format_flags)
526 self._generaldelta = features[b'generaldelta'](self._format_flags)
527 self.hassidedata = features[b'sidedata']
527 self.hassidedata = features[b'sidedata']
528
528
529 if not features[b'docket']:
529 if not features[b'docket']:
530 self._indexfile = entry_point
530 self._indexfile = entry_point
531 index_data = entry_data
531 index_data = entry_data
532 else:
532 else:
533 self._docket_file = entry_point
533 self._docket_file = entry_point
534 if self._initempty:
534 if self._initempty:
535 self._docket = docketutil.default_docket(self, header)
535 self._docket = docketutil.default_docket(self, header)
536 else:
536 else:
537 self._docket = docketutil.parse_docket(
537 self._docket = docketutil.parse_docket(
538 self, entry_data, use_pending=self._trypending
538 self, entry_data, use_pending=self._trypending
539 )
539 )
540
540
541 if self._docket is not None:
541 if self._docket is not None:
542 self._indexfile = self._docket.index_filepath()
542 self._indexfile = self._docket.index_filepath()
543 index_data = b''
543 index_data = b''
544 index_size = self._docket.index_end
544 index_size = self._docket.index_end
545 if index_size > 0:
545 if index_size > 0:
546 index_data = self._get_data(
546 index_data = self._get_data(
547 self._indexfile, mmapindexthreshold, size=index_size
547 self._indexfile, mmapindexthreshold, size=index_size
548 )
548 )
549 if len(index_data) < index_size:
549 if len(index_data) < index_size:
550 msg = _(b'too few index data for %s: got %d, expected %d')
550 msg = _(b'too few index data for %s: got %d, expected %d')
551 msg %= (self.display_id, len(index_data), index_size)
551 msg %= (self.display_id, len(index_data), index_size)
552 raise error.RevlogError(msg)
552 raise error.RevlogError(msg)
553
553
554 self._inline = False
554 self._inline = False
555 # generaldelta implied by version 2 revlogs.
555 # generaldelta implied by version 2 revlogs.
556 self._generaldelta = True
556 self._generaldelta = True
557 # the logic for persistent nodemap will be dealt with within the
557 # the logic for persistent nodemap will be dealt with within the
558 # main docket, so disable it for now.
558 # main docket, so disable it for now.
559 self._nodemap_file = None
559 self._nodemap_file = None
560
560
561 if self._docket is not None:
561 if self._docket is not None:
562 self._datafile = self._docket.data_filepath()
562 self._datafile = self._docket.data_filepath()
563 self._sidedatafile = self._docket.sidedata_filepath()
563 self._sidedatafile = self._docket.sidedata_filepath()
564 elif self.postfix is None:
564 elif self.postfix is None:
565 self._datafile = b'%s.d' % self.radix
565 self._datafile = b'%s.d' % self.radix
566 else:
566 else:
567 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
567 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
568
568
569 self.nodeconstants = sha1nodeconstants
569 self.nodeconstants = sha1nodeconstants
570 self.nullid = self.nodeconstants.nullid
570 self.nullid = self.nodeconstants.nullid
571
571
572 # sparse-revlog can't be on without general-delta (issue6056)
572 # sparse-revlog can't be on without general-delta (issue6056)
573 if not self._generaldelta:
573 if not self._generaldelta:
574 self._sparserevlog = False
574 self._sparserevlog = False
575
575
576 self._storedeltachains = True
576 self._storedeltachains = True
577
577
578 devel_nodemap = (
578 devel_nodemap = (
579 self._nodemap_file
579 self._nodemap_file
580 and force_nodemap
580 and force_nodemap
581 and parse_index_v1_nodemap is not None
581 and parse_index_v1_nodemap is not None
582 )
582 )
583
583
584 use_rust_index = False
584 use_rust_index = False
585 if rustrevlog is not None:
585 if rustrevlog is not None:
586 if self._nodemap_file is not None:
586 if self._nodemap_file is not None:
587 use_rust_index = True
587 use_rust_index = True
588 else:
588 else:
589 use_rust_index = self.opener.options.get(b'rust.index')
589 use_rust_index = self.opener.options.get(b'rust.index')
590
590
591 self._parse_index = parse_index_v1
591 self._parse_index = parse_index_v1
592 if self._format_version == REVLOGV0:
592 if self._format_version == REVLOGV0:
593 self._parse_index = revlogv0.parse_index_v0
593 self._parse_index = revlogv0.parse_index_v0
594 elif self._format_version == REVLOGV2:
594 elif self._format_version == REVLOGV2:
595 self._parse_index = parse_index_v2
595 self._parse_index = parse_index_v2
596 elif self._format_version == CHANGELOGV2:
596 elif self._format_version == CHANGELOGV2:
597 self._parse_index = parse_index_cl_v2
597 self._parse_index = parse_index_cl_v2
598 elif devel_nodemap:
598 elif devel_nodemap:
599 self._parse_index = parse_index_v1_nodemap
599 self._parse_index = parse_index_v1_nodemap
600 elif use_rust_index:
600 elif use_rust_index:
601 self._parse_index = parse_index_v1_mixed
601 self._parse_index = parse_index_v1_mixed
602 try:
602 try:
603 d = self._parse_index(index_data, self._inline)
603 d = self._parse_index(index_data, self._inline)
604 index, chunkcache = d
604 index, chunkcache = d
605 use_nodemap = (
605 use_nodemap = (
606 not self._inline
606 not self._inline
607 and self._nodemap_file is not None
607 and self._nodemap_file is not None
608 and util.safehasattr(index, 'update_nodemap_data')
608 and util.safehasattr(index, 'update_nodemap_data')
609 )
609 )
610 if use_nodemap:
610 if use_nodemap:
611 nodemap_data = nodemaputil.persisted_data(self)
611 nodemap_data = nodemaputil.persisted_data(self)
612 if nodemap_data is not None:
612 if nodemap_data is not None:
613 docket = nodemap_data[0]
613 docket = nodemap_data[0]
614 if (
614 if (
615 len(d[0]) > docket.tip_rev
615 len(d[0]) > docket.tip_rev
616 and d[0][docket.tip_rev][7] == docket.tip_node
616 and d[0][docket.tip_rev][7] == docket.tip_node
617 ):
617 ):
618 # no changelog tampering
618 # no changelog tampering
619 self._nodemap_docket = docket
619 self._nodemap_docket = docket
620 index.update_nodemap_data(*nodemap_data)
620 index.update_nodemap_data(*nodemap_data)
621 except (ValueError, IndexError):
621 except (ValueError, IndexError):
622 raise error.RevlogError(
622 raise error.RevlogError(
623 _(b"index %s is corrupted") % self.display_id
623 _(b"index %s is corrupted") % self.display_id
624 )
624 )
625 self.index = index
625 self.index = index
626 self._segmentfile = randomaccessfile.randomaccessfile(
626 self._segmentfile = randomaccessfile.randomaccessfile(
627 self.opener,
627 self.opener,
628 (self._indexfile if self._inline else self._datafile),
628 (self._indexfile if self._inline else self._datafile),
629 self._chunkcachesize,
629 self._chunkcachesize,
630 chunkcache,
630 chunkcache,
631 )
631 )
632 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
632 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
633 self.opener,
633 self.opener,
634 self._sidedatafile,
634 self._sidedatafile,
635 self._chunkcachesize,
635 self._chunkcachesize,
636 )
636 )
637 # revnum -> (chain-length, sum-delta-length)
637 # revnum -> (chain-length, sum-delta-length)
638 self._chaininfocache = util.lrucachedict(500)
638 self._chaininfocache = util.lrucachedict(500)
639 # revlog header -> revlog compressor
639 # revlog header -> revlog compressor
640 self._decompressors = {}
640 self._decompressors = {}
641
641
642 @util.propertycache
642 @util.propertycache
643 def revlog_kind(self):
643 def revlog_kind(self):
644 return self.target[0]
644 return self.target[0]
645
645
646 @util.propertycache
646 @util.propertycache
647 def display_id(self):
647 def display_id(self):
648 """The public facing "ID" of the revlog that we use in message"""
648 """The public facing "ID" of the revlog that we use in message"""
649 # Maybe we should build a user facing representation of
649 # Maybe we should build a user facing representation of
650 # revlog.target instead of using `self.radix`
650 # revlog.target instead of using `self.radix`
651 return self.radix
651 return self.radix
652
652
653 def _get_decompressor(self, t):
653 def _get_decompressor(self, t):
654 try:
654 try:
655 compressor = self._decompressors[t]
655 compressor = self._decompressors[t]
656 except KeyError:
656 except KeyError:
657 try:
657 try:
658 engine = util.compengines.forrevlogheader(t)
658 engine = util.compengines.forrevlogheader(t)
659 compressor = engine.revlogcompressor(self._compengineopts)
659 compressor = engine.revlogcompressor(self._compengineopts)
660 self._decompressors[t] = compressor
660 self._decompressors[t] = compressor
661 except KeyError:
661 except KeyError:
662 raise error.RevlogError(
662 raise error.RevlogError(
663 _(b'unknown compression type %s') % binascii.hexlify(t)
663 _(b'unknown compression type %s') % binascii.hexlify(t)
664 )
664 )
665 return compressor
665 return compressor
666
666
667 @util.propertycache
667 @util.propertycache
668 def _compressor(self):
668 def _compressor(self):
669 engine = util.compengines[self._compengine]
669 engine = util.compengines[self._compengine]
670 return engine.revlogcompressor(self._compengineopts)
670 return engine.revlogcompressor(self._compengineopts)
671
671
672 @util.propertycache
672 @util.propertycache
673 def _decompressor(self):
673 def _decompressor(self):
674 """the default decompressor"""
674 """the default decompressor"""
675 if self._docket is None:
675 if self._docket is None:
676 return None
676 return None
677 t = self._docket.default_compression_header
677 t = self._docket.default_compression_header
678 c = self._get_decompressor(t)
678 c = self._get_decompressor(t)
679 return c.decompress
679 return c.decompress
680
680
681 def _indexfp(self):
681 def _indexfp(self):
682 """file object for the revlog's index file"""
682 """file object for the revlog's index file"""
683 return self.opener(self._indexfile, mode=b"r")
683 return self.opener(self._indexfile, mode=b"r")
684
684
685 def __index_write_fp(self):
685 def __index_write_fp(self):
686 # You should not use this directly and use `_writing` instead
686 # You should not use this directly and use `_writing` instead
687 try:
687 try:
688 f = self.opener(
688 f = self.opener(
689 self._indexfile, mode=b"r+", checkambig=self._checkambig
689 self._indexfile, mode=b"r+", checkambig=self._checkambig
690 )
690 )
691 if self._docket is None:
691 if self._docket is None:
692 f.seek(0, os.SEEK_END)
692 f.seek(0, os.SEEK_END)
693 else:
693 else:
694 f.seek(self._docket.index_end, os.SEEK_SET)
694 f.seek(self._docket.index_end, os.SEEK_SET)
695 return f
695 return f
696 except IOError as inst:
696 except IOError as inst:
697 if inst.errno != errno.ENOENT:
697 if inst.errno != errno.ENOENT:
698 raise
698 raise
699 return self.opener(
699 return self.opener(
700 self._indexfile, mode=b"w+", checkambig=self._checkambig
700 self._indexfile, mode=b"w+", checkambig=self._checkambig
701 )
701 )
702
702
703 def __index_new_fp(self):
703 def __index_new_fp(self):
704 # You should not use this unless you are upgrading from inline revlog
704 # You should not use this unless you are upgrading from inline revlog
705 return self.opener(
705 return self.opener(
706 self._indexfile,
706 self._indexfile,
707 mode=b"w",
707 mode=b"w",
708 checkambig=self._checkambig,
708 checkambig=self._checkambig,
709 atomictemp=True,
709 atomictemp=True,
710 )
710 )
711
711
712 def _datafp(self, mode=b'r'):
712 def _datafp(self, mode=b'r'):
713 """file object for the revlog's data file"""
713 """file object for the revlog's data file"""
714 return self.opener(self._datafile, mode=mode)
714 return self.opener(self._datafile, mode=mode)
715
715
716 @contextlib.contextmanager
716 @contextlib.contextmanager
717 def _sidedatareadfp(self):
717 def _sidedatareadfp(self):
718 """file object suitable to read sidedata"""
718 """file object suitable to read sidedata"""
719 if self._writinghandles:
719 if self._writinghandles:
720 yield self._writinghandles[2]
720 yield self._writinghandles[2]
721 else:
721 else:
722 with self.opener(self._sidedatafile) as fp:
722 with self.opener(self._sidedatafile) as fp:
723 yield fp
723 yield fp
724
724
725 def tiprev(self):
725 def tiprev(self):
726 return len(self.index) - 1
726 return len(self.index) - 1
727
727
728 def tip(self):
728 def tip(self):
729 return self.node(self.tiprev())
729 return self.node(self.tiprev())
730
730
731 def __contains__(self, rev):
731 def __contains__(self, rev):
732 return 0 <= rev < len(self)
732 return 0 <= rev < len(self)
733
733
734 def __len__(self):
734 def __len__(self):
735 return len(self.index)
735 return len(self.index)
736
736
737 def __iter__(self):
737 def __iter__(self):
738 return iter(pycompat.xrange(len(self)))
738 return iter(pycompat.xrange(len(self)))
739
739
740 def revs(self, start=0, stop=None):
740 def revs(self, start=0, stop=None):
741 """iterate over all rev in this revlog (from start to stop)"""
741 """iterate over all rev in this revlog (from start to stop)"""
742 return storageutil.iterrevs(len(self), start=start, stop=stop)
742 return storageutil.iterrevs(len(self), start=start, stop=stop)
743
743
744 @property
744 @property
745 def nodemap(self):
745 def nodemap(self):
746 msg = (
746 msg = (
747 b"revlog.nodemap is deprecated, "
747 b"revlog.nodemap is deprecated, "
748 b"use revlog.index.[has_node|rev|get_rev]"
748 b"use revlog.index.[has_node|rev|get_rev]"
749 )
749 )
750 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
750 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
751 return self.index.nodemap
751 return self.index.nodemap
752
752
753 @property
753 @property
754 def _nodecache(self):
754 def _nodecache(self):
755 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
755 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
756 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
756 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
757 return self.index.nodemap
757 return self.index.nodemap
758
758
759 def hasnode(self, node):
759 def hasnode(self, node):
760 try:
760 try:
761 self.rev(node)
761 self.rev(node)
762 return True
762 return True
763 except KeyError:
763 except KeyError:
764 return False
764 return False
765
765
766 def candelta(self, baserev, rev):
766 def candelta(self, baserev, rev):
767 """whether two revisions (baserev, rev) can be delta-ed or not"""
767 """whether two revisions (baserev, rev) can be delta-ed or not"""
768 # Disable delta if either rev requires a content-changing flag
768 # Disable delta if either rev requires a content-changing flag
769 # processor (ex. LFS). This is because such a flag processor can alter
769 # processor (ex. LFS). This is because such a flag processor can alter
770 # the rawtext content that the delta will be based on, and two clients
770 # the rawtext content that the delta will be based on, and two clients
771 # could have the same revlog node with different flags (i.e. different
771 # could have the same revlog node with different flags (i.e. different
772 # rawtext contents) and the delta could be incompatible.
772 # rawtext contents) and the delta could be incompatible.
773 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
773 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
774 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
774 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
775 ):
775 ):
776 return False
776 return False
777 return True
777 return True
778
778
779 def update_caches(self, transaction):
779 def update_caches(self, transaction):
780 if self._nodemap_file is not None:
780 if self._nodemap_file is not None:
781 if transaction is None:
781 if transaction is None:
782 nodemaputil.update_persistent_nodemap(self)
782 nodemaputil.update_persistent_nodemap(self)
783 else:
783 else:
784 nodemaputil.setup_persistent_nodemap(transaction, self)
784 nodemaputil.setup_persistent_nodemap(transaction, self)
785
785
786 def clearcaches(self):
786 def clearcaches(self):
787 self._revisioncache = None
787 self._revisioncache = None
788 self._chainbasecache.clear()
788 self._chainbasecache.clear()
789 self._segmentfile.clear_cache()
789 self._segmentfile.clear_cache()
790 self._segmentfile_sidedata.clear_cache()
790 self._segmentfile_sidedata.clear_cache()
791 self._pcache = {}
791 self._pcache = {}
792 self._nodemap_docket = None
792 self._nodemap_docket = None
793 self.index.clearcaches()
793 self.index.clearcaches()
794 # The Python code is the one responsible for validating the docket, so we
794 # The Python code is the one responsible for validating the docket, so we
795 # end up having to refresh it here.
795 # end up having to refresh it here.
796 use_nodemap = (
796 use_nodemap = (
797 not self._inline
797 not self._inline
798 and self._nodemap_file is not None
798 and self._nodemap_file is not None
799 and util.safehasattr(self.index, 'update_nodemap_data')
799 and util.safehasattr(self.index, 'update_nodemap_data')
800 )
800 )
801 if use_nodemap:
801 if use_nodemap:
802 nodemap_data = nodemaputil.persisted_data(self)
802 nodemap_data = nodemaputil.persisted_data(self)
803 if nodemap_data is not None:
803 if nodemap_data is not None:
804 self._nodemap_docket = nodemap_data[0]
804 self._nodemap_docket = nodemap_data[0]
805 self.index.update_nodemap_data(*nodemap_data)
805 self.index.update_nodemap_data(*nodemap_data)
806
806
807 def rev(self, node):
807 def rev(self, node):
808 try:
808 try:
809 return self.index.rev(node)
809 return self.index.rev(node)
810 except TypeError:
810 except TypeError:
811 raise
811 raise
812 except error.RevlogError:
812 except error.RevlogError:
813 # parsers.c radix tree lookup failed
813 # parsers.c radix tree lookup failed
814 if (
814 if (
815 node == self.nodeconstants.wdirid
815 node == self.nodeconstants.wdirid
816 or node in self.nodeconstants.wdirfilenodeids
816 or node in self.nodeconstants.wdirfilenodeids
817 ):
817 ):
818 raise error.WdirUnsupported
818 raise error.WdirUnsupported
819 raise error.LookupError(node, self.display_id, _(b'no node'))
819 raise error.LookupError(node, self.display_id, _(b'no node'))
820
820
821 # Accessors for index entries.
821 # Accessors for index entries.
822
822
823 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
823 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
824 # are flags.
824 # are flags.
825 def start(self, rev):
825 def start(self, rev):
826 return int(self.index[rev][0] >> 16)
826 return int(self.index[rev][0] >> 16)
827
827
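# Illustrative sketch (not part of the revlog code): the first entry of each
# index tuple packs the 48-bit data offset and the 16-bit flags into a single
# integer, which is what start() above and flags() below unpack:
#
#     packed = (some_offset << 16) | some_flags
#     offset = packed >> 16
#     flag_bits = packed & 0xFFFF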
828 def sidedata_cut_off(self, rev):
828 def sidedata_cut_off(self, rev):
829 sd_cut_off = self.index[rev][8]
829 sd_cut_off = self.index[rev][8]
830 if sd_cut_off != 0:
830 if sd_cut_off != 0:
831 return sd_cut_off
831 return sd_cut_off
832 # This is some annoying dance, because entries without sidedata
832 # This is some annoying dance, because entries without sidedata
833 # currently use 0 as their offset. (instead of previous-offset +
833 # currently use 0 as their offset. (instead of previous-offset +
834 # previous-size)
834 # previous-size)
835 #
835 #
836 # We should reconsider this sidedata → 0 sidedata_offset policy.
836 # We should reconsider this sidedata → 0 sidedata_offset policy.
837 # In the meantime, we need this.
837 # In the meantime, we need this.
838 while 0 <= rev:
838 while 0 <= rev:
839 e = self.index[rev]
839 e = self.index[rev]
840 if e[9] != 0:
840 if e[9] != 0:
841 return e[8] + e[9]
841 return e[8] + e[9]
842 rev -= 1
842 rev -= 1
843 return 0
843 return 0
844
844
845 def flags(self, rev):
845 def flags(self, rev):
846 return self.index[rev][0] & 0xFFFF
846 return self.index[rev][0] & 0xFFFF
847
847
848 def length(self, rev):
848 def length(self, rev):
849 return self.index[rev][1]
849 return self.index[rev][1]
850
850
851 def sidedata_length(self, rev):
851 def sidedata_length(self, rev):
852 if not self.hassidedata:
852 if not self.hassidedata:
853 return 0
853 return 0
854 return self.index[rev][9]
854 return self.index[rev][9]
855
855
856 def rawsize(self, rev):
856 def rawsize(self, rev):
857 """return the length of the uncompressed text for a given revision"""
857 """return the length of the uncompressed text for a given revision"""
858 l = self.index[rev][2]
858 l = self.index[rev][2]
859 if l >= 0:
859 if l >= 0:
860 return l
860 return l
861
861
862 t = self.rawdata(rev)
862 t = self.rawdata(rev)
863 return len(t)
863 return len(t)
864
864
865 def size(self, rev):
865 def size(self, rev):
866 """length of non-raw text (processed by a "read" flag processor)"""
866 """length of non-raw text (processed by a "read" flag processor)"""
867 # fast path: if no "read" flag processor could change the content,
867 # fast path: if no "read" flag processor could change the content,
868 # size is rawsize. note: ELLIPSIS is known to not change the content.
868 # size is rawsize. note: ELLIPSIS is known to not change the content.
869 flags = self.flags(rev)
869 flags = self.flags(rev)
870 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
870 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
871 return self.rawsize(rev)
871 return self.rawsize(rev)
872
872
873 return len(self.revision(rev, raw=False))
873 return len(self.revision(rev, raw=False))
874
874
875 def chainbase(self, rev):
875 def chainbase(self, rev):
876 base = self._chainbasecache.get(rev)
876 base = self._chainbasecache.get(rev)
877 if base is not None:
877 if base is not None:
878 return base
878 return base
879
879
880 index = self.index
880 index = self.index
881 iterrev = rev
881 iterrev = rev
882 base = index[iterrev][3]
882 base = index[iterrev][3]
883 while base != iterrev:
883 while base != iterrev:
884 iterrev = base
884 iterrev = base
885 base = index[iterrev][3]
885 base = index[iterrev][3]
886
886
887 self._chainbasecache[rev] = base
887 self._chainbasecache[rev] = base
888 return base
888 return base
889
889
890 def linkrev(self, rev):
890 def linkrev(self, rev):
891 return self.index[rev][4]
891 return self.index[rev][4]
892
892
893 def parentrevs(self, rev):
893 def parentrevs(self, rev):
894 try:
894 try:
895 entry = self.index[rev]
895 entry = self.index[rev]
896 except IndexError:
896 except IndexError:
897 if rev == wdirrev:
897 if rev == wdirrev:
898 raise error.WdirUnsupported
898 raise error.WdirUnsupported
899 raise
899 raise
900 if entry[5] == nullrev:
900 if entry[5] == nullrev:
901 return entry[6], entry[5]
901 return entry[6], entry[5]
902 else:
902 else:
903 return entry[5], entry[6]
903 return entry[5], entry[6]
904
904
905 # fast parentrevs(rev) where rev isn't filtered
905 # fast parentrevs(rev) where rev isn't filtered
906 _uncheckedparentrevs = parentrevs
906 _uncheckedparentrevs = parentrevs
907
907
908 def node(self, rev):
908 def node(self, rev):
909 try:
909 try:
910 return self.index[rev][7]
910 return self.index[rev][7]
911 except IndexError:
911 except IndexError:
912 if rev == wdirrev:
912 if rev == wdirrev:
913 raise error.WdirUnsupported
913 raise error.WdirUnsupported
914 raise
914 raise
915
915
916 # Derived from index values.
916 # Derived from index values.
917
917
918 def end(self, rev):
918 def end(self, rev):
919 return self.start(rev) + self.length(rev)
919 return self.start(rev) + self.length(rev)
920
920
921 def parents(self, node):
921 def parents(self, node):
922 i = self.index
922 i = self.index
923 d = i[self.rev(node)]
923 d = i[self.rev(node)]
924 # inline node() to avoid function call overhead
924 # inline node() to avoid function call overhead
925 if d[5] == self.nullid:
925 if d[5] == self.nullid:
926 return i[d[6]][7], i[d[5]][7]
926 return i[d[6]][7], i[d[5]][7]
927 else:
927 else:
928 return i[d[5]][7], i[d[6]][7]
928 return i[d[5]][7], i[d[6]][7]
929
929
930 def chainlen(self, rev):
930 def chainlen(self, rev):
931 return self._chaininfo(rev)[0]
931 return self._chaininfo(rev)[0]
932
932
933 def _chaininfo(self, rev):
933 def _chaininfo(self, rev):
934 chaininfocache = self._chaininfocache
934 chaininfocache = self._chaininfocache
935 if rev in chaininfocache:
935 if rev in chaininfocache:
936 return chaininfocache[rev]
936 return chaininfocache[rev]
937 index = self.index
937 index = self.index
938 generaldelta = self._generaldelta
938 generaldelta = self._generaldelta
939 iterrev = rev
939 iterrev = rev
940 e = index[iterrev]
940 e = index[iterrev]
941 clen = 0
941 clen = 0
942 compresseddeltalen = 0
942 compresseddeltalen = 0
943 while iterrev != e[3]:
943 while iterrev != e[3]:
944 clen += 1
944 clen += 1
945 compresseddeltalen += e[1]
945 compresseddeltalen += e[1]
946 if generaldelta:
946 if generaldelta:
947 iterrev = e[3]
947 iterrev = e[3]
948 else:
948 else:
949 iterrev -= 1
949 iterrev -= 1
950 if iterrev in chaininfocache:
950 if iterrev in chaininfocache:
951 t = chaininfocache[iterrev]
951 t = chaininfocache[iterrev]
952 clen += t[0]
952 clen += t[0]
953 compresseddeltalen += t[1]
953 compresseddeltalen += t[1]
954 break
954 break
955 e = index[iterrev]
955 e = index[iterrev]
956 else:
956 else:
957 # Add text length of base since decompressing that also takes
957 # Add text length of base since decompressing that also takes
958 # work. For cache hits the length is already included.
958 # work. For cache hits the length is already included.
959 compresseddeltalen += e[1]
959 compresseddeltalen += e[1]
960 r = (clen, compresseddeltalen)
960 r = (clen, compresseddeltalen)
961 chaininfocache[rev] = r
961 chaininfocache[rev] = r
962 return r
962 return r
963
963
964 def _deltachain(self, rev, stoprev=None):
964 def _deltachain(self, rev, stoprev=None):
965 """Obtain the delta chain for a revision.
965 """Obtain the delta chain for a revision.
966
966
967 ``stoprev`` specifies a revision to stop at. If not specified, we
967 ``stoprev`` specifies a revision to stop at. If not specified, we
968 stop at the base of the chain.
968 stop at the base of the chain.
969
969
970 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
970 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
971 revs in ascending order and ``stopped`` is a bool indicating whether
971 revs in ascending order and ``stopped`` is a bool indicating whether
972 ``stoprev`` was hit.
972 ``stoprev`` was hit.
973 """
973 """
974 # Try C implementation.
974 # Try C implementation.
975 try:
975 try:
976 return self.index.deltachain(rev, stoprev, self._generaldelta)
976 return self.index.deltachain(rev, stoprev, self._generaldelta)
977 except AttributeError:
977 except AttributeError:
978 pass
978 pass
979
979
980 chain = []
980 chain = []
981
981
982 # Alias to prevent attribute lookup in tight loop.
982 # Alias to prevent attribute lookup in tight loop.
983 index = self.index
983 index = self.index
984 generaldelta = self._generaldelta
984 generaldelta = self._generaldelta
985
985
986 iterrev = rev
986 iterrev = rev
987 e = index[iterrev]
987 e = index[iterrev]
988 while iterrev != e[3] and iterrev != stoprev:
988 while iterrev != e[3] and iterrev != stoprev:
989 chain.append(iterrev)
989 chain.append(iterrev)
990 if generaldelta:
990 if generaldelta:
991 iterrev = e[3]
991 iterrev = e[3]
992 else:
992 else:
993 iterrev -= 1
993 iterrev -= 1
994 e = index[iterrev]
994 e = index[iterrev]
995
995
996 if iterrev == stoprev:
996 if iterrev == stoprev:
997 stopped = True
997 stopped = True
998 else:
998 else:
999 chain.append(iterrev)
999 chain.append(iterrev)
1000 stopped = False
1000 stopped = False
1001
1001
1002 chain.reverse()
1002 chain.reverse()
1003 return chain, stopped
1003 return chain, stopped
1004
1004
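# Illustrative sketch (assumption: a toy mapping where entry[3] is the delta
# base, mirroring the pure-Python fallback above). With general delta, the
# chain for a revision is found by repeatedly following the base pointer:
#
#     base_of = {0: 0, 1: 0, 2: 1, 3: 1}      # rev -> delta base
#     rev, chain = 3, []
#     while base_of[rev] != rev:
#         chain.append(rev)
#         rev = base_of[rev]
#     chain.append(rev)
#     chain.reverse()                          # [0, 1, 3]
#
# Reconstructing revision 3 then means taking the full text of rev 0 and
# applying the deltas of revs 1 and 3 in that order.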
1005 def ancestors(self, revs, stoprev=0, inclusive=False):
1005 def ancestors(self, revs, stoprev=0, inclusive=False):
1006 """Generate the ancestors of 'revs' in reverse revision order.
1006 """Generate the ancestors of 'revs' in reverse revision order.
1007 Does not generate revs lower than stoprev.
1007 Does not generate revs lower than stoprev.
1008
1008
1009 See the documentation for ancestor.lazyancestors for more details."""
1009 See the documentation for ancestor.lazyancestors for more details."""
1010
1010
1011 # first, make sure start revisions aren't filtered
1011 # first, make sure start revisions aren't filtered
1012 revs = list(revs)
1012 revs = list(revs)
1013 checkrev = self.node
1013 checkrev = self.node
1014 for r in revs:
1014 for r in revs:
1015 checkrev(r)
1015 checkrev(r)
1016 # and we're sure ancestors aren't filtered as well
1016 # and we're sure ancestors aren't filtered as well
1017
1017
1018 if rustancestor is not None and self.index.rust_ext_compat:
1018 if rustancestor is not None and self.index.rust_ext_compat:
1019 lazyancestors = rustancestor.LazyAncestors
1019 lazyancestors = rustancestor.LazyAncestors
1020 arg = self.index
1020 arg = self.index
1021 else:
1021 else:
1022 lazyancestors = ancestor.lazyancestors
1022 lazyancestors = ancestor.lazyancestors
1023 arg = self._uncheckedparentrevs
1023 arg = self._uncheckedparentrevs
1024 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1024 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1025
1025
1026 def descendants(self, revs):
1026 def descendants(self, revs):
1027 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1027 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1028
1028
1029 def findcommonmissing(self, common=None, heads=None):
1029 def findcommonmissing(self, common=None, heads=None):
1030 """Return a tuple of the ancestors of common and the ancestors of heads
1030 """Return a tuple of the ancestors of common and the ancestors of heads
1031 that are not ancestors of common. In revset terminology, we return the
1031 that are not ancestors of common. In revset terminology, we return the
1032 tuple:
1032 tuple:
1033
1033
1034 ::common, (::heads) - (::common)
1034 ::common, (::heads) - (::common)
1035
1035
1036 The list is sorted by revision number, meaning it is
1036 The list is sorted by revision number, meaning it is
1037 topologically sorted.
1037 topologically sorted.
1038
1038
1039 'heads' and 'common' are both lists of node IDs. If heads is
1039 'heads' and 'common' are both lists of node IDs. If heads is
1040 not supplied, uses all of the revlog's heads. If common is not
1040 not supplied, uses all of the revlog's heads. If common is not
1041 supplied, uses nullid."""
1041 supplied, uses nullid."""
1042 if common is None:
1042 if common is None:
1043 common = [self.nullid]
1043 common = [self.nullid]
1044 if heads is None:
1044 if heads is None:
1045 heads = self.heads()
1045 heads = self.heads()
1046
1046
1047 common = [self.rev(n) for n in common]
1047 common = [self.rev(n) for n in common]
1048 heads = [self.rev(n) for n in heads]
1048 heads = [self.rev(n) for n in heads]
1049
1049
1050 # we want the ancestors, but inclusive
1050 # we want the ancestors, but inclusive
1051 class lazyset(object):
1051 class lazyset(object):
1052 def __init__(self, lazyvalues):
1052 def __init__(self, lazyvalues):
1053 self.addedvalues = set()
1053 self.addedvalues = set()
1054 self.lazyvalues = lazyvalues
1054 self.lazyvalues = lazyvalues
1055
1055
1056 def __contains__(self, value):
1056 def __contains__(self, value):
1057 return value in self.addedvalues or value in self.lazyvalues
1057 return value in self.addedvalues or value in self.lazyvalues
1058
1058
1059 def __iter__(self):
1059 def __iter__(self):
1060 added = self.addedvalues
1060 added = self.addedvalues
1061 for r in added:
1061 for r in added:
1062 yield r
1062 yield r
1063 for r in self.lazyvalues:
1063 for r in self.lazyvalues:
1064 if not r in added:
1064 if not r in added:
1065 yield r
1065 yield r
1066
1066
1067 def add(self, value):
1067 def add(self, value):
1068 self.addedvalues.add(value)
1068 self.addedvalues.add(value)
1069
1069
1070 def update(self, values):
1070 def update(self, values):
1071 self.addedvalues.update(values)
1071 self.addedvalues.update(values)
1072
1072
1073 has = lazyset(self.ancestors(common))
1073 has = lazyset(self.ancestors(common))
1074 has.add(nullrev)
1074 has.add(nullrev)
1075 has.update(common)
1075 has.update(common)
1076
1076
1077 # take all ancestors from heads that aren't in has
1077 # take all ancestors from heads that aren't in has
1078 missing = set()
1078 missing = set()
1079 visit = collections.deque(r for r in heads if r not in has)
1079 visit = collections.deque(r for r in heads if r not in has)
1080 while visit:
1080 while visit:
1081 r = visit.popleft()
1081 r = visit.popleft()
1082 if r in missing:
1082 if r in missing:
1083 continue
1083 continue
1084 else:
1084 else:
1085 missing.add(r)
1085 missing.add(r)
1086 for p in self.parentrevs(r):
1086 for p in self.parentrevs(r):
1087 if p not in has:
1087 if p not in has:
1088 visit.append(p)
1088 visit.append(p)
1089 missing = list(missing)
1089 missing = list(missing)
1090 missing.sort()
1090 missing.sort()
1091 return has, [self.node(miss) for miss in missing]
1091 return has, [self.node(miss) for miss in missing]
1092
1092
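# Illustrative sketch (assumption: a tiny linear history 0 -> 1 -> 2 -> 3).
# With common = [node(1)] and heads = [node(3)], the revset identity in the
# docstring gives:
#
#     ::common                -> {0, 1}    (exposed lazily through `has`)
#     (::heads) - (::common)  -> {2, 3}    (returned as sorted missing nodes)
#
# so the second element lists exactly the ancestors of the heads that are not
# already known through `common`.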
1093 def incrementalmissingrevs(self, common=None):
1093 def incrementalmissingrevs(self, common=None):
1094 """Return an object that can be used to incrementally compute the
1094 """Return an object that can be used to incrementally compute the
1095 revision numbers of the ancestors of arbitrary sets that are not
1095 revision numbers of the ancestors of arbitrary sets that are not
1096 ancestors of common. This is an ancestor.incrementalmissingancestors
1096 ancestors of common. This is an ancestor.incrementalmissingancestors
1097 object.
1097 object.
1098
1098
1099 'common' is a list of revision numbers. If common is not supplied, uses
1099 'common' is a list of revision numbers. If common is not supplied, uses
1100 nullrev.
1100 nullrev.
1101 """
1101 """
1102 if common is None:
1102 if common is None:
1103 common = [nullrev]
1103 common = [nullrev]
1104
1104
1105 if rustancestor is not None and self.index.rust_ext_compat:
1105 if rustancestor is not None and self.index.rust_ext_compat:
1106 return rustancestor.MissingAncestors(self.index, common)
1106 return rustancestor.MissingAncestors(self.index, common)
1107 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1107 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1108
1108
1109 def findmissingrevs(self, common=None, heads=None):
1109 def findmissingrevs(self, common=None, heads=None):
1110 """Return the revision numbers of the ancestors of heads that
1110 """Return the revision numbers of the ancestors of heads that
1111 are not ancestors of common.
1111 are not ancestors of common.
1112
1112
1113 More specifically, return a list of revision numbers corresponding to
1113 More specifically, return a list of revision numbers corresponding to
1114 nodes N such that every N satisfies the following constraints:
1114 nodes N such that every N satisfies the following constraints:
1115
1115
1116 1. N is an ancestor of some node in 'heads'
1116 1. N is an ancestor of some node in 'heads'
1117 2. N is not an ancestor of any node in 'common'
1117 2. N is not an ancestor of any node in 'common'
1118
1118
1119 The list is sorted by revision number, meaning it is
1119 The list is sorted by revision number, meaning it is
1120 topologically sorted.
1120 topologically sorted.
1121
1121
1122 'heads' and 'common' are both lists of revision numbers. If heads is
1122 'heads' and 'common' are both lists of revision numbers. If heads is
1123 not supplied, uses all of the revlog's heads. If common is not
1123 not supplied, uses all of the revlog's heads. If common is not
1124 supplied, uses nullid."""
1124 supplied, uses nullid."""
1125 if common is None:
1125 if common is None:
1126 common = [nullrev]
1126 common = [nullrev]
1127 if heads is None:
1127 if heads is None:
1128 heads = self.headrevs()
1128 heads = self.headrevs()
1129
1129
1130 inc = self.incrementalmissingrevs(common=common)
1130 inc = self.incrementalmissingrevs(common=common)
1131 return inc.missingancestors(heads)
1131 return inc.missingancestors(heads)
1132
1132
1133 def findmissing(self, common=None, heads=None):
1133 def findmissing(self, common=None, heads=None):
1134 """Return the ancestors of heads that are not ancestors of common.
1134 """Return the ancestors of heads that are not ancestors of common.
1135
1135
1136 More specifically, return a list of nodes N such that every N
1136 More specifically, return a list of nodes N such that every N
1137 satisfies the following constraints:
1137 satisfies the following constraints:
1138
1138
1139 1. N is an ancestor of some node in 'heads'
1139 1. N is an ancestor of some node in 'heads'
1140 2. N is not an ancestor of any node in 'common'
1140 2. N is not an ancestor of any node in 'common'
1141
1141
1142 The list is sorted by revision number, meaning it is
1142 The list is sorted by revision number, meaning it is
1143 topologically sorted.
1143 topologically sorted.
1144
1144
1145 'heads' and 'common' are both lists of node IDs. If heads is
1145 'heads' and 'common' are both lists of node IDs. If heads is
1146 not supplied, uses all of the revlog's heads. If common is not
1146 not supplied, uses all of the revlog's heads. If common is not
1147 supplied, uses nullid."""
1147 supplied, uses nullid."""
1148 if common is None:
1148 if common is None:
1149 common = [self.nullid]
1149 common = [self.nullid]
1150 if heads is None:
1150 if heads is None:
1151 heads = self.heads()
1151 heads = self.heads()
1152
1152
1153 common = [self.rev(n) for n in common]
1153 common = [self.rev(n) for n in common]
1154 heads = [self.rev(n) for n in heads]
1154 heads = [self.rev(n) for n in heads]
1155
1155
1156 inc = self.incrementalmissingrevs(common=common)
1156 inc = self.incrementalmissingrevs(common=common)
1157 return [self.node(r) for r in inc.missingancestors(heads)]
1157 return [self.node(r) for r in inc.missingancestors(heads)]
1158
1158
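# Illustrative sketch (not part of the revlog code): on a small DAG the two
# constraints in the docstring reduce to set arithmetic over (inclusive)
# ancestor sets:
#
#     parents = {0: [], 1: [0], 2: [1], 3: [1]}    # rev -> parent revs
#     def ancs(r, parents=parents):
#         out, todo = set(), [r]
#         while todo:
#             n = todo.pop()
#             if n not in out:
#                 out.add(n)
#                 todo.extend(parents[n])
#         return out
#     missing = ancs(3) - ancs(2)                   # {3}
#
# which matches "ancestors of heads that are not ancestors of common".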
1159 def nodesbetween(self, roots=None, heads=None):
1159 def nodesbetween(self, roots=None, heads=None):
1160 """Return a topological path from 'roots' to 'heads'.
1160 """Return a topological path from 'roots' to 'heads'.
1161
1161
1162 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1162 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1163 topologically sorted list of all nodes N that satisfy both of
1163 topologically sorted list of all nodes N that satisfy both of
1164 these constraints:
1164 these constraints:
1165
1165
1166 1. N is a descendant of some node in 'roots'
1166 1. N is a descendant of some node in 'roots'
1167 2. N is an ancestor of some node in 'heads'
1167 2. N is an ancestor of some node in 'heads'
1168
1168
1169 Every node is considered to be both a descendant and an ancestor
1169 Every node is considered to be both a descendant and an ancestor
1170 of itself, so every reachable node in 'roots' and 'heads' will be
1170 of itself, so every reachable node in 'roots' and 'heads' will be
1171 included in 'nodes'.
1171 included in 'nodes'.
1172
1172
1173 'outroots' is the list of reachable nodes in 'roots', i.e., the
1173 'outroots' is the list of reachable nodes in 'roots', i.e., the
1174 subset of 'roots' that is returned in 'nodes'. Likewise,
1174 subset of 'roots' that is returned in 'nodes'. Likewise,
1175 'outheads' is the subset of 'heads' that is also in 'nodes'.
1175 'outheads' is the subset of 'heads' that is also in 'nodes'.
1176
1176
1177 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1177 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1178 unspecified, uses nullid as the only root. If 'heads' is
1178 unspecified, uses nullid as the only root. If 'heads' is
1179 unspecified, uses list of all of the revlog's heads."""
1179 unspecified, uses list of all of the revlog's heads."""
1180 nonodes = ([], [], [])
1180 nonodes = ([], [], [])
1181 if roots is not None:
1181 if roots is not None:
1182 roots = list(roots)
1182 roots = list(roots)
1183 if not roots:
1183 if not roots:
1184 return nonodes
1184 return nonodes
1185 lowestrev = min([self.rev(n) for n in roots])
1185 lowestrev = min([self.rev(n) for n in roots])
1186 else:
1186 else:
1187 roots = [self.nullid] # Everybody's a descendant of nullid
1187 roots = [self.nullid] # Everybody's a descendant of nullid
1188 lowestrev = nullrev
1188 lowestrev = nullrev
1189 if (lowestrev == nullrev) and (heads is None):
1189 if (lowestrev == nullrev) and (heads is None):
1190 # We want _all_ the nodes!
1190 # We want _all_ the nodes!
1191 return (
1191 return (
1192 [self.node(r) for r in self],
1192 [self.node(r) for r in self],
1193 [self.nullid],
1193 [self.nullid],
1194 list(self.heads()),
1194 list(self.heads()),
1195 )
1195 )
1196 if heads is None:
1196 if heads is None:
1197 # All nodes are ancestors, so the latest ancestor is the last
1197 # All nodes are ancestors, so the latest ancestor is the last
1198 # node.
1198 # node.
1199 highestrev = len(self) - 1
1199 highestrev = len(self) - 1
1200 # Set ancestors to None to signal that every node is an ancestor.
1200 # Set ancestors to None to signal that every node is an ancestor.
1201 ancestors = None
1201 ancestors = None
1202 # Set heads to an empty dictionary for later discovery of heads
1202 # Set heads to an empty dictionary for later discovery of heads
1203 heads = {}
1203 heads = {}
1204 else:
1204 else:
1205 heads = list(heads)
1205 heads = list(heads)
1206 if not heads:
1206 if not heads:
1207 return nonodes
1207 return nonodes
1208 ancestors = set()
1208 ancestors = set()
1209 # Turn heads into a dictionary so we can remove 'fake' heads.
1209 # Turn heads into a dictionary so we can remove 'fake' heads.
1210 # Also, later we will be using it to filter out the heads we can't
1210 # Also, later we will be using it to filter out the heads we can't
1211 # find from roots.
1211 # find from roots.
1212 heads = dict.fromkeys(heads, False)
1212 heads = dict.fromkeys(heads, False)
1213 # Start at the top and keep marking parents until we're done.
1213 # Start at the top and keep marking parents until we're done.
1214 nodestotag = set(heads)
1214 nodestotag = set(heads)
1215 # Remember where the top was so we can use it as a limit later.
1215 # Remember where the top was so we can use it as a limit later.
1216 highestrev = max([self.rev(n) for n in nodestotag])
1216 highestrev = max([self.rev(n) for n in nodestotag])
1217 while nodestotag:
1217 while nodestotag:
1218 # grab a node to tag
1218 # grab a node to tag
1219 n = nodestotag.pop()
1219 n = nodestotag.pop()
1220 # Never tag nullid
1220 # Never tag nullid
1221 if n == self.nullid:
1221 if n == self.nullid:
1222 continue
1222 continue
1223 # A node's revision number represents its place in a
1223 # A node's revision number represents its place in a
1224 # topologically sorted list of nodes.
1224 # topologically sorted list of nodes.
1225 r = self.rev(n)
1225 r = self.rev(n)
1226 if r >= lowestrev:
1226 if r >= lowestrev:
1227 if n not in ancestors:
1227 if n not in ancestors:
1228 # If we are possibly a descendant of one of the roots
1228 # If we are possibly a descendant of one of the roots
1229 # and we haven't already been marked as an ancestor
1229 # and we haven't already been marked as an ancestor
1230 ancestors.add(n) # Mark as ancestor
1230 ancestors.add(n) # Mark as ancestor
1231 # Add non-nullid parents to list of nodes to tag.
1231 # Add non-nullid parents to list of nodes to tag.
1232 nodestotag.update(
1232 nodestotag.update(
1233 [p for p in self.parents(n) if p != self.nullid]
1233 [p for p in self.parents(n) if p != self.nullid]
1234 )
1234 )
1235 elif n in heads: # We've seen it before, is it a fake head?
1235 elif n in heads: # We've seen it before, is it a fake head?
1236 # So it is, real heads should not be the ancestors of
1236 # So it is, real heads should not be the ancestors of
1237 # any other heads.
1237 # any other heads.
1238 heads.pop(n)
1238 heads.pop(n)
1239 if not ancestors:
1239 if not ancestors:
1240 return nonodes
1240 return nonodes
1241 # Now that we have our set of ancestors, we want to remove any
1241 # Now that we have our set of ancestors, we want to remove any
1242 # roots that are not ancestors.
1242 # roots that are not ancestors.
1243
1243
1244 # If one of the roots was nullid, everything is included anyway.
1244 # If one of the roots was nullid, everything is included anyway.
1245 if lowestrev > nullrev:
1245 if lowestrev > nullrev:
1246 # But, since we weren't, let's recompute the lowest rev to not
1246 # But, since we weren't, let's recompute the lowest rev to not
1247 # include roots that aren't ancestors.
1247 # include roots that aren't ancestors.
1248
1248
1249 # Filter out roots that aren't ancestors of heads
1249 # Filter out roots that aren't ancestors of heads
1250 roots = [root for root in roots if root in ancestors]
1250 roots = [root for root in roots if root in ancestors]
1251 # Recompute the lowest revision
1251 # Recompute the lowest revision
1252 if roots:
1252 if roots:
1253 lowestrev = min([self.rev(root) for root in roots])
1253 lowestrev = min([self.rev(root) for root in roots])
1254 else:
1254 else:
1255 # No more roots? Return empty list
1255 # No more roots? Return empty list
1256 return nonodes
1256 return nonodes
1257 else:
1257 else:
1258 # We are descending from nullid, and don't need to care about
1258 # We are descending from nullid, and don't need to care about
1259 # any other roots.
1259 # any other roots.
1260 lowestrev = nullrev
1260 lowestrev = nullrev
1261 roots = [self.nullid]
1261 roots = [self.nullid]
1262 # Transform our roots list into a set.
1262 # Transform our roots list into a set.
1263 descendants = set(roots)
1263 descendants = set(roots)
1264 # Also, keep the original roots so we can filter out roots that aren't
1264 # Also, keep the original roots so we can filter out roots that aren't
1265 # 'real' roots (i.e. are descended from other roots).
1265 # 'real' roots (i.e. are descended from other roots).
1266 roots = descendants.copy()
1266 roots = descendants.copy()
1267 # Our topologically sorted list of output nodes.
1267 # Our topologically sorted list of output nodes.
1268 orderedout = []
1268 orderedout = []
1269 # Don't start at nullid since we don't want nullid in our output list,
1269 # Don't start at nullid since we don't want nullid in our output list,
1270 # and if nullid shows up in descendants, empty parents will look like
1270 # and if nullid shows up in descendants, empty parents will look like
1271 # they're descendants.
1271 # they're descendants.
1272 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1272 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1273 n = self.node(r)
1273 n = self.node(r)
1274 isdescendant = False
1274 isdescendant = False
1275 if lowestrev == nullrev: # Everybody is a descendant of nullid
1275 if lowestrev == nullrev: # Everybody is a descendant of nullid
1276 isdescendant = True
1276 isdescendant = True
1277 elif n in descendants:
1277 elif n in descendants:
1278 # n is already a descendant
1278 # n is already a descendant
1279 isdescendant = True
1279 isdescendant = True
1280 # This check only needs to be done here because all the roots
1280 # This check only needs to be done here because all the roots
1281 # will start being marked as descendants before the loop.
1281 # will start being marked as descendants before the loop.
1282 if n in roots:
1282 if n in roots:
1283 # If n was a root, check if it's a 'real' root.
1283 # If n was a root, check if it's a 'real' root.
1284 p = tuple(self.parents(n))
1284 p = tuple(self.parents(n))
1285 # If any of its parents are descendants, it's not a root.
1285 # If any of its parents are descendants, it's not a root.
1286 if (p[0] in descendants) or (p[1] in descendants):
1286 if (p[0] in descendants) or (p[1] in descendants):
1287 roots.remove(n)
1287 roots.remove(n)
1288 else:
1288 else:
1289 p = tuple(self.parents(n))
1289 p = tuple(self.parents(n))
1290 # A node is a descendant if either of its parents are
1290 # A node is a descendant if either of its parents are
1291 # descendants. (We seeded the descendants set with the roots
1291 # descendants. (We seeded the descendants set with the roots
1292 # up there, remember?)
1292 # up there, remember?)
1293 if (p[0] in descendants) or (p[1] in descendants):
1293 if (p[0] in descendants) or (p[1] in descendants):
1294 descendants.add(n)
1294 descendants.add(n)
1295 isdescendant = True
1295 isdescendant = True
1296 if isdescendant and ((ancestors is None) or (n in ancestors)):
1296 if isdescendant and ((ancestors is None) or (n in ancestors)):
1297 # Only include nodes that are both descendants and ancestors.
1297 # Only include nodes that are both descendants and ancestors.
1298 orderedout.append(n)
1298 orderedout.append(n)
1299 if (ancestors is not None) and (n in heads):
1299 if (ancestors is not None) and (n in heads):
1300 # We're trying to figure out which heads are reachable
1300 # We're trying to figure out which heads are reachable
1301 # from roots.
1301 # from roots.
1302 # Mark this head as having been reached
1302 # Mark this head as having been reached
1303 heads[n] = True
1303 heads[n] = True
1304 elif ancestors is None:
1304 elif ancestors is None:
1305 # Otherwise, we're trying to discover the heads.
1305 # Otherwise, we're trying to discover the heads.
1306 # Assume this is a head because if it isn't, the next step
1306 # Assume this is a head because if it isn't, the next step
1307 # will eventually remove it.
1307 # will eventually remove it.
1308 heads[n] = True
1308 heads[n] = True
1309 # But, obviously its parents aren't.
1309 # But, obviously its parents aren't.
1310 for p in self.parents(n):
1310 for p in self.parents(n):
1311 heads.pop(p, None)
1311 heads.pop(p, None)
1312 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1312 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1313 roots = list(roots)
1313 roots = list(roots)
1314 assert orderedout
1314 assert orderedout
1315 assert roots
1315 assert roots
1316 assert heads
1316 assert heads
1317 return (orderedout, roots, heads)
1317 return (orderedout, roots, heads)
1318
1318
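# Illustrative sketch (assumption: a linear history 0 -> 1 -> 2 -> 3).
# Calling nodesbetween(roots=[node(1)], heads=[node(2)]) would return
#
#     nodes    = [node(1), node(2)]   # every N that descends from a root
#                                     # and is an ancestor of a head
#     outroots = [node(1)]            # roots that made it into `nodes`
#     outheads = [node(2)]            # heads that made it into `nodes`
#
# while revision 3 is excluded because it is not an ancestor of any head and
# revision 0 is excluded because it does not descend from any root.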
1319 def headrevs(self, revs=None):
1319 def headrevs(self, revs=None):
1320 if revs is None:
1320 if revs is None:
1321 try:
1321 try:
1322 return self.index.headrevs()
1322 return self.index.headrevs()
1323 except AttributeError:
1323 except AttributeError:
1324 return self._headrevs()
1324 return self._headrevs()
1325 if rustdagop is not None and self.index.rust_ext_compat:
1325 if rustdagop is not None and self.index.rust_ext_compat:
1326 return rustdagop.headrevs(self.index, revs)
1326 return rustdagop.headrevs(self.index, revs)
1327 return dagop.headrevs(revs, self._uncheckedparentrevs)
1327 return dagop.headrevs(revs, self._uncheckedparentrevs)
1328
1328
1329 def computephases(self, roots):
1329 def computephases(self, roots):
1330 return self.index.computephasesmapsets(roots)
1330 return self.index.computephasesmapsets(roots)
1331
1331
1332 def _headrevs(self):
1332 def _headrevs(self):
1333 count = len(self)
1333 count = len(self)
1334 if not count:
1334 if not count:
1335 return [nullrev]
1335 return [nullrev]
1336 # we won't iterate over filtered revs, so nobody is a head at start
1336 # we won't iterate over filtered revs, so nobody is a head at start
1337 ishead = [0] * (count + 1)
1337 ishead = [0] * (count + 1)
1338 index = self.index
1338 index = self.index
1339 for r in self:
1339 for r in self:
1340 ishead[r] = 1 # I may be a head
1340 ishead[r] = 1 # I may be a head
1341 e = index[r]
1341 e = index[r]
1342 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1342 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1343 return [r for r, val in enumerate(ishead) if val]
1343 return [r for r, val in enumerate(ishead) if val]
1344
1344
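# Illustrative sketch (not part of the revlog code): the pure-Python fallback
# above marks every revision as a potential head and then clears the mark of
# every revision that is referenced as a parent:
#
#     parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (1, -1)}
#     ishead = [1] * len(parents)
#     for r, (p1, p2) in parents.items():
#         for p in (p1, p2):
#             if p != -1:
#                 ishead[p] = 0
#     heads = [r for r, v in enumerate(ishead) if v]    # [2, 3]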
1345 def heads(self, start=None, stop=None):
1345 def heads(self, start=None, stop=None):
1346 """return the list of all nodes that have no children
1346 """return the list of all nodes that have no children
1347
1347
1348 if start is specified, only heads that are descendants of
1348 if start is specified, only heads that are descendants of
1349 start will be returned
1349 start will be returned
1350 if stop is specified, it will consider all the revs from stop
1350 if stop is specified, it will consider all the revs from stop
1351 as if they had no children
1351 as if they had no children
1352 """
1352 """
1353 if start is None and stop is None:
1353 if start is None and stop is None:
1354 if not len(self):
1354 if not len(self):
1355 return [self.nullid]
1355 return [self.nullid]
1356 return [self.node(r) for r in self.headrevs()]
1356 return [self.node(r) for r in self.headrevs()]
1357
1357
1358 if start is None:
1358 if start is None:
1359 start = nullrev
1359 start = nullrev
1360 else:
1360 else:
1361 start = self.rev(start)
1361 start = self.rev(start)
1362
1362
1363 stoprevs = {self.rev(n) for n in stop or []}
1363 stoprevs = {self.rev(n) for n in stop or []}
1364
1364
1365 revs = dagop.headrevssubset(
1365 revs = dagop.headrevssubset(
1366 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1366 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1367 )
1367 )
1368
1368
1369 return [self.node(rev) for rev in revs]
1369 return [self.node(rev) for rev in revs]
1370
1370
1371 def children(self, node):
1371 def children(self, node):
1372 """find the children of a given node"""
1372 """find the children of a given node"""
1373 c = []
1373 c = []
1374 p = self.rev(node)
1374 p = self.rev(node)
1375 for r in self.revs(start=p + 1):
1375 for r in self.revs(start=p + 1):
1376 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1376 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1377 if prevs:
1377 if prevs:
1378 for pr in prevs:
1378 for pr in prevs:
1379 if pr == p:
1379 if pr == p:
1380 c.append(self.node(r))
1380 c.append(self.node(r))
1381 elif p == nullrev:
1381 elif p == nullrev:
1382 c.append(self.node(r))
1382 c.append(self.node(r))
1383 return c
1383 return c
1384
1384
1385 def commonancestorsheads(self, a, b):
1385 def commonancestorsheads(self, a, b):
1386 """calculate all the heads of the common ancestors of nodes a and b"""
1386 """calculate all the heads of the common ancestors of nodes a and b"""
1387 a, b = self.rev(a), self.rev(b)
1387 a, b = self.rev(a), self.rev(b)
1388 ancs = self._commonancestorsheads(a, b)
1388 ancs = self._commonancestorsheads(a, b)
1389 return pycompat.maplist(self.node, ancs)
1389 return pycompat.maplist(self.node, ancs)
1390
1390
1391 def _commonancestorsheads(self, *revs):
1391 def _commonancestorsheads(self, *revs):
1392 """calculate all the heads of the common ancestors of revs"""
1392 """calculate all the heads of the common ancestors of revs"""
1393 try:
1393 try:
1394 ancs = self.index.commonancestorsheads(*revs)
1394 ancs = self.index.commonancestorsheads(*revs)
1395 except (AttributeError, OverflowError): # C implementation failed
1395 except (AttributeError, OverflowError): # C implementation failed
1396 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1396 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1397 return ancs
1397 return ancs
1398
1398
1399 def isancestor(self, a, b):
1399 def isancestor(self, a, b):
1400 """return True if node a is an ancestor of node b
1400 """return True if node a is an ancestor of node b
1401
1401
1402 A revision is considered an ancestor of itself."""
1402 A revision is considered an ancestor of itself."""
1403 a, b = self.rev(a), self.rev(b)
1403 a, b = self.rev(a), self.rev(b)
1404 return self.isancestorrev(a, b)
1404 return self.isancestorrev(a, b)
1405
1405
1406 def isancestorrev(self, a, b):
1406 def isancestorrev(self, a, b):
1407 """return True if revision a is an ancestor of revision b
1407 """return True if revision a is an ancestor of revision b
1408
1408
1409 A revision is considered an ancestor of itself.
1409 A revision is considered an ancestor of itself.
1410
1410
1411 The implementation of this is trivial but the use of
1411 The implementation of this is trivial but the use of
1412 reachableroots is not."""
1412 reachableroots is not."""
1413 if a == nullrev:
1413 if a == nullrev:
1414 return True
1414 return True
1415 elif a == b:
1415 elif a == b:
1416 return True
1416 return True
1417 elif a > b:
1417 elif a > b:
1418 return False
1418 return False
1419 return bool(self.reachableroots(a, [b], [a], includepath=False))
1419 return bool(self.reachableroots(a, [b], [a], includepath=False))
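# Illustrative sketch, assuming a revlog instance `rl`: isancestorrev()
# delegates the real work to reachableroots(), asking whether some path from
# `b` back to its roots passes through `a`.
#
#     rl.isancestorrev(0, len(rl) - 1)   # True when tip descends from rev 0
#     rl.isancestorrev(5, 5)             # True: a revision is its own ancestor
#     rl.isancestorrev(9, 3)             # False: a > b can never be an ancestor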
1420
1420
1421 def reachableroots(self, minroot, heads, roots, includepath=False):
1421 def reachableroots(self, minroot, heads, roots, includepath=False):
1422 """return (heads(::(<roots> and <roots>::<heads>)))
1422 """return (heads(::(<roots> and <roots>::<heads>)))
1423
1423
1424 If includepath is True, return (<roots>::<heads>)."""
1424 If includepath is True, return (<roots>::<heads>)."""
1425 try:
1425 try:
1426 return self.index.reachableroots2(
1426 return self.index.reachableroots2(
1427 minroot, heads, roots, includepath
1427 minroot, heads, roots, includepath
1428 )
1428 )
1429 except AttributeError:
1429 except AttributeError:
1430 return dagop._reachablerootspure(
1430 return dagop._reachablerootspure(
1431 self.parentrevs, minroot, roots, heads, includepath
1431 self.parentrevs, minroot, roots, heads, includepath
1432 )
1432 )
1433
1433
1434 def ancestor(self, a, b):
1434 def ancestor(self, a, b):
1435 """calculate the "best" common ancestor of nodes a and b"""
1435 """calculate the "best" common ancestor of nodes a and b"""
1436
1436
1437 a, b = self.rev(a), self.rev(b)
1437 a, b = self.rev(a), self.rev(b)
1438 try:
1438 try:
1439 ancs = self.index.ancestors(a, b)
1439 ancs = self.index.ancestors(a, b)
1440 except (AttributeError, OverflowError):
1440 except (AttributeError, OverflowError):
1441 ancs = ancestor.ancestors(self.parentrevs, a, b)
1441 ancs = ancestor.ancestors(self.parentrevs, a, b)
1442 if ancs:
1442 if ancs:
1443 # choose a consistent winner when there's a tie
1443 # choose a consistent winner when there's a tie
1444 return min(map(self.node, ancs))
1444 return min(map(self.node, ancs))
1445 return self.nullid
1445 return self.nullid
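# Minimal sketch (assuming nodes `n1` and `n2` known to this revlog):
# ancestor() returns a single "best" greatest common ancestor and breaks ties
# with min() over the binary node ids, so repeated calls give a stable result.
#
#     gca = rl.ancestor(n1, n2)
#     if gca == rl.nullid:
#         pass  # n1 and n2 share no ancestor in this revlog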
1446
1446
1447 def _match(self, id):
1447 def _match(self, id):
1448 if isinstance(id, int):
1448 if isinstance(id, int):
1449 # rev
1449 # rev
1450 return self.node(id)
1450 return self.node(id)
1451 if len(id) == self.nodeconstants.nodelen:
1451 if len(id) == self.nodeconstants.nodelen:
1452 # possibly a binary node
1452 # possibly a binary node
1453 # odds of a binary node being all hex in ASCII are 1 in 10**25
1453 # odds of a binary node being all hex in ASCII are 1 in 10**25
1454 try:
1454 try:
1455 node = id
1455 node = id
1456 self.rev(node) # quick search the index
1456 self.rev(node) # quick search the index
1457 return node
1457 return node
1458 except error.LookupError:
1458 except error.LookupError:
1459 pass # may be partial hex id
1459 pass # may be partial hex id
1460 try:
1460 try:
1461 # str(rev)
1461 # str(rev)
1462 rev = int(id)
1462 rev = int(id)
1463 if b"%d" % rev != id:
1463 if b"%d" % rev != id:
1464 raise ValueError
1464 raise ValueError
1465 if rev < 0:
1465 if rev < 0:
1466 rev = len(self) + rev
1466 rev = len(self) + rev
1467 if rev < 0 or rev >= len(self):
1467 if rev < 0 or rev >= len(self):
1468 raise ValueError
1468 raise ValueError
1469 return self.node(rev)
1469 return self.node(rev)
1470 except (ValueError, OverflowError):
1470 except (ValueError, OverflowError):
1471 pass
1471 pass
1472 if len(id) == 2 * self.nodeconstants.nodelen:
1472 if len(id) == 2 * self.nodeconstants.nodelen:
1473 try:
1473 try:
1474 # a full hex nodeid?
1474 # a full hex nodeid?
1475 node = bin(id)
1475 node = bin(id)
1476 self.rev(node)
1476 self.rev(node)
1477 return node
1477 return node
1478 except (TypeError, error.LookupError):
1478 except (TypeError, error.LookupError):
1479 pass
1479 pass
1480
1480
1481 def _partialmatch(self, id):
1482 # we don't care about wdirfilenodeids as they should always be full hashes
1483 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1483 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1484 ambiguous = False
1484 ambiguous = False
1485 try:
1485 try:
1486 partial = self.index.partialmatch(id)
1486 partial = self.index.partialmatch(id)
1487 if partial and self.hasnode(partial):
1487 if partial and self.hasnode(partial):
1488 if maybewdir:
1488 if maybewdir:
1489 # single 'ff...' match in radix tree, ambiguous with wdir
1489 # single 'ff...' match in radix tree, ambiguous with wdir
1490 ambiguous = True
1490 ambiguous = True
1491 else:
1491 else:
1492 return partial
1492 return partial
1493 elif maybewdir:
1493 elif maybewdir:
1494 # no 'ff...' match in radix tree, wdir identified
1494 # no 'ff...' match in radix tree, wdir identified
1495 raise error.WdirUnsupported
1495 raise error.WdirUnsupported
1496 else:
1496 else:
1497 return None
1497 return None
1498 except error.RevlogError:
1498 except error.RevlogError:
1499 # parsers.c radix tree lookup gave multiple matches
1499 # parsers.c radix tree lookup gave multiple matches
1500 # fast path: for unfiltered changelog, radix tree is accurate
1500 # fast path: for unfiltered changelog, radix tree is accurate
1501 if not getattr(self, 'filteredrevs', None):
1501 if not getattr(self, 'filteredrevs', None):
1502 ambiguous = True
1502 ambiguous = True
1503 # fall through to slow path that filters hidden revisions
1503 # fall through to slow path that filters hidden revisions
1504 except (AttributeError, ValueError):
1504 except (AttributeError, ValueError):
1505 # we are pure python, or key was too short to search radix tree
1505 # we are pure python, or key was too short to search radix tree
1506 pass
1506 pass
1507 if ambiguous:
1507 if ambiguous:
1508 raise error.AmbiguousPrefixLookupError(
1508 raise error.AmbiguousPrefixLookupError(
1509 id, self.display_id, _(b'ambiguous identifier')
1509 id, self.display_id, _(b'ambiguous identifier')
1510 )
1510 )
1511
1511
1512 if id in self._pcache:
1512 if id in self._pcache:
1513 return self._pcache[id]
1513 return self._pcache[id]
1514
1514
1515 if len(id) <= 40:
1515 if len(id) <= 40:
1516 try:
1516 try:
1517 # hex(node)[:...]
1517 # hex(node)[:...]
1518 l = len(id) // 2 # grab an even number of digits
1518 l = len(id) // 2 # grab an even number of digits
1519 prefix = bin(id[: l * 2])
1519 prefix = bin(id[: l * 2])
1520 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1520 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1521 nl = [
1521 nl = [
1522 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1522 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1523 ]
1523 ]
1524 if self.nodeconstants.nullhex.startswith(id):
1524 if self.nodeconstants.nullhex.startswith(id):
1525 nl.append(self.nullid)
1525 nl.append(self.nullid)
1526 if len(nl) > 0:
1526 if len(nl) > 0:
1527 if len(nl) == 1 and not maybewdir:
1527 if len(nl) == 1 and not maybewdir:
1528 self._pcache[id] = nl[0]
1528 self._pcache[id] = nl[0]
1529 return nl[0]
1529 return nl[0]
1530 raise error.AmbiguousPrefixLookupError(
1530 raise error.AmbiguousPrefixLookupError(
1531 id, self.display_id, _(b'ambiguous identifier')
1531 id, self.display_id, _(b'ambiguous identifier')
1532 )
1532 )
1533 if maybewdir:
1533 if maybewdir:
1534 raise error.WdirUnsupported
1534 raise error.WdirUnsupported
1535 return None
1535 return None
1536 except TypeError:
1536 except TypeError:
1537 pass
1537 pass
1538
1538
1539 def lookup(self, id):
1539 def lookup(self, id):
1540 """locate a node based on:
1540 """locate a node based on:
1541 - revision number or str(revision number)
1541 - revision number or str(revision number)
1542 - nodeid or subset of hex nodeid
1542 - nodeid or subset of hex nodeid
1543 """
1543 """
1544 n = self._match(id)
1544 n = self._match(id)
1545 if n is not None:
1545 if n is not None:
1546 return n
1546 return n
1547 n = self._partialmatch(id)
1547 n = self._partialmatch(id)
1548 if n:
1548 if n:
1549 return n
1549 return n
1550
1550
1551 raise error.LookupError(id, self.display_id, _(b'no match found'))
1551 raise error.LookupError(id, self.display_id, _(b'no match found'))
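# Hedged example of the lookup() contract; `rl` and the identifiers below are
# assumptions, not values taken from the original source.
#
#     rl.lookup(b'0')          # str(rev number) -> binary node of rev 0
#     rl.lookup(b'1a2b3c')     # unambiguous hex prefix -> binary node
#     # an ambiguous prefix raises error.AmbiguousPrefixLookupError,
#     # an unknown identifier raises error.LookupError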
1552
1552
1553 def shortest(self, node, minlength=1):
1553 def shortest(self, node, minlength=1):
1554 """Find the shortest unambiguous prefix that matches node."""
1554 """Find the shortest unambiguous prefix that matches node."""
1555
1555
1556 def isvalid(prefix):
1556 def isvalid(prefix):
1557 try:
1557 try:
1558 matchednode = self._partialmatch(prefix)
1558 matchednode = self._partialmatch(prefix)
1559 except error.AmbiguousPrefixLookupError:
1559 except error.AmbiguousPrefixLookupError:
1560 return False
1560 return False
1561 except error.WdirUnsupported:
1561 except error.WdirUnsupported:
1562 # single 'ff...' match
1562 # single 'ff...' match
1563 return True
1563 return True
1564 if matchednode is None:
1564 if matchednode is None:
1565 raise error.LookupError(node, self.display_id, _(b'no node'))
1565 raise error.LookupError(node, self.display_id, _(b'no node'))
1566 return True
1566 return True
1567
1567
1568 def maybewdir(prefix):
1568 def maybewdir(prefix):
1569 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1569 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1570
1570
1571 hexnode = hex(node)
1571 hexnode = hex(node)
1572
1572
1573 def disambiguate(hexnode, minlength):
1573 def disambiguate(hexnode, minlength):
1574 """Disambiguate against wdirid."""
1574 """Disambiguate against wdirid."""
1575 for length in range(minlength, len(hexnode) + 1):
1575 for length in range(minlength, len(hexnode) + 1):
1576 prefix = hexnode[:length]
1576 prefix = hexnode[:length]
1577 if not maybewdir(prefix):
1577 if not maybewdir(prefix):
1578 return prefix
1578 return prefix
1579
1579
1580 if not getattr(self, 'filteredrevs', None):
1580 if not getattr(self, 'filteredrevs', None):
1581 try:
1581 try:
1582 length = max(self.index.shortest(node), minlength)
1582 length = max(self.index.shortest(node), minlength)
1583 return disambiguate(hexnode, length)
1583 return disambiguate(hexnode, length)
1584 except error.RevlogError:
1584 except error.RevlogError:
1585 if node != self.nodeconstants.wdirid:
1585 if node != self.nodeconstants.wdirid:
1586 raise error.LookupError(
1586 raise error.LookupError(
1587 node, self.display_id, _(b'no node')
1587 node, self.display_id, _(b'no node')
1588 )
1588 )
1589 except AttributeError:
1589 except AttributeError:
1590 # Fall through to pure code
1590 # Fall through to pure code
1591 pass
1591 pass
1592
1592
1593 if node == self.nodeconstants.wdirid:
1593 if node == self.nodeconstants.wdirid:
1594 for length in range(minlength, len(hexnode) + 1):
1594 for length in range(minlength, len(hexnode) + 1):
1595 prefix = hexnode[:length]
1595 prefix = hexnode[:length]
1596 if isvalid(prefix):
1596 if isvalid(prefix):
1597 return prefix
1597 return prefix
1598
1598
1599 for length in range(minlength, len(hexnode) + 1):
1599 for length in range(minlength, len(hexnode) + 1):
1600 prefix = hexnode[:length]
1600 prefix = hexnode[:length]
1601 if isvalid(prefix):
1601 if isvalid(prefix):
1602 return disambiguate(hexnode, length)
1602 return disambiguate(hexnode, length)
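# Sketch of how shortest() is typically consumed (for example by the
# `{shortest(node)}` template filter); `rl` and `node` are assumptions.
#
#     prefix = rl.shortest(node, minlength=4)
#     # `prefix` is the shortest hex prefix (at least 4 characters) that
#     # _partialmatch() resolves unambiguously back to `node`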
1603
1603
1604 def cmp(self, node, text):
1604 def cmp(self, node, text):
1605 """compare text with a given file revision
1605 """compare text with a given file revision
1606
1606
1607 returns True if text is different than what is stored.
1607 returns True if text is different than what is stored.
1608 """
1608 """
1609 p1, p2 = self.parents(node)
1609 p1, p2 = self.parents(node)
1610 return storageutil.hashrevisionsha1(text, p1, p2) != node
1610 return storageutil.hashrevisionsha1(text, p1, p2) != node
1611
1611
1612 def _getsegmentforrevs(self, startrev, endrev, df=None):
1612 def _getsegmentforrevs(self, startrev, endrev, df=None):
1613 """Obtain a segment of raw data corresponding to a range of revisions.
1613 """Obtain a segment of raw data corresponding to a range of revisions.
1614
1614
1615 Accepts the start and end revisions and an optional already-open
1615 Accepts the start and end revisions and an optional already-open
1616 file handle to be used for reading. If the file handle is read, its
1616 file handle to be used for reading. If the file handle is read, its
1617 seek position will not be preserved.
1617 seek position will not be preserved.
1618
1618
1619 Requests for data may be satisfied by a cache.
1619 Requests for data may be satisfied by a cache.
1620
1620
1621 Returns a 2-tuple of (offset, data) for the requested range of
1621 Returns a 2-tuple of (offset, data) for the requested range of
1622 revisions. Offset is the integer offset from the beginning of the
1622 revisions. Offset is the integer offset from the beginning of the
1623 revlog and data is a str or buffer of the raw byte data.
1623 revlog and data is a str or buffer of the raw byte data.
1624
1624
1625 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1625 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1626 to determine where each revision's data begins and ends.
1626 to determine where each revision's data begins and ends.
1627 """
1627 """
1628 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1628 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1629 # (functions are expensive).
1629 # (functions are expensive).
1630 index = self.index
1630 index = self.index
1631 istart = index[startrev]
1631 istart = index[startrev]
1632 start = int(istart[0] >> 16)
1632 start = int(istart[0] >> 16)
1633 if startrev == endrev:
1633 if startrev == endrev:
1634 end = start + istart[1]
1634 end = start + istart[1]
1635 else:
1635 else:
1636 iend = index[endrev]
1636 iend = index[endrev]
1637 end = int(iend[0] >> 16) + iend[1]
1637 end = int(iend[0] >> 16) + iend[1]
1638
1638
1639 if self._inline:
1639 if self._inline:
1640 start += (startrev + 1) * self.index.entry_size
1640 start += (startrev + 1) * self.index.entry_size
1641 end += (endrev + 1) * self.index.entry_size
1641 end += (endrev + 1) * self.index.entry_size
1642 length = end - start
1642 length = end - start
1643
1643
1644 return start, self._segmentfile.read_chunk(start, length, df)
1644 return start, self._segmentfile.read_chunk(start, length, df)
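# Worked example of the inline-offset adjustment above (an illustration, not
# code from the original file): in an inline revlog the index entries and the
# data chunks share one file, so each of the `rev + 1` preceding index entries
# shifts the data segment by `entry_size` bytes.
#
#     # start_in_file = data_offset + (startrev + 1) * index.entry_size
#     # end_in_file   = data_end    + (endrev + 1) * index.entry_size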
1645
1645
1646 def _chunk(self, rev, df=None):
1646 def _chunk(self, rev, df=None):
1647 """Obtain a single decompressed chunk for a revision.
1647 """Obtain a single decompressed chunk for a revision.
1648
1648
1649 Accepts an integer revision and an optional already-open file handle
1649 Accepts an integer revision and an optional already-open file handle
1650 to be used for reading. If used, the seek position of the file will not
1650 to be used for reading. If used, the seek position of the file will not
1651 be preserved.
1651 be preserved.
1652
1652
1653 Returns a str holding uncompressed data for the requested revision.
1653 Returns a str holding uncompressed data for the requested revision.
1654 """
1654 """
1655 compression_mode = self.index[rev][10]
1655 compression_mode = self.index[rev][10]
1656 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1656 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1657 if compression_mode == COMP_MODE_PLAIN:
1657 if compression_mode == COMP_MODE_PLAIN:
1658 return data
1658 return data
1659 elif compression_mode == COMP_MODE_DEFAULT:
1659 elif compression_mode == COMP_MODE_DEFAULT:
1660 return self._decompressor(data)
1660 return self._decompressor(data)
1661 elif compression_mode == COMP_MODE_INLINE:
1661 elif compression_mode == COMP_MODE_INLINE:
1662 return self.decompress(data)
1662 return self.decompress(data)
1663 else:
1663 else:
1664 msg = b'unknown compression mode %d'
1664 msg = b'unknown compression mode %d'
1665 msg %= compression_mode
1665 msg %= compression_mode
1666 raise error.RevlogError(msg)
1666 raise error.RevlogError(msg)
1667
1667
1668 def _chunks(self, revs, df=None, targetsize=None):
1668 def _chunks(self, revs, df=None, targetsize=None):
1669 """Obtain decompressed chunks for the specified revisions.
1669 """Obtain decompressed chunks for the specified revisions.
1670
1670
1671 Accepts an iterable of numeric revisions that are assumed to be in
1671 Accepts an iterable of numeric revisions that are assumed to be in
1672 ascending order. Also accepts an optional already-open file handle
1672 ascending order. Also accepts an optional already-open file handle
1673 to be used for reading. If used, the seek position of the file will
1673 to be used for reading. If used, the seek position of the file will
1674 not be preserved.
1674 not be preserved.
1675
1675
1676 This function is similar to calling ``self._chunk()`` multiple times,
1676 This function is similar to calling ``self._chunk()`` multiple times,
1677 but is faster.
1677 but is faster.
1678
1678
1679 Returns a list with decompressed data for each requested revision.
1679 Returns a list with decompressed data for each requested revision.
1680 """
1680 """
1681 if not revs:
1681 if not revs:
1682 return []
1682 return []
1683 start = self.start
1683 start = self.start
1684 length = self.length
1684 length = self.length
1685 inline = self._inline
1685 inline = self._inline
1686 iosize = self.index.entry_size
1686 iosize = self.index.entry_size
1687 buffer = util.buffer
1687 buffer = util.buffer
1688
1688
1689 l = []
1689 l = []
1690 ladd = l.append
1690 ladd = l.append
1691
1691
1692 if not self._withsparseread:
1692 if not self._withsparseread:
1693 slicedchunks = (revs,)
1693 slicedchunks = (revs,)
1694 else:
1694 else:
1695 slicedchunks = deltautil.slicechunk(
1695 slicedchunks = deltautil.slicechunk(
1696 self, revs, targetsize=targetsize
1696 self, revs, targetsize=targetsize
1697 )
1697 )
1698
1698
1699 for revschunk in slicedchunks:
1699 for revschunk in slicedchunks:
1700 firstrev = revschunk[0]
1700 firstrev = revschunk[0]
1701 # Skip trailing revisions with empty diff
1701 # Skip trailing revisions with empty diff
1702 for lastrev in revschunk[::-1]:
1702 for lastrev in revschunk[::-1]:
1703 if length(lastrev) != 0:
1703 if length(lastrev) != 0:
1704 break
1704 break
1705
1705
1706 try:
1706 try:
1707 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1707 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1708 except OverflowError:
1708 except OverflowError:
1709 # issue4215 - we can't cache a run of chunks greater than
1709 # issue4215 - we can't cache a run of chunks greater than
1710 # 2G on Windows
1710 # 2G on Windows
1711 return [self._chunk(rev, df=df) for rev in revschunk]
1711 return [self._chunk(rev, df=df) for rev in revschunk]
1712
1712
1713 decomp = self.decompress
1713 decomp = self.decompress
1714 # self._decompressor might be None, but will not be used in that case
1714 # self._decompressor might be None, but will not be used in that case
1715 def_decomp = self._decompressor
1715 def_decomp = self._decompressor
1716 for rev in revschunk:
1716 for rev in revschunk:
1717 chunkstart = start(rev)
1717 chunkstart = start(rev)
1718 if inline:
1718 if inline:
1719 chunkstart += (rev + 1) * iosize
1719 chunkstart += (rev + 1) * iosize
1720 chunklength = length(rev)
1720 chunklength = length(rev)
1721 comp_mode = self.index[rev][10]
1721 comp_mode = self.index[rev][10]
1722 c = buffer(data, chunkstart - offset, chunklength)
1722 c = buffer(data, chunkstart - offset, chunklength)
1723 if comp_mode == COMP_MODE_PLAIN:
1723 if comp_mode == COMP_MODE_PLAIN:
1724 ladd(c)
1724 ladd(c)
1725 elif comp_mode == COMP_MODE_INLINE:
1725 elif comp_mode == COMP_MODE_INLINE:
1726 ladd(decomp(c))
1726 ladd(decomp(c))
1727 elif comp_mode == COMP_MODE_DEFAULT:
1727 elif comp_mode == COMP_MODE_DEFAULT:
1728 ladd(def_decomp(c))
1728 ladd(def_decomp(c))
1729 else:
1729 else:
1730 msg = b'unknown compression mode %d'
1730 msg = b'unknown compression mode %d'
1731 msg %= comp_mode
1731 msg %= comp_mode
1732 raise error.RevlogError(msg)
1732 raise error.RevlogError(msg)
1733
1733
1734 return l
1734 return l
1735
1735
1736 def deltaparent(self, rev):
1736 def deltaparent(self, rev):
1737 """return deltaparent of the given revision"""
1737 """return deltaparent of the given revision"""
1738 base = self.index[rev][3]
1738 base = self.index[rev][3]
1739 if base == rev:
1739 if base == rev:
1740 return nullrev
1740 return nullrev
1741 elif self._generaldelta:
1741 elif self._generaldelta:
1742 return base
1742 return base
1743 else:
1743 else:
1744 return rev - 1
1744 return rev - 1
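# Illustration of the two layouts handled above (assumed revlog `rl`): with
# general delta the stored base *is* the delta parent, while the legacy layout
# always deltas against the immediately preceding revision; in both cases a
# base equal to the revision itself means "full text", reported as nullrev.
#
#     rl.deltaparent(rev)   # generaldelta: index[rev][3]; legacy: rev - 1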
1745
1745
1746 def issnapshot(self, rev):
1746 def issnapshot(self, rev):
1747 """tells whether rev is a snapshot"""
1747 """tells whether rev is a snapshot"""
1748 if not self._sparserevlog:
1748 if not self._sparserevlog:
1749 return self.deltaparent(rev) == nullrev
1749 return self.deltaparent(rev) == nullrev
1750 elif util.safehasattr(self.index, b'issnapshot'):
1750 elif util.safehasattr(self.index, b'issnapshot'):
1751 # directly assign the method to cache the testing and access
1751 # directly assign the method to cache the testing and access
1752 self.issnapshot = self.index.issnapshot
1752 self.issnapshot = self.index.issnapshot
1753 return self.issnapshot(rev)
1753 return self.issnapshot(rev)
1754 if rev == nullrev:
1754 if rev == nullrev:
1755 return True
1755 return True
1756 entry = self.index[rev]
1756 entry = self.index[rev]
1757 base = entry[3]
1757 base = entry[3]
1758 if base == rev:
1758 if base == rev:
1759 return True
1759 return True
1760 if base == nullrev:
1760 if base == nullrev:
1761 return True
1761 return True
1762 p1 = entry[5]
1762 p1 = entry[5]
1763 p2 = entry[6]
1763 p2 = entry[6]
1764 if base == p1 or base == p2:
1764 if base == p1 or base == p2:
1765 return False
1765 return False
1766 return self.issnapshot(base)
1766 return self.issnapshot(base)
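# Hedged summary of the recursion above: under sparse-revlog a revision is a
# snapshot when its delta-base chain reaches a full text without ever using
# one of the revision's own parents as a base. Illustrative chains (not data
# from this repository):
#
#     # rev 7 -> base 4 -> base 0 (full text)   => issnapshot(7) is True,
#     #                                            provided no base is a parent
#     # rev 7 -> base p1(7)                     => issnapshot(7) is False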
1767
1767
1768 def snapshotdepth(self, rev):
1769 """number of snapshots in the chain before this one"""
1770 if not self.issnapshot(rev):
1771 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1772 return len(self._deltachain(rev)[0]) - 1
1773
1773
1774 def revdiff(self, rev1, rev2):
1774 def revdiff(self, rev1, rev2):
1775 """return or calculate a delta between two revisions
1775 """return or calculate a delta between two revisions
1776
1776
1777 The delta calculated is in binary form and is intended to be written to
1777 The delta calculated is in binary form and is intended to be written to
1778 revlog data directly. So this function needs raw revision data.
1778 revlog data directly. So this function needs raw revision data.
1779 """
1779 """
1780 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1780 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1781 return bytes(self._chunk(rev2))
1781 return bytes(self._chunk(rev2))
1782
1782
1783 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1783 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1784
1784
1785 def _processflags(self, text, flags, operation, raw=False):
1785 def _processflags(self, text, flags, operation, raw=False):
1786 """deprecated entry point to access flag processors"""
1786 """deprecated entry point to access flag processors"""
1787 msg = b'_processflag(...) use the specialized variant'
1787 msg = b'_processflag(...) use the specialized variant'
1788 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1788 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1789 if raw:
1789 if raw:
1790 return text, flagutil.processflagsraw(self, text, flags)
1790 return text, flagutil.processflagsraw(self, text, flags)
1791 elif operation == b'read':
1791 elif operation == b'read':
1792 return flagutil.processflagsread(self, text, flags)
1792 return flagutil.processflagsread(self, text, flags)
1793 else: # write operation
1793 else: # write operation
1794 return flagutil.processflagswrite(self, text, flags)
1794 return flagutil.processflagswrite(self, text, flags)
1795
1795
1796 def revision(self, nodeorrev, _df=None, raw=False):
1796 def revision(self, nodeorrev, _df=None, raw=False):
1797 """return an uncompressed revision of a given node or revision
1797 """return an uncompressed revision of a given node or revision
1798 number.
1798 number.
1799
1799
1800 _df - an existing file handle to read from. (internal-only)
1800 _df - an existing file handle to read from. (internal-only)
1801 raw - an optional argument specifying if the revision data is to be
1801 raw - an optional argument specifying if the revision data is to be
1802 treated as raw data when applying flag transforms. 'raw' should be set
1802 treated as raw data when applying flag transforms. 'raw' should be set
1803 to True when generating changegroups or in debug commands.
1803 to True when generating changegroups or in debug commands.
1804 """
1804 """
1805 if raw:
1805 if raw:
1806 msg = (
1806 msg = (
1807 b'revlog.revision(..., raw=True) is deprecated, '
1807 b'revlog.revision(..., raw=True) is deprecated, '
1808 b'use revlog.rawdata(...)'
1808 b'use revlog.rawdata(...)'
1809 )
1809 )
1810 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1810 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1811 return self._revisiondata(nodeorrev, _df, raw=raw)
1811 return self._revisiondata(nodeorrev, _df, raw=raw)
1812
1812
1813 def sidedata(self, nodeorrev, _df=None):
1814 """a map of extra data related to the changeset but not part of the hash
1815
1816 This function currently returns a dictionary. However, a more advanced
1817 mapping object will likely be used in the future for more
1818 efficient/lazy code.
1819 """
1820 # deal with <nodeorrev> argument type
1820 # deal with <nodeorrev> argument type
1821 if isinstance(nodeorrev, int):
1821 if isinstance(nodeorrev, int):
1822 rev = nodeorrev
1822 rev = nodeorrev
1823 else:
1823 else:
1824 rev = self.rev(nodeorrev)
1824 rev = self.rev(nodeorrev)
1825 return self._sidedata(rev)
1825 return self._sidedata(rev)
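# Hedged example of the sidedata() contract (identifiers are assumptions): the
# result is a plain dict keyed by the SD_* field constants from
# revlogutils.sidedata, and an empty dict means the revision stored none.
#
#     sd = rl.sidedata(rev)
#     if not sd:
#         pass  # nothing recorded for this revision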
1826
1826
1827 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1827 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1828 # deal with <nodeorrev> argument type
1828 # deal with <nodeorrev> argument type
1829 if isinstance(nodeorrev, int):
1829 if isinstance(nodeorrev, int):
1830 rev = nodeorrev
1830 rev = nodeorrev
1831 node = self.node(rev)
1831 node = self.node(rev)
1832 else:
1832 else:
1833 node = nodeorrev
1833 node = nodeorrev
1834 rev = None
1834 rev = None
1835
1835
1836 # fast path the special `nullid` rev
1836 # fast path the special `nullid` rev
1837 if node == self.nullid:
1837 if node == self.nullid:
1838 return b""
1838 return b""
1839
1839
1840 # ``rawtext`` is the text as stored inside the revlog. Might be the
1840 # ``rawtext`` is the text as stored inside the revlog. Might be the
1841 # revision or might need to be processed to retrieve the revision.
1841 # revision or might need to be processed to retrieve the revision.
1842 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1842 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1843
1843
1844 if raw and validated:
1844 if raw and validated:
1845 # if we don't want to process the raw text and that raw
1845 # if we don't want to process the raw text and that raw
1846 # text is cached, we can exit early.
1846 # text is cached, we can exit early.
1847 return rawtext
1847 return rawtext
1848 if rev is None:
1848 if rev is None:
1849 rev = self.rev(node)
1849 rev = self.rev(node)
1850 # the revlog's flags for this revision
1851 # (they usually alter its state or content)
1852 flags = self.flags(rev)
1853
1853
1854 if validated and flags == REVIDX_DEFAULT_FLAGS:
1854 if validated and flags == REVIDX_DEFAULT_FLAGS:
1855 # no extra flags set, no flag processor runs, text = rawtext
1855 # no extra flags set, no flag processor runs, text = rawtext
1856 return rawtext
1856 return rawtext
1857
1857
1858 if raw:
1858 if raw:
1859 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1859 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1860 text = rawtext
1860 text = rawtext
1861 else:
1861 else:
1862 r = flagutil.processflagsread(self, rawtext, flags)
1862 r = flagutil.processflagsread(self, rawtext, flags)
1863 text, validatehash = r
1863 text, validatehash = r
1864 if validatehash:
1864 if validatehash:
1865 self.checkhash(text, node, rev=rev)
1865 self.checkhash(text, node, rev=rev)
1866 if not validated:
1866 if not validated:
1867 self._revisioncache = (node, rev, rawtext)
1867 self._revisioncache = (node, rev, rawtext)
1868
1868
1869 return text
1869 return text
1870
1870
1871 def _rawtext(self, node, rev, _df=None):
1871 def _rawtext(self, node, rev, _df=None):
1872 """return the possibly unvalidated rawtext for a revision
1872 """return the possibly unvalidated rawtext for a revision
1873
1873
1874 returns (rev, rawtext, validated)
1874 returns (rev, rawtext, validated)
1875 """
1875 """
1876
1876
1877 # revision in the cache (could be useful to apply delta)
1877 # revision in the cache (could be useful to apply delta)
1878 cachedrev = None
1878 cachedrev = None
1879 # An intermediate text to apply deltas to
1879 # An intermediate text to apply deltas to
1880 basetext = None
1880 basetext = None
1881
1881
1882 # Check if we have the entry in cache
1882 # Check if we have the entry in cache
1883 # The cache entry looks like (node, rev, rawtext)
1883 # The cache entry looks like (node, rev, rawtext)
1884 if self._revisioncache:
1884 if self._revisioncache:
1885 if self._revisioncache[0] == node:
1885 if self._revisioncache[0] == node:
1886 return (rev, self._revisioncache[2], True)
1886 return (rev, self._revisioncache[2], True)
1887 cachedrev = self._revisioncache[1]
1887 cachedrev = self._revisioncache[1]
1888
1888
1889 if rev is None:
1889 if rev is None:
1890 rev = self.rev(node)
1890 rev = self.rev(node)
1891
1891
1892 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1892 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1893 if stopped:
1893 if stopped:
1894 basetext = self._revisioncache[2]
1894 basetext = self._revisioncache[2]
1895
1895
1896 # drop cache to save memory, the caller is expected to
1896 # drop cache to save memory, the caller is expected to
1897 # update self._revisioncache after validating the text
1897 # update self._revisioncache after validating the text
1898 self._revisioncache = None
1898 self._revisioncache = None
1899
1899
1900 targetsize = None
1900 targetsize = None
1901 rawsize = self.index[rev][2]
1901 rawsize = self.index[rev][2]
1902 if 0 <= rawsize:
1902 if 0 <= rawsize:
1903 targetsize = 4 * rawsize
1903 targetsize = 4 * rawsize
1904
1904
1905 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1905 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1906 if basetext is None:
1906 if basetext is None:
1907 basetext = bytes(bins[0])
1907 basetext = bytes(bins[0])
1908 bins = bins[1:]
1908 bins = bins[1:]
1909
1909
1910 rawtext = mdiff.patches(basetext, bins)
1910 rawtext = mdiff.patches(basetext, bins)
1911 del basetext # let us have a chance to free memory early
1911 del basetext # let us have a chance to free memory early
1912 return (rev, rawtext, False)
1912 return (rev, rawtext, False)
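# Sketch of the reconstruction above (illustrative): the delta chain is read
# in one pass, the first element (or the cached revision, when the chain was
# cut short at `stoprev`) becomes the base text, and every remaining delta is
# folded over it.
#
#     # rawtext = mdiff.patches(basetext, deltas)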
1913
1913
1914 def _sidedata(self, rev):
1914 def _sidedata(self, rev):
1915 """Return the sidedata for a given revision number."""
1915 """Return the sidedata for a given revision number."""
1916 index_entry = self.index[rev]
1916 index_entry = self.index[rev]
1917 sidedata_offset = index_entry[8]
1917 sidedata_offset = index_entry[8]
1918 sidedata_size = index_entry[9]
1918 sidedata_size = index_entry[9]
1919
1919
1920 if self._inline:
1920 if self._inline:
1921 sidedata_offset += self.index.entry_size * (1 + rev)
1921 sidedata_offset += self.index.entry_size * (1 + rev)
1922 if sidedata_size == 0:
1922 if sidedata_size == 0:
1923 return {}
1923 return {}
1924
1924
1925 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
1925 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
1926 filename = self._sidedatafile
1926 filename = self._sidedatafile
1927 end = self._docket.sidedata_end
1927 end = self._docket.sidedata_end
1928 offset = sidedata_offset
1928 offset = sidedata_offset
1929 length = sidedata_size
1929 length = sidedata_size
1930 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1930 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1931 raise error.RevlogError(m)
1931 raise error.RevlogError(m)
1932
1932
1933 comp_segment = self._segmentfile_sidedata.read_chunk(
1933 comp_segment = self._segmentfile_sidedata.read_chunk(
1934 sidedata_offset, sidedata_size
1934 sidedata_offset, sidedata_size
1935 )
1935 )
1936
1936
1937 comp = self.index[rev][11]
1937 comp = self.index[rev][11]
1938 if comp == COMP_MODE_PLAIN:
1938 if comp == COMP_MODE_PLAIN:
1939 segment = comp_segment
1939 segment = comp_segment
1940 elif comp == COMP_MODE_DEFAULT:
1940 elif comp == COMP_MODE_DEFAULT:
1941 segment = self._decompressor(comp_segment)
1941 segment = self._decompressor(comp_segment)
1942 elif comp == COMP_MODE_INLINE:
1942 elif comp == COMP_MODE_INLINE:
1943 segment = self.decompress(comp_segment)
1943 segment = self.decompress(comp_segment)
1944 else:
1944 else:
1945 msg = b'unknown compression mode %d'
1945 msg = b'unknown compression mode %d'
1946 msg %= comp
1946 msg %= comp
1947 raise error.RevlogError(msg)
1947 raise error.RevlogError(msg)
1948
1948
1949 sidedata = sidedatautil.deserialize_sidedata(segment)
1949 sidedata = sidedatautil.deserialize_sidedata(segment)
1950 return sidedata
1950 return sidedata
1951
1951
1952 def rawdata(self, nodeorrev, _df=None):
1952 def rawdata(self, nodeorrev, _df=None):
1953 """return an uncompressed raw data of a given node or revision number.
1953 """return an uncompressed raw data of a given node or revision number.
1954
1954
1955 _df - an existing file handle to read from. (internal-only)
1955 _df - an existing file handle to read from. (internal-only)
1956 """
1956 """
1957 return self._revisiondata(nodeorrev, _df, raw=True)
1957 return self._revisiondata(nodeorrev, _df, raw=True)
1958
1958
1959 def hash(self, text, p1, p2):
1959 def hash(self, text, p1, p2):
1960 """Compute a node hash.
1960 """Compute a node hash.
1961
1961
1962 Available as a function so that subclasses can replace the hash
1962 Available as a function so that subclasses can replace the hash
1963 as needed.
1963 as needed.
1964 """
1964 """
1965 return storageutil.hashrevisionsha1(text, p1, p2)
1965 return storageutil.hashrevisionsha1(text, p1, p2)
1966
1966
1967 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1967 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1968 """Check node hash integrity.
1968 """Check node hash integrity.
1969
1969
1970 Available as a function so that subclasses can extend hash mismatch
1970 Available as a function so that subclasses can extend hash mismatch
1971 behaviors as needed.
1971 behaviors as needed.
1972 """
1972 """
1973 try:
1973 try:
1974 if p1 is None and p2 is None:
1974 if p1 is None and p2 is None:
1975 p1, p2 = self.parents(node)
1975 p1, p2 = self.parents(node)
1976 if node != self.hash(text, p1, p2):
1976 if node != self.hash(text, p1, p2):
1977 # Clear the revision cache on hash failure. The revision cache
1977 # Clear the revision cache on hash failure. The revision cache
1978 # only stores the raw revision and clearing the cache does have
1978 # only stores the raw revision and clearing the cache does have
1979 # the side-effect that we won't have a cache hit when the raw
1979 # the side-effect that we won't have a cache hit when the raw
1980 # revision data is accessed. But this case should be rare and
1980 # revision data is accessed. But this case should be rare and
1981 # it is extra work to teach the cache about the hash
1981 # it is extra work to teach the cache about the hash
1982 # verification state.
1982 # verification state.
1983 if self._revisioncache and self._revisioncache[0] == node:
1983 if self._revisioncache and self._revisioncache[0] == node:
1984 self._revisioncache = None
1984 self._revisioncache = None
1985
1985
1986 revornode = rev
1986 revornode = rev
1987 if revornode is None:
1987 if revornode is None:
1988 revornode = templatefilters.short(hex(node))
1988 revornode = templatefilters.short(hex(node))
1989 raise error.RevlogError(
1989 raise error.RevlogError(
1990 _(b"integrity check failed on %s:%s")
1990 _(b"integrity check failed on %s:%s")
1991 % (self.display_id, pycompat.bytestr(revornode))
1991 % (self.display_id, pycompat.bytestr(revornode))
1992 )
1992 )
1993 except error.RevlogError:
1993 except error.RevlogError:
1994 if self._censorable and storageutil.iscensoredtext(text):
1994 if self._censorable and storageutil.iscensoredtext(text):
1995 raise error.CensoredNodeError(self.display_id, node, text)
1995 raise error.CensoredNodeError(self.display_id, node, text)
1996 raise
1996 raise
1997
1997
1998 def _enforceinlinesize(self, tr):
1998 def _enforceinlinesize(self, tr):
1999 """Check if the revlog is too big for inline and convert if so.
1999 """Check if the revlog is too big for inline and convert if so.
2000
2000
2001 This should be called after revisions are added to the revlog. If the
2001 This should be called after revisions are added to the revlog. If the
2002 revlog has grown too large to be an inline revlog, it will convert it
2002 revlog has grown too large to be an inline revlog, it will convert it
2003 to use multiple index and data files.
2003 to use multiple index and data files.
2004 """
2004 """
2005 tiprev = len(self) - 1
2005 tiprev = len(self) - 1
2006 total_size = self.start(tiprev) + self.length(tiprev)
2006 total_size = self.start(tiprev) + self.length(tiprev)
2007 if not self._inline or total_size < _maxinline:
2007 if not self._inline or total_size < _maxinline:
2008 return
2008 return
2009
2009
2010 troffset = tr.findoffset(self._indexfile)
2010 troffset = tr.findoffset(self._indexfile)
2011 if troffset is None:
2011 if troffset is None:
2012 raise error.RevlogError(
2012 raise error.RevlogError(
2013 _(b"%s not found in the transaction") % self._indexfile
2013 _(b"%s not found in the transaction") % self._indexfile
2014 )
2014 )
2015 trindex = 0
2015 trindex = 0
2016 tr.add(self._datafile, 0)
2016 tr.add(self._datafile, 0)
2017
2017
2018 existing_handles = False
2018 existing_handles = False
2019 if self._writinghandles is not None:
2019 if self._writinghandles is not None:
2020 existing_handles = True
2020 existing_handles = True
2021 fp = self._writinghandles[0]
2021 fp = self._writinghandles[0]
2022 fp.flush()
2022 fp.flush()
2023 fp.close()
2023 fp.close()
2024 # We can't use the cached file handle after close(). So prevent
2024 # We can't use the cached file handle after close(). So prevent
2025 # its usage.
2025 # its usage.
2026 self._writinghandles = None
2026 self._writinghandles = None
2027 self._segmentfile.writing_handle = None
2027 self._segmentfile.writing_handle = None
2028 # No need to deal with sidedata writing handle as it is only
2028 # No need to deal with sidedata writing handle as it is only
2029 # relevant with revlog-v2 which is never inline, not reaching
2029 # relevant with revlog-v2 which is never inline, not reaching
2030 # this code
2030 # this code
2031
2031
2032 new_dfh = self._datafp(b'w+')
2032 new_dfh = self._datafp(b'w+')
2033 new_dfh.truncate(0) # drop any potentially existing data
2033 new_dfh.truncate(0) # drop any potentially existing data
2034 try:
2034 try:
2035 with self._indexfp() as read_ifh:
2035 with self._indexfp() as read_ifh:
2036 for r in self:
2036 for r in self:
2037 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2037 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2038 if troffset <= self.start(r) + r * self.index.entry_size:
2038 if troffset <= self.start(r) + r * self.index.entry_size:
2039 trindex = r
2039 trindex = r
2040 new_dfh.flush()
2040 new_dfh.flush()
2041
2041
2042 with self.__index_new_fp() as fp:
2042 with self.__index_new_fp() as fp:
2043 self._format_flags &= ~FLAG_INLINE_DATA
2043 self._format_flags &= ~FLAG_INLINE_DATA
2044 self._inline = False
2044 self._inline = False
2045 for i in self:
2045 for i in self:
2046 e = self.index.entry_binary(i)
2046 e = self.index.entry_binary(i)
2047 if i == 0 and self._docket is None:
2047 if i == 0 and self._docket is None:
2048 header = self._format_flags | self._format_version
2048 header = self._format_flags | self._format_version
2049 header = self.index.pack_header(header)
2049 header = self.index.pack_header(header)
2050 e = header + e
2050 e = header + e
2051 fp.write(e)
2051 fp.write(e)
2052 if self._docket is not None:
2052 if self._docket is not None:
2053 self._docket.index_end = fp.tell()
2053 self._docket.index_end = fp.tell()
2054
2054
2055 # There is a small transactional race here. If the rename of
2055 # There is a small transactional race here. If the rename of
2056 # the index fails, we should remove the datafile. It is more
2056 # the index fails, we should remove the datafile. It is more
2057 # important to ensure that the data file is not truncated
2057 # important to ensure that the data file is not truncated
2058 # when the index is replaced as otherwise data is lost.
2058 # when the index is replaced as otherwise data is lost.
2059 tr.replace(self._datafile, self.start(trindex))
2059 tr.replace(self._datafile, self.start(trindex))
2060
2060
2061 # the temp file replaces the real index when we exit the context
2062 # manager
2063
2063
2064 tr.replace(self._indexfile, trindex * self.index.entry_size)
2064 tr.replace(self._indexfile, trindex * self.index.entry_size)
2065 nodemaputil.setup_persistent_nodemap(tr, self)
2065 nodemaputil.setup_persistent_nodemap(tr, self)
2066 self._segmentfile = randomaccessfile.randomaccessfile(
2066 self._segmentfile = randomaccessfile.randomaccessfile(
2067 self.opener,
2067 self.opener,
2068 self._datafile,
2068 self._datafile,
2069 self._chunkcachesize,
2069 self._chunkcachesize,
2070 )
2070 )
2071
2071
2072 if existing_handles:
2072 if existing_handles:
2073 # switched from inline to conventional reopen the index
2073 # switched from inline to conventional reopen the index
2074 ifh = self.__index_write_fp()
2074 ifh = self.__index_write_fp()
2075 self._writinghandles = (ifh, new_dfh, None)
2075 self._writinghandles = (ifh, new_dfh, None)
2076 self._segmentfile.writing_handle = new_dfh
2076 self._segmentfile.writing_handle = new_dfh
2077 new_dfh = None
2077 new_dfh = None
2078 # No need to deal with sidedata writing handle as it is only
2078 # No need to deal with sidedata writing handle as it is only
2079 # relevant with revlog-v2 which is never inline, not reaching
2079 # relevant with revlog-v2 which is never inline, not reaching
2080 # this code
2080 # this code
2081 finally:
2081 finally:
2082 if new_dfh is not None:
2082 if new_dfh is not None:
2083 new_dfh.close()
2083 new_dfh.close()
2084
2084
2085 def _nodeduplicatecallback(self, transaction, node):
2085 def _nodeduplicatecallback(self, transaction, node):
2086 """called when trying to add a node already stored."""
2086 """called when trying to add a node already stored."""
2087
2087
2088 @contextlib.contextmanager
2089 def reading(self):
2090 """Context manager that keeps data and sidedata files open for reading"""
2091 with self._segmentfile.reading():
2092 with self._segmentfile_sidedata.reading():
2093 yield
2094
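# Hedged usage sketch for the reading() context manager above, matching the
# copy-tracing pattern this changeset is about: callers that fetch sidedata
# for many revisions can keep the underlying data and sidedata files open
# across the whole loop (`rl` and `revs` are assumptions).
#
#     with rl.reading():
#         sidedata_by_rev = {r: rl.sidedata(r) for r in revs}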
2095 @contextlib.contextmanager
2096 def _writing(self, transaction):
2090 if self._trypending:
2097 if self._trypending:
2091 msg = b'try to write in a `trypending` revlog: %s'
2098 msg = b'try to write in a `trypending` revlog: %s'
2092 msg %= self.display_id
2099 msg %= self.display_id
2093 raise error.ProgrammingError(msg)
2100 raise error.ProgrammingError(msg)
2094 if self._writinghandles is not None:
2101 if self._writinghandles is not None:
2095 yield
2102 yield
2096 else:
2103 else:
2097 ifh = dfh = sdfh = None
2104 ifh = dfh = sdfh = None
2098 try:
2105 try:
2099 r = len(self)
2106 r = len(self)
2100 # opening the data file.
2107 # opening the data file.
2101 dsize = 0
2108 dsize = 0
2102 if r:
2109 if r:
2103 dsize = self.end(r - 1)
2110 dsize = self.end(r - 1)
2104 dfh = None
2111 dfh = None
2105 if not self._inline:
2112 if not self._inline:
2106 try:
2113 try:
2107 dfh = self._datafp(b"r+")
2114 dfh = self._datafp(b"r+")
2108 if self._docket is None:
2115 if self._docket is None:
2109 dfh.seek(0, os.SEEK_END)
2116 dfh.seek(0, os.SEEK_END)
2110 else:
2117 else:
2111 dfh.seek(self._docket.data_end, os.SEEK_SET)
2118 dfh.seek(self._docket.data_end, os.SEEK_SET)
2112 except IOError as inst:
2119 except IOError as inst:
2113 if inst.errno != errno.ENOENT:
2120 if inst.errno != errno.ENOENT:
2114 raise
2121 raise
2115 dfh = self._datafp(b"w+")
2122 dfh = self._datafp(b"w+")
2116 transaction.add(self._datafile, dsize)
2123 transaction.add(self._datafile, dsize)
2117 if self._sidedatafile is not None:
2124 if self._sidedatafile is not None:
2118 try:
2125 try:
2126 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2127 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2121 except IOError as inst:
2128 except IOError as inst:
2122 if inst.errno != errno.ENOENT:
2129 if inst.errno != errno.ENOENT:
2123 raise
2130 raise
2124 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2131 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2125 transaction.add(
2132 transaction.add(
2126 self._sidedatafile, self._docket.sidedata_end
2133 self._sidedatafile, self._docket.sidedata_end
2127 )
2134 )
2128
2135
2129 # opening the index file.
2136 # opening the index file.
2130 isize = r * self.index.entry_size
2137 isize = r * self.index.entry_size
2131 ifh = self.__index_write_fp()
2138 ifh = self.__index_write_fp()
2132 if self._inline:
2139 if self._inline:
2133 transaction.add(self._indexfile, dsize + isize)
2140 transaction.add(self._indexfile, dsize + isize)
2134 else:
2141 else:
2135 transaction.add(self._indexfile, isize)
2142 transaction.add(self._indexfile, isize)
2143 # exposing all file handles for writing.
2137 self._writinghandles = (ifh, dfh, sdfh)
2144 self._writinghandles = (ifh, dfh, sdfh)
2138 self._segmentfile.writing_handle = ifh if self._inline else dfh
2145 self._segmentfile.writing_handle = ifh if self._inline else dfh
2139 self._segmentfile_sidedata.writing_handle = sdfh
2146 self._segmentfile_sidedata.writing_handle = sdfh
2140 yield
2147 yield
2141 if self._docket is not None:
2148 if self._docket is not None:
2142 self._write_docket(transaction)
2149 self._write_docket(transaction)
2143 finally:
2150 finally:
2144 self._writinghandles = None
2151 self._writinghandles = None
2145 self._segmentfile.writing_handle = None
2152 self._segmentfile.writing_handle = None
2146 self._segmentfile_sidedata.writing_handle = None
2153 self._segmentfile_sidedata.writing_handle = None
2147 if dfh is not None:
2154 if dfh is not None:
2148 dfh.close()
2155 dfh.close()
2149 if sdfh is not None:
2156 if sdfh is not None:
2150 sdfh.close()
2157 sdfh.close()
2158 # closing the index file last to avoid exposing references to
2159 # potentially unflushed data content.
2153 if ifh is not None:
2160 if ifh is not None:
2154 ifh.close()
2161 ifh.close()
2155
2162
2163 def _write_docket(self, transaction):
2164 """write the current docket on disk
2165
2166 Exists as a method to help the changelog implement transaction logic.
2167
2168 We could also imagine using the same transaction logic for all revlogs
2169 since dockets are cheap."""
2170 self._docket.write(transaction)
2164
2171
2165 def addrevision(
2172 def addrevision(
2166 self,
2173 self,
2167 text,
2174 text,
2168 transaction,
2175 transaction,
2169 link,
2176 link,
2170 p1,
2177 p1,
2171 p2,
2178 p2,
2172 cachedelta=None,
2179 cachedelta=None,
2173 node=None,
2180 node=None,
2174 flags=REVIDX_DEFAULT_FLAGS,
2181 flags=REVIDX_DEFAULT_FLAGS,
2175 deltacomputer=None,
2182 deltacomputer=None,
2176 sidedata=None,
2183 sidedata=None,
2177 ):
2184 ):
2178 """add a revision to the log
2185 """add a revision to the log
2179
2186
2180 text - the revision data to add
2187 text - the revision data to add
2181 transaction - the transaction object used for rollback
2188 transaction - the transaction object used for rollback
2182 link - the linkrev data to add
2189 link - the linkrev data to add
2183 p1, p2 - the parent nodeids of the revision
2190 p1, p2 - the parent nodeids of the revision
2184 cachedelta - an optional precomputed delta
2191 cachedelta - an optional precomputed delta
2192 node - nodeid of revision; typically node is not specified, and it is
2193 computed by default as hash(text, p1, p2); however, subclasses might
2194 use a different hashing method (and override checkhash() in such a case)
2188 flags - the known flags to set on the revision
2195 flags - the known flags to set on the revision
2189 deltacomputer - an optional deltacomputer instance shared between
2196 deltacomputer - an optional deltacomputer instance shared between
2190 multiple calls
2197 multiple calls
2191 """
2198 """
2192 if link == nullrev:
2199 if link == nullrev:
2193 raise error.RevlogError(
2200 raise error.RevlogError(
2194 _(b"attempted to add linkrev -1 to %s") % self.display_id
2201 _(b"attempted to add linkrev -1 to %s") % self.display_id
2195 )
2202 )
2196
2203
2197 if sidedata is None:
2204 if sidedata is None:
2198 sidedata = {}
2205 sidedata = {}
2206 elif sidedata and not self.hassidedata:
2207 raise error.ProgrammingError(
2208 _(b"trying to add sidedata to a revlog that does not support them")
2209 )
2203
2210
2204 if flags:
2211 if flags:
2205 node = node or self.hash(text, p1, p2)
2212 node = node or self.hash(text, p1, p2)
2206
2213
2207 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2214 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2208
2215
2209 # If the flag processor modifies the revision data, ignore any provided
2216 # If the flag processor modifies the revision data, ignore any provided
2210 # cachedelta.
2217 # cachedelta.
2211 if rawtext != text:
2218 if rawtext != text:
2212 cachedelta = None
2219 cachedelta = None
2213
2220
2214 if len(rawtext) > _maxentrysize:
2221 if len(rawtext) > _maxentrysize:
2215 raise error.RevlogError(
2222 raise error.RevlogError(
2216 _(
2223 _(
2217 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2224 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2218 )
2225 )
2219 % (self.display_id, len(rawtext))
2226 % (self.display_id, len(rawtext))
2220 )
2227 )
2221
2228
2222 node = node or self.hash(rawtext, p1, p2)
2229 node = node or self.hash(rawtext, p1, p2)
2223 rev = self.index.get_rev(node)
2230 rev = self.index.get_rev(node)
2224 if rev is not None:
2231 if rev is not None:
2225 return rev
2232 return rev
2226
2233
2227 if validatehash:
2234 if validatehash:
2228 self.checkhash(rawtext, node, p1=p1, p2=p2)
2235 self.checkhash(rawtext, node, p1=p1, p2=p2)
2229
2236
2230 return self.addrawrevision(
2237 return self.addrawrevision(
2231 rawtext,
2238 rawtext,
2232 transaction,
2239 transaction,
2233 link,
2240 link,
2234 p1,
2241 p1,
2235 p2,
2242 p2,
2236 node,
2243 node,
2237 flags,
2244 flags,
2238 cachedelta=cachedelta,
2245 cachedelta=cachedelta,
2239 deltacomputer=deltacomputer,
2246 deltacomputer=deltacomputer,
2240 sidedata=sidedata,
2247 sidedata=sidedata,
2241 )
2248 )
2242
2249
2243 def addrawrevision(
2250 def addrawrevision(
2244 self,
2251 self,
2245 rawtext,
2252 rawtext,
2246 transaction,
2253 transaction,
2247 link,
2254 link,
2248 p1,
2255 p1,
2249 p2,
2256 p2,
2250 node,
2257 node,
2251 flags,
2258 flags,
2252 cachedelta=None,
2259 cachedelta=None,
2253 deltacomputer=None,
2260 deltacomputer=None,
2254 sidedata=None,
2261 sidedata=None,
2255 ):
2262 ):
2256 """add a raw revision with known flags, node and parents
2263 """add a raw revision with known flags, node and parents
2257 useful when reusing a revision not stored in this revlog (ex: received
2264 useful when reusing a revision not stored in this revlog (ex: received
2258 over wire, or read from an external bundle).
2265 over wire, or read from an external bundle).
2259 """
2266 """
2260 with self._writing(transaction):
2267 with self._writing(transaction):
2261 return self._addrevision(
2268 return self._addrevision(
2262 node,
2269 node,
2263 rawtext,
2270 rawtext,
2264 transaction,
2271 transaction,
2265 link,
2272 link,
2266 p1,
2273 p1,
2267 p2,
2274 p2,
2268 flags,
2275 flags,
2269 cachedelta,
2276 cachedelta,
2270 deltacomputer=deltacomputer,
2277 deltacomputer=deltacomputer,
2271 sidedata=sidedata,
2278 sidedata=sidedata,
2272 )
2279 )
2273
2280
    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

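    # Illustrative sketch (assumption, not from the original source): how the
    # one-byte header produced by compress() is consumed by decompress();
    # `rl` and `rawchunk` are hypothetical.
    #
    #   header, payload = rl.compress(rawchunk)
    #   stored = header + payload   # b'u' prefix marks stored-as-is data,
    #                               # otherwise the compressor's own header
    #                               # (e.g. b'x' for zlib) leads the payload
    #   assert bytes(rl.decompress(stored)) == rawchunk
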
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            default_comp = self._docket.default_compression_header
            r = deltautil.delta_compression(default_comp, deltainfo)
            compression_mode, deltainfo = r

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a docket
        file to store that information: since sidedata can be rewritten to the
        end of the data file within a transaction, you can have cases where, for
        example, rev `n` does not have sidedata while rev `n - 1` does, leading
        to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            self._docket.index_end = self._writinghandles[0].tell()
            self._docket.data_end = self._writinghandles[1].tell()
            self._docket.sidedata_end = self._writinghandles[2].tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The first delta
        is against its parent, which should be in our log; the rest are
        against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the revision number that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                deltacomputer = deltautil.deltacomputer(self)
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

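    # Illustrative sketch (derived from the unpacking above; names are
    # hypothetical): the shape of one item consumed by addgroup().
    #
    #   delta_entry = (
    #       node,       # node id of the revision being added
    #       p1node,     # first parent node
    #       p2node,     # second parent node
    #       linknode,   # changelog node this revision is linked to
    #       deltabase,  # node the delta applies against
    #       delta,      # the binary delta itself
    #       flags,      # revision flags, or 0 for the defaults
    #       sidedata,   # sidedata mapping, possibly empty
    #   )
    #   rl.addgroup([delta_entry], linkmapper, tr)
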
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

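    # Illustrative sketch (assumption): getstrippoint() is usually consulted
    # before strip(); `rl`, `minlink` and `tr` are hypothetical.
    #
    #   rev, broken = rl.getstrippoint(minlink)
    #   if rev < len(rl):
    #       rl.strip(minlink, tr)  # truncates everything from `rev` onwards
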
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

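    # Illustrative sketch (assumption): interpreting the checksize() result;
    # a healthy revlog reports no stray bytes in either file.
    #
    #   dd, di = rl.checksize()
    #   if dd or di:
    #       print('%d stray data bytes, %d stray index bytes' % (dd, di))
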
    def files(self):
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing revlog
        are preserved in the destination revlog. The argument can have the
        following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is the
           fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).
        DELTAREUSEFULLADD
           Revisions will be re-added as if they were new content. This is
           slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
           e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force computing deltas against both
        parents for merges. If not specified, the destination revlog's
        existing setting is kept.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

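    # Illustrative sketch (assumption): forcing every delta to be recomputed
    # while copying a revlog, e.g. after a delta algorithm change; `src`,
    # `dst` and `tr` are hypothetical.
    #
    #   src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
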
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext = self._revisiondata(rev)
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            censor.v1_censor(self, tr, censornode, tombstone)
        else:
            censor.v2_censor(self, tr, censornode, tombstone)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta:   file content starts with b'\1\n', the metadata
            #           header defined in filelog.py, but without a rename
            #   ext:    content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

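    # Illustrative sketch (assumption): consuming the problems yielded by
    # verifyintegrity(); `rl`, `state` and `ui` are hypothetical, and the
    # attribute names mirror the ``revlogproblem`` usage above.
    #
    #   for problem in rl.verifyintegrity(state):
    #       if problem.error:
    #           ui.warn(problem.error + b'\n')
    #       elif problem.warning:
    #           ui.warn(problem.warning + b'\n')
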
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

3211 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3218 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3212 if not self.hassidedata:
3219 if not self.hassidedata:
3213 return
3220 return
3214 # revlog formats with sidedata support do not support inline
3221 # revlog formats with sidedata support do not support inline
3215 assert not self._inline
3222 assert not self._inline
3216 if not helpers[1] and not helpers[2]:
3223 if not helpers[1] and not helpers[2]:
3217 # Nothing to generate or remove
3224 # Nothing to generate or remove
3218 return
3225 return
3219
3226
3220 new_entries = []
3227 new_entries = []
3221 # append the new sidedata
3228 # append the new sidedata
3222 with self._writing(transaction):
3229 with self._writing(transaction):
3223 ifh, dfh, sdfh = self._writinghandles
3230 ifh, dfh, sdfh = self._writinghandles
3224 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3231 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3225
3232
3226 current_offset = sdfh.tell()
3233 current_offset = sdfh.tell()
3227 for rev in range(startrev, endrev + 1):
3234 for rev in range(startrev, endrev + 1):
3228 entry = self.index[rev]
3235 entry = self.index[rev]
3229 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3236 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3230 store=self,
3237 store=self,
3231 sidedata_helpers=helpers,
3238 sidedata_helpers=helpers,
3232 sidedata={},
3239 sidedata={},
3233 rev=rev,
3240 rev=rev,
3234 )
3241 )
3235
3242
3236 serialized_sidedata = sidedatautil.serialize_sidedata(
3243 serialized_sidedata = sidedatautil.serialize_sidedata(
3237 new_sidedata
3244 new_sidedata
3238 )
3245 )
3239
3246
3240 sidedata_compression_mode = COMP_MODE_INLINE
3247 sidedata_compression_mode = COMP_MODE_INLINE
3241 if serialized_sidedata and self.hassidedata:
3248 if serialized_sidedata and self.hassidedata:
3242 sidedata_compression_mode = COMP_MODE_PLAIN
3249 sidedata_compression_mode = COMP_MODE_PLAIN
3243 h, comp_sidedata = self.compress(serialized_sidedata)
3250 h, comp_sidedata = self.compress(serialized_sidedata)
3244 if (
3251 if (
3245 h != b'u'
3252 h != b'u'
3246 and comp_sidedata[0] != b'\0'
3253 and comp_sidedata[0] != b'\0'
3247 and len(comp_sidedata) < len(serialized_sidedata)
3254 and len(comp_sidedata) < len(serialized_sidedata)
3248 ):
3255 ):
3249 assert not h
3256 assert not h
3250 if (
3257 if (
3251 comp_sidedata[0]
3258 comp_sidedata[0]
3252 == self._docket.default_compression_header
3259 == self._docket.default_compression_header
3253 ):
3260 ):
3254 sidedata_compression_mode = COMP_MODE_DEFAULT
3261 sidedata_compression_mode = COMP_MODE_DEFAULT
3255 serialized_sidedata = comp_sidedata
3262 serialized_sidedata = comp_sidedata
3256 else:
3263 else:
3257 sidedata_compression_mode = COMP_MODE_INLINE
3264 sidedata_compression_mode = COMP_MODE_INLINE
3258 serialized_sidedata = comp_sidedata
3265 serialized_sidedata = comp_sidedata
3259 if entry[8] != 0 or entry[9] != 0:
3266 if entry[8] != 0 or entry[9] != 0:
3260 # rewriting entries that already have sidedata is not
3267 # rewriting entries that already have sidedata is not
3261 # supported yet, because it would leave garbage data in the
3268 # supported yet, because it would leave garbage data in the
3262 # revlog.
3269 # revlog.
3263 msg = b"rewriting existing sidedata is not supported yet"
3270 msg = b"rewriting existing sidedata is not supported yet"
3264 raise error.Abort(msg)
3271 raise error.Abort(msg)
3265
3272
3266 # Apply (potential) flags to add and to remove after running
3273 # Apply (potential) flags to add and to remove after running
3267 # the sidedata helpers
3274 # the sidedata helpers
3268 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3275 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3269 entry_update = (
3276 entry_update = (
3270 current_offset,
3277 current_offset,
3271 len(serialized_sidedata),
3278 len(serialized_sidedata),
3272 new_offset_flags,
3279 new_offset_flags,
3273 sidedata_compression_mode,
3280 sidedata_compression_mode,
3274 )
3281 )
3275
3282
3276 # the sidedata computation might have moved the file cursor around
3283 # the sidedata computation might have moved the file cursor around
3277 sdfh.seek(current_offset, os.SEEK_SET)
3284 sdfh.seek(current_offset, os.SEEK_SET)
3278 sdfh.write(serialized_sidedata)
3285 sdfh.write(serialized_sidedata)
3279 new_entries.append(entry_update)
3286 new_entries.append(entry_update)
3280 current_offset += len(serialized_sidedata)
3287 current_offset += len(serialized_sidedata)
3281 self._docket.sidedata_end = sdfh.tell()
3288 self._docket.sidedata_end = sdfh.tell()
3282
3289
3283 # rewrite the new index entries
3290 # rewrite the new index entries
3284 ifh.seek(startrev * self.index.entry_size)
3291 ifh.seek(startrev * self.index.entry_size)
3285 for i, e in enumerate(new_entries):
3292 for i, e in enumerate(new_entries):
3286 rev = startrev + i
3293 rev = startrev + i
3287 self.index.replace_sidedata_info(rev, *e)
3294 self.index.replace_sidedata_info(rev, *e)
3288 packed = self.index.entry_binary(rev)
3295 packed = self.index.entry_binary(rev)
3289 if rev == 0 and self._docket is None:
3296 if rev == 0 and self._docket is None:
3290 header = self._format_flags | self._format_version
3297 header = self._format_flags | self._format_version
3291 header = self.index.pack_header(header)
3298 header = self.index.pack_header(header)
3292 packed = header + packed
3299 packed = header + packed
3293 ifh.write(packed)
3300 ifh.write(packed)
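The flag update a few lines above, `entry[0] | flags[0] & ~flags[1]`, leans on Python operator precedence: `&` binds tighter than `|`, so the removal mask filters only the flags being added, not flags already stored in the entry. A minimal sketch with made-up flag values (not Mercurial's real REVIDX_* constants):

# Stand-in flag bits, for illustration only.
FLAG_A = 1 << 12
FLAG_B = 1 << 13

def apply_flag_update(offset_flags, to_add, to_remove):
    # Same grouping as `entry[0] | flags[0] & ~flags[1]` above.
    return offset_flags | (to_add & ~to_remove)

assert apply_flag_update(FLAG_A, to_add=FLAG_B, to_remove=0) == FLAG_A | FLAG_B
# As written, a flag listed for removal is only stripped from the additions;
# it is not cleared from flags that were already present on the entry.
assert apply_flag_update(FLAG_A, to_add=0, to_remove=FLAG_A) == FLAG_A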
@@ -1,138 +1,159 b''
1 # Copyright Mercurial Contributors
1 # Copyright Mercurial Contributors
2 #
2 #
3 # This software may be used and distributed according to the terms of the
3 # This software may be used and distributed according to the terms of the
4 # GNU General Public License version 2 or any later version.
4 # GNU General Public License version 2 or any later version.
5
5
6 import contextlib
6 import contextlib
7
7
8 from ..i18n import _
8 from ..i18n import _
9 from .. import (
9 from .. import (
10 error,
10 error,
11 util,
11 util,
12 )
12 )
13
13
14
14
15 _MAX_CACHED_CHUNK_SIZE = 1048576 # 1 MiB
15 _MAX_CACHED_CHUNK_SIZE = 1048576 # 1 MiB
16
16
17 PARTIAL_READ_MSG = _(
17 PARTIAL_READ_MSG = _(
18 b'partial read of revlog %s; expected %d bytes from offset %d, got %d'
18 b'partial read of revlog %s; expected %d bytes from offset %d, got %d'
19 )
19 )
20
20
21
21
22 def _is_power_of_two(n):
22 def _is_power_of_two(n):
23 return (n & (n - 1) == 0) and n != 0
23 return (n & (n - 1) == 0) and n != 0
24
24
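The check relies on a standard bit trick: for a positive integer, n & (n - 1) clears the lowest set bit, so the result is zero exactly when a single bit is set. A few illustrative cases:

assert _is_power_of_two(65536)      # a one followed by sixteen zero bits
assert not _is_power_of_two(65537)  # two bits set, so n & (n - 1) == 65536
assert not _is_power_of_two(0)      # caught by the explicit n != 0 guard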
25
25
26 class randomaccessfile(object):
26 class randomaccessfile(object):
27 """Accessing arbitrary chuncks of data within a file, with some caching"""
27 """Accessing arbitrary chuncks of data within a file, with some caching"""
28
28
29 def __init__(
29 def __init__(
30 self,
30 self,
31 opener,
31 opener,
32 filename,
32 filename,
33 default_cached_chunk_size,
33 default_cached_chunk_size,
34 initial_cache=None,
34 initial_cache=None,
35 ):
35 ):
36 # Required by bitwise manipulation below
36 # Required by bitwise manipulation below
37 assert _is_power_of_two(default_cached_chunk_size)
37 assert _is_power_of_two(default_cached_chunk_size)
38
38
39 self.opener = opener
39 self.opener = opener
40 self.filename = filename
40 self.filename = filename
41 self.default_cached_chunk_size = default_cached_chunk_size
41 self.default_cached_chunk_size = default_cached_chunk_size
42 self.writing_handle = None # This is set from revlog.py
42 self.writing_handle = None # This is set from revlog.py
43 self.reading_handle = None
43 self._cached_chunk = b''
44 self._cached_chunk = b''
44 self._cached_chunk_position = 0 # Offset from the start of the file
45 self._cached_chunk_position = 0 # Offset from the start of the file
45 if initial_cache:
46 if initial_cache:
46 self._cached_chunk_position, self._cached_chunk = initial_cache
47 self._cached_chunk_position, self._cached_chunk = initial_cache
47
48
48 def clear_cache(self):
49 def clear_cache(self):
49 self._cached_chunk = b''
50 self._cached_chunk = b''
50 self._cached_chunk_position = 0
51 self._cached_chunk_position = 0
51
52
52 def _open(self, mode=b'r'):
53 def _open(self, mode=b'r'):
53 """Return a file object"""
54 """Return a file object"""
54 return self.opener(self.filename, mode=mode)
55 return self.opener(self.filename, mode=mode)
55
56
56 @contextlib.contextmanager
57 @contextlib.contextmanager
57 def _open_read(self, existing_file_obj=None):
58 def _open_read(self, existing_file_obj=None):
58 """File object suitable for reading data"""
59 """File object suitable for reading data"""
59 # Use explicit file handle, if given.
60 # Use explicit file handle, if given.
60 if existing_file_obj is not None:
61 if existing_file_obj is not None:
61 yield existing_file_obj
62 yield existing_file_obj
62
63
63 # Use a file handle being actively used for writes, if available.
64 # Use a file handle being actively used for writes, if available.
64 # There is some danger to doing this because reads will seek the
65 # There is some danger to doing this because reads will seek the
65 # file. However, revlog._writeentry performs a SEEK_END before all
66 # file. However, revlog._writeentry performs a SEEK_END before all
66 # writes, so we should be safe.
67 # writes, so we should be safe.
67 elif self.writing_handle:
68 elif self.writing_handle:
68 yield self.writing_handle
69 yield self.writing_handle
69
70
71 elif self.reading_handle:
72 yield self.reading_handle
73
70 # Otherwise open a new file handle.
74 # Otherwise open a new file handle.
71 else:
75 else:
72 with self._open() as fp:
76 with self._open() as fp:
73 yield fp
77 yield fp
74
78
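The safety argument in the comment above hinges on one invariant: a read through the shared write handle moves its file position, so every write must first seek back to the end. A standalone sketch of that pattern with a plain temporary file (illustrative, not Mercurial code):

import os
import tempfile

with tempfile.TemporaryFile() as fp:
    fp.write(b'0123456789')
    fp.seek(0)
    fp.read(4)               # a read through the same handle leaves the cursor at 4
    fp.seek(0, os.SEEK_END)  # the equivalent of seeking to the end before writing
    fp.write(b'abcd')        # appended at the end rather than clobbering offset 4
    fp.seek(0)
    assert fp.read() == b'0123456789abcd'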
79 @contextlib.contextmanager
80 def reading(self):
81 """Context manager that keeps the file open for reading"""
82 if (
83 self.reading_handle is None
84 and self.writing_handle is None
85 and self.filename is not None
86 ):
87 with self._open() as fp:
88 self.reading_handle = fp
89 try:
90 yield
91 finally:
92 self.reading_handle = None
93 else:
94 yield
95
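reading() is the piece that lets a caller pin a single descriptor for a whole batch of chunk reads instead of reopening the file each time, which is the point of keeping the changelog sidedata file open during copy tracing. A hypothetical caller (rf stands in for a randomaccessfile instance; the offsets are made up):

# Hypothetical usage; `rf` is assumed to be a randomaccessfile instance.
with rf.reading():
    # Both reads below go through rf.reading_handle instead of reopening the file.
    first = rf.read_chunk(0, 64)
    second = rf.read_chunk(64, 1024)
# On exit, reading_handle is reset to None and later reads reopen the file.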
75 def read_chunk(self, offset, length, existing_file_obj=None):
96 def read_chunk(self, offset, length, existing_file_obj=None):
76 """Read a chunk of bytes from the file.
97 """Read a chunk of bytes from the file.
77
98
78 Accepts an absolute offset, length to read, and an optional existing
99 Accepts an absolute offset, length to read, and an optional existing
79 file handle to read from.
100 file handle to read from.
80
101
81 If an existing file handle is passed, its seek position will be
102 If an existing file handle is passed, its seek position will be
82 changed and will NOT be restored afterwards.
103 changed and will NOT be restored afterwards.
83
104
84 Returns a str or buffer of raw byte data.
105 Returns a str or buffer of raw byte data.
85
106
86 Raises if the requested number of bytes could not be read.
107 Raises if the requested number of bytes could not be read.
87 """
108 """
88 end = offset + length
109 end = offset + length
89 cache_start = self._cached_chunk_position
110 cache_start = self._cached_chunk_position
90 cache_end = cache_start + len(self._cached_chunk)
111 cache_end = cache_start + len(self._cached_chunk)
91 # Is the requested chunk within the cache?
112 # Is the requested chunk within the cache?
92 if cache_start <= offset and end <= cache_end:
113 if cache_start <= offset and end <= cache_end:
93 if cache_start == offset and end == cache_end:
114 if cache_start == offset and end == cache_end:
94 return self._cached_chunk # avoid a copy
115 return self._cached_chunk # avoid a copy
95 relative_start = offset - cache_start
116 relative_start = offset - cache_start
96 return util.buffer(self._cached_chunk, relative_start, length)
117 return util.buffer(self._cached_chunk, relative_start, length)
97
118
98 return self._read_and_update_cache(offset, length, existing_file_obj)
119 return self._read_and_update_cache(offset, length, existing_file_obj)
99
120
100 def _read_and_update_cache(self, offset, length, existing_file_obj=None):
121 def _read_and_update_cache(self, offset, length, existing_file_obj=None):
101 # Cache data both forward and backward around the requested
122 # Cache data both forward and backward around the requested
102 # data, in a fixed size window. This helps speed up operations
123 # data, in a fixed size window. This helps speed up operations
103 # involving reading the revlog backwards.
124 # involving reading the revlog backwards.
104 real_offset = offset & ~(self.default_cached_chunk_size - 1)
125 real_offset = offset & ~(self.default_cached_chunk_size - 1)
105 real_length = (
126 real_length = (
106 (offset + length + self.default_cached_chunk_size)
127 (offset + length + self.default_cached_chunk_size)
107 & ~(self.default_cached_chunk_size - 1)
128 & ~(self.default_cached_chunk_size - 1)
108 ) - real_offset
129 ) - real_offset
109 with self._open_read(existing_file_obj) as file_obj:
130 with self._open_read(existing_file_obj) as file_obj:
110 file_obj.seek(real_offset)
131 file_obj.seek(real_offset)
111 data = file_obj.read(real_length)
132 data = file_obj.read(real_length)
112
133
113 self._add_cached_chunk(real_offset, data)
134 self._add_cached_chunk(real_offset, data)
114
135
115 relative_offset = offset - real_offset
136 relative_offset = offset - real_offset
116 got = len(data) - relative_offset
137 got = len(data) - relative_offset
117 if got < length:
138 if got < length:
118 message = PARTIAL_READ_MSG % (self.filename, length, offset, got)
139 message = PARTIAL_READ_MSG % (self.filename, length, offset, got)
119 raise error.RevlogError(message)
140 raise error.RevlogError(message)
120
141
121 if offset != real_offset or real_length != length:
142 if offset != real_offset or real_length != length:
122 return util.buffer(data, relative_offset, length)
143 return util.buffer(data, relative_offset, length)
123 return data
144 return data
124
145
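The bit masking above rounds the requested window outwards to multiples of the cached chunk size, which is why the constructor asserts that the size is a power of two. Worked through with a hypothetical 65536-byte chunk size:

chunk_size = 65536                                   # must be a power of two
offset, length = 70000, 100
real_offset = offset & ~(chunk_size - 1)             # rounds down to 65536
real_length = (
    (offset + length + chunk_size) & ~(chunk_size - 1)
) - real_offset                                      # window end rounds up to 131072
assert (real_offset, real_length) == (65536, 65536)
assert real_offset <= offset
assert offset + length <= real_offset + real_length  # the window covers the request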
125 def _add_cached_chunk(self, offset, data):
146 def _add_cached_chunk(self, offset, data):
126 """Add to or replace the cached data chunk.
147 """Add to or replace the cached data chunk.
127
148
128 Accepts an absolute offset and the data that is at that location.
149 Accepts an absolute offset and the data that is at that location.
129 """
150 """
130 if (
151 if (
131 self._cached_chunk_position + len(self._cached_chunk) == offset
152 self._cached_chunk_position + len(self._cached_chunk) == offset
132 and len(self._cached_chunk) + len(data) < _MAX_CACHED_CHUNK_SIZE
153 and len(self._cached_chunk) + len(data) < _MAX_CACHED_CHUNK_SIZE
133 ):
154 ):
134 # add to existing cache
155 # add to existing cache
135 self._cached_chunk += data
156 self._cached_chunk += data
136 else:
157 else:
137 self._cached_chunk = data
158 self._cached_chunk = data
138 self._cached_chunk_position = offset
159 self._cached_chunk_position = offset
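The extend-or-replace policy above means contiguous reads keep growing one cached buffer until the 1 MiB cap, while any non-contiguous read simply restarts the cache. A minimal standalone model of that policy (not the Mercurial class itself):

MAX = 1048576  # mirrors _MAX_CACHED_CHUNK_SIZE

class cache_model(object):
    def __init__(self):
        self.pos, self.buf = 0, b''

    def add(self, offset, data):
        contiguous = self.pos + len(self.buf) == offset
        if contiguous and len(self.buf) + len(data) < MAX:
            self.buf += data                      # extend the existing cache
        else:
            self.pos, self.buf = offset, data     # otherwise start a new cache

c = cache_model()
c.add(0, b'a' * 100)
c.add(100, b'b' * 100)   # contiguous: one 200-byte buffer starting at offset 0
assert (c.pos, len(c.buf)) == (0, 200)
c.add(4096, b'c' * 50)   # gap: the cache is replaced, not extended
assert (c.pos, len(c.buf)) == (4096, 50)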