revlog: move _getsegmentforrevs on the internal object...
marmoute
r51980:df50a159 default
@@ -1,4626 +1,4633 @@
# perf.py - performance test routines
'''helper extension to measure performance

Configurations
==============

``perf``
--------

``all-timing``
  When set, additional statistics will be reported for each benchmark: best,
  worst, median, and average. If not set, only the best timing is reported
  (default: off).

``presleep``
  number of seconds to wait before any group of runs (default: 1)

``pre-run``
  number of runs to perform before starting measurement.

``profile-benchmark``
  Enable profiling for the benchmarked section.
  (The first iteration is benchmarked)

``run-limits``
  Control the number of runs each benchmark will perform. The option value
  should be a list of `<time>-<numberofrun>` pairs. After each run the
  conditions are considered in order with the following logic:

      If the benchmark has been running for <time> seconds, and we have
      performed <numberofrun> iterations, stop the benchmark.

  The default value is: `3.0-100, 10.0-3`
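
  For example, ``run-limits = 5.0-50, 30.0-5`` (illustrative values, not the
  defaults) stops a benchmark once 5 seconds have elapsed and at least 50
  runs were performed, or once 30 seconds have elapsed and at least 5 runs
  were performed.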

``stub``
  When set, benchmarks will only be run once, useful for testing
  (default: off)
'''

# "historical portability" policy of perf.py:
#
# We have to do:
# - make perf.py "loadable" with as wide a range of Mercurial versions as
#   possible
#   This doesn't mean that perf commands work correctly with that Mercurial.
#   BTW, perf.py itself has been available since 1.1 (or eb240755386d).
# - make historical perf commands work correctly with as wide a range of
#   Mercurial versions as possible
#
# We have to do, if possible with reasonable cost:
# - make recent perf commands for historical features work correctly
#   with early Mercurial
#
# We don't have to do:
# - make perf commands for recent features work correctly with early
#   Mercurial

import contextlib
import functools
import gc
import os
import random
import shutil
import struct
import sys
import tempfile
import threading
import time

import mercurial.revlog
from mercurial import (
    changegroup,
    cmdutil,
    commands,
    copies,
    error,
    extensions,
    hg,
    mdiff,
    merge,
    util,
)

# for "historical portability":
# try to import modules separately (in dict order), and ignore
# failure, because these aren't available with early Mercurial
try:
    from mercurial import branchmap  # since 2.5 (or bcee63733aad)
except ImportError:
    pass
try:
    from mercurial import obsolete  # since 2.3 (or ad0d6c2b3279)
except ImportError:
    pass
try:
    from mercurial import registrar  # since 3.7 (or 37d50250b696)

    dir(registrar)  # forcibly load it
except ImportError:
    registrar = None
try:
    from mercurial import repoview  # since 2.5 (or 3a6ddacb7198)
except ImportError:
    pass
try:
    from mercurial.utils import repoviewutil  # since 5.0
except ImportError:
    repoviewutil = None
try:
    from mercurial import scmutil  # since 1.9 (or 8b252e826c68)
except ImportError:
    pass
try:
    from mercurial import setdiscovery  # since 1.9 (or cb98fed52495)
except ImportError:
    pass

try:
    from mercurial import profiling
except ImportError:
    profiling = None

try:
    from mercurial.revlogutils import constants as revlog_constants

    perf_rl_kind = (revlog_constants.KIND_OTHER, b'created-by-perf')

    def revlog(opener, *args, **kwargs):
        return mercurial.revlog.revlog(opener, perf_rl_kind, *args, **kwargs)


except (ImportError, AttributeError):
    perf_rl_kind = None

    def revlog(opener, *args, **kwargs):
        return mercurial.revlog.revlog(opener, *args, **kwargs)


def identity(a):
    return a


try:
    from mercurial import pycompat

    getargspec = pycompat.getargspec  # added to module after 4.5
    _byteskwargs = pycompat.byteskwargs  # since 4.1 (or fbc3f73dc802)
    _sysstr = pycompat.sysstr  # since 4.0 (or 2219f4f82ede)
    _bytestr = pycompat.bytestr  # since 4.2 (or b70407bd84d5)
    _xrange = pycompat.xrange  # since 4.8 (or 7eba8f83129b)
    fsencode = pycompat.fsencode  # since 3.9 (or f4a5e0e86a7e)
    if pycompat.ispy3:
        _maxint = sys.maxsize  # per py3 docs for replacing maxint
    else:
        _maxint = sys.maxint
except (NameError, ImportError, AttributeError):
    import inspect

    getargspec = inspect.getargspec
    _byteskwargs = identity
    _bytestr = str
    fsencode = identity  # no py3 support
    _maxint = sys.maxint  # no py3 support
    _sysstr = lambda x: x  # no py3 support
    _xrange = xrange

try:
    # 4.7+
    queue = pycompat.queue.Queue
except (NameError, AttributeError, ImportError):
    # <4.7.
    try:
        queue = pycompat.queue
    except (NameError, AttributeError, ImportError):
        import Queue as queue

try:
    from mercurial import logcmdutil

    makelogtemplater = logcmdutil.maketemplater
except (AttributeError, ImportError):
    try:
        makelogtemplater = cmdutil.makelogtemplater
    except (AttributeError, ImportError):
        makelogtemplater = None

# for "historical portability":
# define util.safehasattr forcibly, because util.safehasattr has been
# available since 1.9.3 (or 94b200a11cf7)
_undefined = object()


def safehasattr(thing, attr):
    return getattr(thing, _sysstr(attr), _undefined) is not _undefined


setattr(util, 'safehasattr', safehasattr)

# for "historical portability":
# define util.timer forcibly, because util.timer has been available
# since ae5d60bb70c9
if safehasattr(time, 'perf_counter'):
    util.timer = time.perf_counter
elif os.name == b'nt':
    util.timer = time.clock
else:
    util.timer = time.time

# for "historical portability":
# use locally defined empty option list, if formatteropts isn't
# available, because commands.formatteropts has been available since
# 3.2 (or 7a7eed5176a4), even though formatting itself has been
# available since 2.2 (or ae5f92e154d3)
formatteropts = getattr(
    cmdutil, "formatteropts", getattr(commands, "formatteropts", [])
)

# for "historical portability":
# use locally defined option list, if debugrevlogopts isn't available,
# because commands.debugrevlogopts has been available since 3.7 (or
# 5606f7d0d063), even though cmdutil.openrevlog() has been available
# since 1.9 (or a79fea6b3e77).
revlogopts = getattr(
    cmdutil,
    "debugrevlogopts",
    getattr(
        commands,
        "debugrevlogopts",
        [
            (b'c', b'changelog', False, b'open changelog'),
            (b'm', b'manifest', False, b'open manifest'),
            (b'', b'dir', False, b'open directory manifest'),
        ],
    ),
)

cmdtable = {}


# for "historical portability":
# define parsealiases locally, because cmdutil.parsealiases has been
# available since 1.5 (or 6252852b4332)
def parsealiases(cmd):
    return cmd.split(b"|")
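
# Illustrative example (not part of the original file): aliases come from the
# b"|"-separated command name, e.g.
#     parsealiases(b'perf::tags|perftags') -> [b'perf::tags', b'perftags']
# where the first entry is the canonical command name.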


if safehasattr(registrar, 'command'):
    command = registrar.command(cmdtable)
elif safehasattr(cmdutil, 'command'):
    command = cmdutil.command(cmdtable)
    if 'norepo' not in getargspec(command).args:
        # for "historical portability":
        # wrap original cmdutil.command, because "norepo" option has
        # been available since 3.1 (or 75a96326cecb)
        _command = command

        def command(name, options=(), synopsis=None, norepo=False):
            if norepo:
                commands.norepo += b' %s' % b' '.join(parsealiases(name))
            return _command(name, list(options), synopsis)


else:
    # for "historical portability":
    # define "@command" annotation locally, because cmdutil.command
    # has been available since 1.9 (or 2daa5179e73f)
    def command(name, options=(), synopsis=None, norepo=False):
        def decorator(func):
            if synopsis:
                cmdtable[name] = func, list(options), synopsis
            else:
                cmdtable[name] = func, list(options)
            if norepo:
                commands.norepo += b' %s' % b' '.join(parsealiases(name))
            return func

        return decorator


try:
    import mercurial.registrar
    import mercurial.configitems

    configtable = {}
    configitem = mercurial.registrar.configitem(configtable)
    configitem(
        b'perf',
        b'presleep',
        default=mercurial.configitems.dynamicdefault,
        experimental=True,
    )
    configitem(
        b'perf',
        b'stub',
        default=mercurial.configitems.dynamicdefault,
        experimental=True,
    )
    configitem(
        b'perf',
        b'parentscount',
        default=mercurial.configitems.dynamicdefault,
        experimental=True,
    )
    configitem(
        b'perf',
        b'all-timing',
        default=mercurial.configitems.dynamicdefault,
        experimental=True,
    )
    configitem(
        b'perf',
        b'pre-run',
        default=mercurial.configitems.dynamicdefault,
    )
    configitem(
        b'perf',
        b'profile-benchmark',
        default=mercurial.configitems.dynamicdefault,
    )
    configitem(
        b'perf',
        b'run-limits',
        default=mercurial.configitems.dynamicdefault,
        experimental=True,
    )
except (ImportError, AttributeError):
    pass
except TypeError:
    # compatibility fix for a11fd395e83f
    # hg version: 5.2
    configitem(
        b'perf',
        b'presleep',
        default=mercurial.configitems.dynamicdefault,
    )
    configitem(
        b'perf',
        b'stub',
        default=mercurial.configitems.dynamicdefault,
    )
    configitem(
        b'perf',
        b'parentscount',
        default=mercurial.configitems.dynamicdefault,
    )
    configitem(
        b'perf',
        b'all-timing',
        default=mercurial.configitems.dynamicdefault,
    )
    configitem(
        b'perf',
        b'pre-run',
        default=mercurial.configitems.dynamicdefault,
    )
    configitem(
        b'perf',
        b'profile-benchmark',
        default=mercurial.configitems.dynamicdefault,
    )
    configitem(
        b'perf',
        b'run-limits',
        default=mercurial.configitems.dynamicdefault,
    )


def getlen(ui):
    if ui.configbool(b"perf", b"stub", False):
        return lambda x: 1
    return len


class noop:
    """dummy context manager"""

    def __enter__(self):
        pass

    def __exit__(self, *args):
        pass


NOOPCTX = noop()


def gettimer(ui, opts=None):
    """return a timer function and formatter: (timer, formatter)

    This function exists to gather the creation of the formatter in a single
    place instead of duplicating it in all performance commands."""

    # enforce an idle period before execution to counteract power management
    # experimental config: perf.presleep
    time.sleep(getint(ui, b"perf", b"presleep", 1))

    if opts is None:
        opts = {}
    # redirect all to stderr unless buffer api is in use
    if not ui._buffers:
        ui = ui.copy()
        uifout = safeattrsetter(ui, b'fout', ignoremissing=True)
        if uifout:
            # for "historical portability":
            # ui.fout/ferr have been available since 1.9 (or 4e1ccd4c2b6d)
            uifout.set(ui.ferr)

    # get a formatter
    uiformatter = getattr(ui, 'formatter', None)
    if uiformatter:
        fm = uiformatter(b'perf', opts)
    else:
        # for "historical portability":
        # define formatter locally, because ui.formatter has been
        # available since 2.2 (or ae5f92e154d3)
        from mercurial import node

        class defaultformatter:
            """Minimized composition of baseformatter and plainformatter"""

            def __init__(self, ui, topic, opts):
                self._ui = ui
                if ui.debugflag:
                    self.hexfunc = node.hex
                else:
                    self.hexfunc = node.short

            def __nonzero__(self):
                return False

            __bool__ = __nonzero__

            def startitem(self):
                pass

            def data(self, **data):
                pass

            def write(self, fields, deftext, *fielddata, **opts):
                self._ui.write(deftext % fielddata, **opts)

            def condwrite(self, cond, fields, deftext, *fielddata, **opts):
                if cond:
                    self._ui.write(deftext % fielddata, **opts)

            def plain(self, text, **opts):
                self._ui.write(text, **opts)

            def end(self):
                pass

        fm = defaultformatter(ui, b'perf', opts)

    # stub function, runs code only once instead of in a loop
    # experimental config: perf.stub
    if ui.configbool(b"perf", b"stub", False):
        return functools.partial(stub_timer, fm), fm

    # experimental config: perf.all-timing
    displayall = ui.configbool(b"perf", b"all-timing", True)

    # experimental config: perf.run-limits
    limitspec = ui.configlist(b"perf", b"run-limits", [])
    limits = []
    for item in limitspec:
        parts = item.split(b'-', 1)
        if len(parts) < 2:
            ui.warn((b'malformatted run limit entry, missing "-": %s\n' % item))
            continue
        try:
            time_limit = float(_sysstr(parts[0]))
        except ValueError as e:
            ui.warn(
                (
                    b'malformatted run limit entry, %s: %s\n'
                    % (_bytestr(e), item)
                )
            )
            continue
        try:
            run_limit = int(_sysstr(parts[1]))
        except ValueError as e:
            ui.warn(
                (
                    b'malformatted run limit entry, %s: %s\n'
                    % (_bytestr(e), item)
                )
            )
            continue
        limits.append((time_limit, run_limit))
    if not limits:
        limits = DEFAULTLIMITS
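    # Illustrative example (not in the original file): a config entry of
    # b'5.0-50' parses to the limit (5.0, 50), meaning stop once at least
    # 5 seconds have elapsed and at least 50 runs have been performed.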

    profiler = None
    if profiling is not None:
        if ui.configbool(b"perf", b"profile-benchmark", False):
            profiler = profiling.profile(ui)

    prerun = getint(ui, b"perf", b"pre-run", 0)
    t = functools.partial(
        _timer,
        fm,
        displayall=displayall,
        limits=limits,
        prerun=prerun,
        profiler=profiler,
    )
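    # `t` is _timer() with the formatter, display mode, run limits, pre-run
    # count, and profiler pre-bound; the perf commands below invoke it as
    # timer(func, setup=...).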
    return t, fm


def stub_timer(fm, func, setup=None, title=None):
    if setup is not None:
        setup()
    func()


@contextlib.contextmanager
def timeone():
    r = []
    ostart = os.times()
    cstart = util.timer()
    yield r
    cstop = util.timer()
    ostop = os.times()
    a, b = ostart, ostop
    r.append((cstop - cstart, b[0] - a[0], b[1] - a[1]))
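

# Illustrative use of timeone() (a sketch, not from the original file): the
# context manager yields a list and, on exit, appends one sample of
# (wall-clock, user CPU, system CPU) deltas:
#
#     with timeone() as sample:
#         expensive_operation()  # hypothetical workload
#     wall, user, sys_time = sample[0]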


# list of stop conditions (elapsed time, minimal run count)
DEFAULTLIMITS = (
    (3.0, 100),
    (10.0, 3),
)


@contextlib.contextmanager
def noop_context():
    yield


def _timer(
    fm,
    func,
    setup=None,
    context=noop_context,
    title=None,
    displayall=False,
    limits=DEFAULTLIMITS,
    prerun=0,
    profiler=None,
):
    gc.collect()
    results = []
    begin = util.timer()
    count = 0
    if profiler is None:
        profiler = NOOPCTX
    for i in range(prerun):
        if setup is not None:
            setup()
        with context():
            func()
    keepgoing = True
    while keepgoing:
        if setup is not None:
            setup()
        with context():
            with profiler:
                with timeone() as item:
                    r = func()
        profiler = NOOPCTX
        count += 1
        results.append(item[0])
        cstop = util.timer()
        # Look for a stop condition.
        elapsed = cstop - begin
        for t, mincount in limits:
            if elapsed >= t and count >= mincount:
                keepgoing = False
                break

    formatone(fm, results, title=title, result=r, displayall=displayall)


def formatone(fm, timings, title=None, result=None, displayall=False):
    count = len(timings)

    fm.startitem()

    if title:
        fm.write(b'title', b'! %s\n', title)
    if result:
        fm.write(b'result', b'! result: %s\n', result)

    def display(role, entry):
        prefix = b''
        if role != b'best':
            prefix = b'%s.' % role
        fm.plain(b'!')
        fm.write(prefix + b'wall', b' wall %f', entry[0])
        fm.write(prefix + b'comb', b' comb %f', entry[1] + entry[2])
        fm.write(prefix + b'user', b' user %f', entry[1])
        fm.write(prefix + b'sys', b' sys %f', entry[2])
        fm.write(prefix + b'count', b' (%s of %%d)' % role, count)
        fm.plain(b'\n')

    timings.sort()
    min_val = timings[0]
    display(b'best', min_val)
    if displayall:
        max_val = timings[-1]
        display(b'max', max_val)
        avg = tuple([sum(x) / count for x in zip(*timings)])
        display(b'avg', avg)
        median = timings[len(timings) // 2]
        display(b'median', median)
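
    # Example of the resulting plain output (illustrative numbers):
    #   ! wall 0.001234 comb 0.002000 user 0.001500 sys 0.000500 (best of 25)
    # with additional max/avg/median lines when all-timing is enabled.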


# utilities for historical portability


def getint(ui, section, name, default):
    # for "historical portability":
    # ui.configint has been available since 1.9 (or fa2b596db182)
    v = ui.config(section, name, None)
    if v is None:
        return default
    try:
        return int(v)
    except ValueError:
        raise error.ConfigError(
            b"%s.%s is not an integer ('%s')" % (section, name, v)
        )


def safeattrsetter(obj, name, ignoremissing=False):
    """Ensure that 'obj' has the 'name' attribute before subsequent setattr

    This function aborts if 'obj' doesn't have the 'name' attribute at
    runtime. This avoids overlooking future removal of an attribute, which
    would silently break the assumptions of the performance measurement.

    This function returns an object used to (1) assign a new value to, and
    (2) restore the original value of, the attribute.

    If 'ignoremissing' is true, a missing 'name' attribute doesn't cause
    an abort, and this function returns None. This is useful for examining
    an attribute that isn't guaranteed to exist in all Mercurial versions.
    """
    if not util.safehasattr(obj, name):
        if ignoremissing:
            return None
        raise error.Abort(
            (
                b"missing attribute %s of %s might break assumption"
                b" of performance measurement"
            )
            % (name, obj)
        )

    origvalue = getattr(obj, _sysstr(name))

    class attrutil:
        def set(self, newvalue):
            setattr(obj, _sysstr(name), newvalue)

        def restore(self):
            setattr(obj, _sysstr(name), origvalue)

    return attrutil()
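

# Typical usage of safeattrsetter (a sketch based on gettimer() above):
#
#     uifout = safeattrsetter(ui, b'fout', ignoremissing=True)
#     if uifout:
#         uifout.set(ui.ferr)  # redirect output
#         ...
#         uifout.restore()     # put the original stream back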


# utilities to examine internal API changes


def getbranchmapsubsettable():
    # for "historical portability":
    # subsettable is defined in:
    # - branchmap since 2.9 (or 175c6fd8cacc)
    # - repoview since 2.5 (or 59a9f18d4587)
    # - repoviewutil since 5.0
    for mod in (branchmap, repoview, repoviewutil):
        subsettable = getattr(mod, 'subsettable', None)
        if subsettable:
            return subsettable

    # bisecting in bcee63733aad::59a9f18d4587 can reach here (both
    # branchmap and repoview modules exist, but the subsettable attribute
    # doesn't)
    raise error.Abort(
        b"perfbranchmap not available with this Mercurial",
        hint=b"use 2.5 or later",
    )


def getsvfs(repo):
    """Return the appropriate object to access files under .hg/store"""
    # for "historical portability":
    # repo.svfs has been available since 2.3 (or 7034365089bf)
    svfs = getattr(repo, 'svfs', None)
    if svfs:
        return svfs
    else:
        return getattr(repo, 'sopener')


def getvfs(repo):
    """Return the appropriate object to access files under .hg"""
    # for "historical portability":
    # repo.vfs has been available since 2.3 (or 7034365089bf)
    vfs = getattr(repo, 'vfs', None)
    if vfs:
        return vfs
    else:
        return getattr(repo, 'opener')


def repocleartagscachefunc(repo):
    """Return the function to clear the tags cache according to repo internal API"""
    if util.safehasattr(repo, b'_tagscache'):  # since 2.0 (or 9dca7653b525)
        # in this case, setattr(repo, '_tagscache', None) or so isn't
        # the correct way to clear the tags cache, because existing code
        # paths expect _tagscache to be a structured object.
        def clearcache():
            # _tagscache has been filteredpropertycache since 2.5 (or
            # 98c867ac1330), and delattr() can't work in such case
            if '_tagscache' in vars(repo):
                del repo.__dict__['_tagscache']

        return clearcache

    repotags = safeattrsetter(repo, b'_tags', ignoremissing=True)
    if repotags:  # since 1.4 (or 5614a628d173)
        return lambda: repotags.set(None)

    repotagscache = safeattrsetter(repo, b'tagscache', ignoremissing=True)
    if repotagscache:  # since 0.6 (or d7df759d0e97)
        return lambda: repotagscache.set(None)

    # Mercurial earlier than 0.6 (or d7df759d0e97) logically reaches
    # this point, but it isn't so problematic, because:
    # - repo.tags of such Mercurial isn't "callable", and repo.tags()
    #   in perftags() causes failure soon
    # - perf.py itself has been available since 1.1 (or eb240755386d)
    raise error.Abort(b"tags API of this hg command is unknown")


# utilities to clear caches


def clearfilecache(obj, attrname):
    unfiltered = getattr(obj, 'unfiltered', None)
    if unfiltered is not None:
        obj = obj.unfiltered()
    if attrname in vars(obj):
        delattr(obj, attrname)
    obj._filecache.pop(attrname, None)


def clearchangelog(repo):
    if repo is not repo.unfiltered():
        object.__setattr__(repo, '_clcachekey', None)
        object.__setattr__(repo, '_clcache', None)
    clearfilecache(repo.unfiltered(), 'changelog')


# perf commands


@command(b'perf::walk|perfwalk', formatteropts)
def perfwalk(ui, repo, *pats, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    m = scmutil.match(repo[None], pats, {})
    timer(
        lambda: len(
            list(
                repo.dirstate.walk(m, subrepos=[], unknown=True, ignored=False)
            )
        )
    )
    fm.end()


@command(b'perf::annotate|perfannotate', formatteropts)
def perfannotate(ui, repo, f, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    fc = repo[b'.'][f]
    timer(lambda: len(fc.annotate(True)))
    fm.end()


@command(
    b'perf::status|perfstatus',
    [
        (b'u', b'unknown', False, b'ask status to look for unknown files'),
        (b'', b'dirstate', False, b'benchmark the internal dirstate call'),
    ]
    + formatteropts,
)
def perfstatus(ui, repo, **opts):
    """benchmark the performance of a single status call

    The repository data are preserved between each call.

    By default, only the status of the tracked files is requested. If
    `--unknown` is passed, the "unknown" files are also requested.
    """
    opts = _byteskwargs(opts)
    # m = match.always(repo.root, repo.getcwd())
    # timer(lambda: sum(map(len, repo.dirstate.status(m, [], False, False,
    #                                                False))))
    timer, fm = gettimer(ui, opts)
    if opts[b'dirstate']:
        dirstate = repo.dirstate
        m = scmutil.matchall(repo)
        unknown = opts[b'unknown']

        def status_dirstate():
            s = dirstate.status(
                m, subrepos=[], ignored=False, clean=False, unknown=unknown
            )
            sum(map(bool, s))

        if util.safehasattr(dirstate, 'running_status'):
            with dirstate.running_status(repo):
                timer(status_dirstate)
                dirstate.invalidate()
        else:
            timer(status_dirstate)
    else:
        timer(lambda: sum(map(len, repo.status(unknown=opts[b'unknown']))))
    fm.end()


@command(b'perf::addremove|perfaddremove', formatteropts)
def perfaddremove(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    try:
        oldquiet = repo.ui.quiet
        repo.ui.quiet = True
        matcher = scmutil.match(repo[None])
        opts[b'dry_run'] = True
        if 'uipathfn' in getargspec(scmutil.addremove).args:
            uipathfn = scmutil.getuipathfn(repo)
            timer(lambda: scmutil.addremove(repo, matcher, b"", uipathfn, opts))
        else:
            timer(lambda: scmutil.addremove(repo, matcher, b"", opts))
    finally:
        repo.ui.quiet = oldquiet
    fm.end()


def clearcaches(cl):
    # behave somewhat consistently across internal API changes
    if util.safehasattr(cl, b'clearcaches'):
        cl.clearcaches()
    elif util.safehasattr(cl, b'_nodecache'):
        # <= hg-5.2
        from mercurial.node import nullid, nullrev

        cl._nodecache = {nullid: nullrev}
        cl._nodepos = None
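

# clearcaches() is passed as the timer `setup` callback (see perfheads just
# below) so that each measured run starts with cold in-memory caches.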
866
866
867
867
868 @command(b'perf::heads|perfheads', formatteropts)
868 @command(b'perf::heads|perfheads', formatteropts)
869 def perfheads(ui, repo, **opts):
869 def perfheads(ui, repo, **opts):
870 """benchmark the computation of a changelog heads"""
870 """benchmark the computation of a changelog heads"""
871 opts = _byteskwargs(opts)
871 opts = _byteskwargs(opts)
872 timer, fm = gettimer(ui, opts)
872 timer, fm = gettimer(ui, opts)
873 cl = repo.changelog
873 cl = repo.changelog
874
874
875 def s():
875 def s():
876 clearcaches(cl)
876 clearcaches(cl)
877
877
878 def d():
878 def d():
879 len(cl.headrevs())
879 len(cl.headrevs())
880
880
881 timer(d, setup=s)
881 timer(d, setup=s)
882 fm.end()
882 fm.end()
883
883
884
884
885 def _default_clear_on_disk_tags_cache(repo):
885 def _default_clear_on_disk_tags_cache(repo):
886 from mercurial import tags
886 from mercurial import tags
887
887
888 repo.cachevfs.tryunlink(tags._filename(repo))
888 repo.cachevfs.tryunlink(tags._filename(repo))
889
889
890
890
891 def _default_clear_on_disk_tags_fnodes_cache(repo):
891 def _default_clear_on_disk_tags_fnodes_cache(repo):
892 from mercurial import tags
892 from mercurial import tags
893
893
894 repo.cachevfs.tryunlink(tags._fnodescachefile)
894 repo.cachevfs.tryunlink(tags._fnodescachefile)
895
895
896
896
897 def _default_forget_fnodes(repo, revs):
897 def _default_forget_fnodes(repo, revs):
898 """function used by the perf extension to prune some entries from the
898 """function used by the perf extension to prune some entries from the
899 fnodes cache"""
899 fnodes cache"""
900 from mercurial import tags
900 from mercurial import tags
901
901
902 missing_1 = b'\xff' * 4
902 missing_1 = b'\xff' * 4
903 missing_2 = b'\xff' * 20
903 missing_2 = b'\xff' * 20
904 cache = tags.hgtagsfnodescache(repo.unfiltered())
904 cache = tags.hgtagsfnodescache(repo.unfiltered())
905 for r in revs:
905 for r in revs:
906 cache._writeentry(r * tags._fnodesrecsize, missing_1, missing_2)
906 cache._writeentry(r * tags._fnodesrecsize, missing_1, missing_2)
907 cache.write()
907 cache.write()
908
908
909
909
910 @command(
910 @command(
911 b'perf::tags|perftags',
911 b'perf::tags|perftags',
912 formatteropts
912 formatteropts
913 + [
913 + [
914 (b'', b'clear-revlogs', False, b'refresh changelog and manifest'),
914 (b'', b'clear-revlogs', False, b'refresh changelog and manifest'),
915 (
915 (
916 b'',
916 b'',
917 b'clear-on-disk-cache',
917 b'clear-on-disk-cache',
918 False,
918 False,
919 b'clear on disk tags cache (DESTRUCTIVE)',
919 b'clear on disk tags cache (DESTRUCTIVE)',
920 ),
920 ),
921 (
921 (
922 b'',
922 b'',
923 b'clear-fnode-cache-all',
923 b'clear-fnode-cache-all',
924 False,
924 False,
925 b'clear on disk file node cache (DESTRUCTIVE),',
925 b'clear on disk file node cache (DESTRUCTIVE),',
926 ),
926 ),
927 (
927 (
928 b'',
928 b'',
929 b'clear-fnode-cache-rev',
929 b'clear-fnode-cache-rev',
930 [],
930 [],
931 b'clear on disk file node cache (DESTRUCTIVE),',
931 b'clear on disk file node cache (DESTRUCTIVE),',
932 b'REVS',
932 b'REVS',
933 ),
933 ),
934 (
934 (
935 b'',
935 b'',
936 b'update-last',
936 b'update-last',
937 b'',
937 b'',
938 b'simulate an update over the last N revisions (DESTRUCTIVE),',
938 b'simulate an update over the last N revisions (DESTRUCTIVE),',
939 b'N',
939 b'N',
940 ),
940 ),
941 ],
941 ],
942 )
942 )
943 def perftags(ui, repo, **opts):
943 def perftags(ui, repo, **opts):
944 """Benchmark tags retrieval in various situation
944 """Benchmark tags retrieval in various situation
945
945
946 The option marked as (DESTRUCTIVE) will alter the on-disk cache, possibly
946 The option marked as (DESTRUCTIVE) will alter the on-disk cache, possibly
947 altering performance after the command was run. However, it does not
947 altering performance after the command was run. However, it does not
948 destroy any stored data.
948 destroy any stored data.
949 """
949 """
950 from mercurial import tags
950 from mercurial import tags
951
951
952 opts = _byteskwargs(opts)
952 opts = _byteskwargs(opts)
953 timer, fm = gettimer(ui, opts)
953 timer, fm = gettimer(ui, opts)
954 repocleartagscache = repocleartagscachefunc(repo)
954 repocleartagscache = repocleartagscachefunc(repo)
955 clearrevlogs = opts[b'clear_revlogs']
955 clearrevlogs = opts[b'clear_revlogs']
956 clear_disk = opts[b'clear_on_disk_cache']
956 clear_disk = opts[b'clear_on_disk_cache']
957 clear_fnode = opts[b'clear_fnode_cache_all']
957 clear_fnode = opts[b'clear_fnode_cache_all']
958
958
959 clear_fnode_revs = opts[b'clear_fnode_cache_rev']
959 clear_fnode_revs = opts[b'clear_fnode_cache_rev']
960 update_last_str = opts[b'update_last']
960 update_last_str = opts[b'update_last']
961 update_last = None
961 update_last = None
962 if update_last_str:
962 if update_last_str:
963 try:
963 try:
964 update_last = int(update_last_str)
964 update_last = int(update_last_str)
965 except ValueError:
965 except ValueError:
966 msg = b'could not parse value for update-last: "%s"'
966 msg = b'could not parse value for update-last: "%s"'
967 msg %= update_last_str
967 msg %= update_last_str
968 hint = b'value should be an integer'
968 hint = b'value should be an integer'
969 raise error.Abort(msg, hint=hint)
969 raise error.Abort(msg, hint=hint)
970
970
971 clear_disk_fn = getattr(
971 clear_disk_fn = getattr(
972 tags,
972 tags,
973 "clear_cache_on_disk",
973 "clear_cache_on_disk",
974 _default_clear_on_disk_tags_cache,
974 _default_clear_on_disk_tags_cache,
975 )
975 )
976 clear_fnodes_fn = getattr(
976 clear_fnodes_fn = getattr(
977 tags,
977 tags,
978 "clear_cache_fnodes",
978 "clear_cache_fnodes",
979 _default_clear_on_disk_tags_fnodes_cache,
979 _default_clear_on_disk_tags_fnodes_cache,
980 )
980 )
981 clear_fnodes_rev_fn = getattr(
981 clear_fnodes_rev_fn = getattr(
982 tags,
982 tags,
983 "forget_fnodes",
983 "forget_fnodes",
984 _default_forget_fnodes,
984 _default_forget_fnodes,
985 )
985 )
986
986
987 clear_revs = []
987 clear_revs = []
988 if clear_fnode_revs:
988 if clear_fnode_revs:
989 clear_revs.extends(scmutil.revrange(repo, clear_fnode_revs))
989 clear_revs.extends(scmutil.revrange(repo, clear_fnode_revs))
990
990
991 if update_last:
991 if update_last:
992 revset = b'last(all(), %d)' % update_last
992 revset = b'last(all(), %d)' % update_last
993 last_revs = repo.unfiltered().revs(revset)
993 last_revs = repo.unfiltered().revs(revset)
994 clear_revs.extend(last_revs)
994 clear_revs.extend(last_revs)
995
995
996 from mercurial import repoview
996 from mercurial import repoview
997
997
998 rev_filter = {(b'experimental', b'extra-filter-revs'): revset}
998 rev_filter = {(b'experimental', b'extra-filter-revs'): revset}
999 with repo.ui.configoverride(rev_filter, source=b"perf"):
999 with repo.ui.configoverride(rev_filter, source=b"perf"):
1000 filter_id = repoview.extrafilter(repo.ui)
1000 filter_id = repoview.extrafilter(repo.ui)
1001
1001
1002 filter_name = b'%s%%%s' % (repo.filtername, filter_id)
1002 filter_name = b'%s%%%s' % (repo.filtername, filter_id)
1003 pre_repo = repo.filtered(filter_name)
1003 pre_repo = repo.filtered(filter_name)
1004 pre_repo.tags() # warm the cache
1004 pre_repo.tags() # warm the cache
1005 old_tags_path = repo.cachevfs.join(tags._filename(pre_repo))
1005 old_tags_path = repo.cachevfs.join(tags._filename(pre_repo))
1006 new_tags_path = repo.cachevfs.join(tags._filename(repo))
1006 new_tags_path = repo.cachevfs.join(tags._filename(repo))
1007
1007
1008 clear_revs = sorted(set(clear_revs))
1008 clear_revs = sorted(set(clear_revs))
1009
1009
1010 def s():
1010 def s():
1011 if update_last:
1011 if update_last:
1012 util.copyfile(old_tags_path, new_tags_path)
1012 util.copyfile(old_tags_path, new_tags_path)
1013 if clearrevlogs:
1013 if clearrevlogs:
1014 clearchangelog(repo)
1014 clearchangelog(repo)
1015 clearfilecache(repo.unfiltered(), 'manifest')
1015 clearfilecache(repo.unfiltered(), 'manifest')
1016 if clear_disk:
1016 if clear_disk:
1017 clear_disk_fn(repo)
1017 clear_disk_fn(repo)
1018 if clear_fnode:
1018 if clear_fnode:
1019 clear_fnodes_fn(repo)
1019 clear_fnodes_fn(repo)
1020 elif clear_revs:
1020 elif clear_revs:
1021 clear_fnodes_rev_fn(repo, clear_revs)
1021 clear_fnodes_rev_fn(repo, clear_revs)
1022 repocleartagscache()
1022 repocleartagscache()
1023
1023
1024 def t():
1024 def t():
1025 len(repo.tags())
1025 len(repo.tags())
1026
1026
1027 timer(t, setup=s)
1027 timer(t, setup=s)
1028 fm.end()
1028 fm.end()


@command(b'perf::ancestors|perfancestors', formatteropts)
def perfancestors(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    heads = repo.changelog.headrevs()

    def d():
        for a in repo.changelog.ancestors(heads):
            pass

    timer(d)
    fm.end()


@command(b'perf::ancestorset|perfancestorset', formatteropts)
def perfancestorset(ui, repo, revset, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    revs = repo.revs(revset)
    heads = repo.changelog.headrevs()

    def d():
        s = repo.changelog.ancestors(heads)
        for rev in revs:
            rev in s

    timer(d)
    fm.end()


@command(
    b'perf::delta-find',
    revlogopts + formatteropts,
    b'-c|-m|FILE REV',
)
def perf_delta_find(ui, repo, arg_1, arg_2=None, **opts):
    """benchmark the process of finding a valid delta for a revlog revision

    When a revlog receives a new revision (e.g. from a commit, or from an
    incoming bundle), it searches for a suitable delta-base to produce a delta.
    This perf command measures how much time we spend in this process. It
    operates on an already stored revision.

    See `hg help debug-delta-find` for another related command.
    """
    from mercurial import revlogutils
    import mercurial.revlogutils.deltas as deltautil

    opts = _byteskwargs(opts)
    if arg_2 is None:
        file_ = None
        rev = arg_1
    else:
        file_ = arg_1
        rev = arg_2

    repo = repo.unfiltered()

    timer, fm = gettimer(ui, opts)

    rev = int(rev)

    revlog = cmdutil.openrevlog(repo, b'perf::delta-find', file_, opts)

    deltacomputer = deltautil.deltacomputer(revlog)

    node = revlog.node(rev)
    p1r, p2r = revlog.parentrevs(rev)
    p1 = revlog.node(p1r)
    p2 = revlog.node(p2r)
    full_text = revlog.revision(rev)
    textlen = len(full_text)
    cachedelta = None
    flags = revlog.flags(rev)

    revinfo = revlogutils.revisioninfo(
        node,
        p1,
        p2,
        [full_text],  # btext
        textlen,
        cachedelta,
        flags,
    )

    # Note: we should probably purge the potential caches (like the full
    # manifest cache) between runs.
    def find_one():
        with revlog._datafp() as fh:
            deltacomputer.finddeltainfo(revinfo, fh, target_rev=rev)

    timer(find_one)
    fm.end()


@command(b'perf::discovery|perfdiscovery', formatteropts, b'PATH')
def perfdiscovery(ui, repo, path, **opts):
    """benchmark discovery between local repo and the peer at given path"""
    repos = [repo, None]
    timer, fm = gettimer(ui, opts)

    try:
        from mercurial.utils.urlutil import get_unique_pull_path_obj

        path = get_unique_pull_path_obj(b'perfdiscovery', ui, path)
    except ImportError:
        try:
            from mercurial.utils.urlutil import get_unique_pull_path

            path = get_unique_pull_path(b'perfdiscovery', repo, ui, path)[0]
        except ImportError:
            path = ui.expandpath(path)

    def s():
        repos[1] = hg.peer(ui, opts, path)

    def d():
        setdiscovery.findcommonheads(ui, *repos)

    timer(d, setup=s)
    fm.end()


@command(
    b'perf::bookmarks|perfbookmarks',
    formatteropts
    + [
        (b'', b'clear-revlogs', False, b'refresh changelog and manifest'),
    ],
)
def perfbookmarks(ui, repo, **opts):
    """benchmark parsing bookmarks from disk to memory"""
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)

    clearrevlogs = opts[b'clear_revlogs']

    def s():
        if clearrevlogs:
            clearchangelog(repo)
        clearfilecache(repo, b'_bookmarks')

    def d():
        repo._bookmarks

    timer(d, setup=s)
    fm.end()


@command(
    b'perf::bundle',
    [
        (
            b'r',
            b'rev',
            [],
            b'changesets to bundle',
            b'REV',
        ),
        (
            b't',
            b'type',
            b'none',
            b'bundlespec to use (see `hg help bundlespec`)',
            b'TYPE',
        ),
    ]
    + formatteropts,
    b'REVS',
)
def perfbundle(ui, repo, *revs, **opts):
    """benchmark the creation of a bundle from a repository

    For now, this only supports "none" compression.
    """
    try:
        from mercurial import bundlecaches

        parsebundlespec = bundlecaches.parsebundlespec
    except ImportError:
        from mercurial import exchange

        parsebundlespec = exchange.parsebundlespec

    from mercurial import discovery
    from mercurial import bundle2

    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)

    cl = repo.changelog
    revs = list(revs)
    revs.extend(opts.get(b'rev', ()))
    revs = scmutil.revrange(repo, revs)
    if not revs:
        raise error.Abort(b"no revision specified")
    # make it a consistent set (ie: without topological gaps)
    old_len = len(revs)
    revs = list(repo.revs(b"%ld::%ld", revs, revs))
    if old_len != len(revs):
        new_count = len(revs) - old_len
        msg = b"add %d new revisions to make it a consistent set\n"
        ui.write_err(msg % new_count)

    targets = [cl.node(r) for r in repo.revs(b"heads(::%ld)", revs)]
    bases = [cl.node(r) for r in repo.revs(b"heads(::%ld - %ld)", revs, revs)]
    outgoing = discovery.outgoing(repo, bases, targets)

    bundle_spec = opts.get(b'type')

    bundle_spec = parsebundlespec(repo, bundle_spec, strict=False)

    cgversion = bundle_spec.params.get(b"cg.version")
    if cgversion is None:
        if bundle_spec.version == b'v1':
            cgversion = b'01'
        if bundle_spec.version == b'v2':
            cgversion = b'02'
    if cgversion not in changegroup.supportedoutgoingversions(repo):
        err = b"repository does not support bundle version %s"
        raise error.Abort(err % cgversion)

    if cgversion == b'01':  # bundle1
        bversion = b'HG10' + bundle_spec.wirecompression
        bcompression = None
    elif cgversion in (b'02', b'03'):
        bversion = b'HG20'
        bcompression = bundle_spec.wirecompression
    else:
        err = b'perf::bundle: unexpected changegroup version %s'
        raise error.ProgrammingError(err % cgversion)

    if bcompression is None:
        bcompression = b'UN'

    if bcompression != b'UN':
        err = b'perf::bundle: compression currently unsupported: %s'
        raise error.ProgrammingError(err % bcompression)

    def do_bundle():
        bundle2.writenewbundle(
            ui,
            repo,
            b'perf::bundle',
            os.devnull,
            bversion,
            outgoing,
            bundle_spec.params,
        )

    timer(do_bundle)
    fm.end()


@command(b'perf::bundleread|perfbundleread', formatteropts, b'BUNDLE')
def perfbundleread(ui, repo, bundlepath, **opts):
    """Benchmark reading of bundle files.

    This command is meant to isolate the I/O part of bundle reading as
    much as possible.
    """
    from mercurial import (
        bundle2,
        exchange,
        streamclone,
    )

    opts = _byteskwargs(opts)

    def makebench(fn):
        def run():
            with open(bundlepath, b'rb') as fh:
                bundle = exchange.readbundle(ui, fh, bundlepath)
                fn(bundle)

        return run

    def makereadnbytes(size):
        def run():
            with open(bundlepath, b'rb') as fh:
                bundle = exchange.readbundle(ui, fh, bundlepath)
                while bundle.read(size):
                    pass

        return run

    def makestdioread(size):
        def run():
            with open(bundlepath, b'rb') as fh:
                while fh.read(size):
                    pass

        return run

    # bundle1

    def deltaiter(bundle):
        for delta in bundle.deltaiter():
            pass

    def iterchunks(bundle):
        for chunk in bundle.getchunks():
            pass

    # bundle2

    def forwardchunks(bundle):
        for chunk in bundle._forwardchunks():
            pass

    def iterparts(bundle):
        for part in bundle.iterparts():
            pass

    def iterpartsseekable(bundle):
        for part in bundle.iterparts(seekable=True):
            pass

    def seek(bundle):
        for part in bundle.iterparts(seekable=True):
            part.seek(0, os.SEEK_END)

    def makepartreadnbytes(size):
        def run():
            with open(bundlepath, b'rb') as fh:
                bundle = exchange.readbundle(ui, fh, bundlepath)
                for part in bundle.iterparts():
                    while part.read(size):
                        pass

        return run

    benches = [
        (makestdioread(8192), b'read(8k)'),
        (makestdioread(16384), b'read(16k)'),
        (makestdioread(32768), b'read(32k)'),
        (makestdioread(131072), b'read(128k)'),
    ]

    with open(bundlepath, b'rb') as fh:
        bundle = exchange.readbundle(ui, fh, bundlepath)

        if isinstance(bundle, changegroup.cg1unpacker):
            benches.extend(
                [
                    (makebench(deltaiter), b'cg1 deltaiter()'),
                    (makebench(iterchunks), b'cg1 getchunks()'),
                    (makereadnbytes(8192), b'cg1 read(8k)'),
                    (makereadnbytes(16384), b'cg1 read(16k)'),
                    (makereadnbytes(32768), b'cg1 read(32k)'),
                    (makereadnbytes(131072), b'cg1 read(128k)'),
                ]
            )
        elif isinstance(bundle, bundle2.unbundle20):
            benches.extend(
                [
                    (makebench(forwardchunks), b'bundle2 forwardchunks()'),
                    (makebench(iterparts), b'bundle2 iterparts()'),
                    (
                        makebench(iterpartsseekable),
                        b'bundle2 iterparts() seekable',
                    ),
                    (makebench(seek), b'bundle2 part seek()'),
                    (makepartreadnbytes(8192), b'bundle2 part read(8k)'),
                    (makepartreadnbytes(16384), b'bundle2 part read(16k)'),
                    (makepartreadnbytes(32768), b'bundle2 part read(32k)'),
                    (makepartreadnbytes(131072), b'bundle2 part read(128k)'),
                ]
            )
        elif isinstance(bundle, streamclone.streamcloneapplier):
            raise error.Abort(b'stream clone bundles not supported')
        else:
            raise error.Abort(b'unhandled bundle type: %s' % type(bundle))

    for fn, title in benches:
        timer, fm = gettimer(ui, opts)
        timer(fn, title=title)
        fm.end()


@command(
    b'perf::changegroupchangelog|perfchangegroupchangelog',
    formatteropts
    + [
        (b'', b'cgversion', b'02', b'changegroup version'),
        (b'r', b'rev', b'', b'revisions to add to changegroup'),
    ],
)
def perfchangegroupchangelog(ui, repo, cgversion=b'02', rev=None, **opts):
    """Benchmark producing a changelog group for a changegroup.

    This measures the time spent processing the changelog during a
    bundle operation. This occurs during `hg bundle` and on a server
    processing a `getbundle` wire protocol request (handles clones
    and pull requests).

    By default, all revisions are added to the changegroup.
    """
    opts = _byteskwargs(opts)
    cl = repo.changelog
    nodes = [cl.lookup(r) for r in repo.revs(rev or b'all()')]
    bundler = changegroup.getbundler(cgversion, repo)

    def d():
        state, chunks = bundler._generatechangelog(cl, nodes)
        for chunk in chunks:
            pass

    timer, fm = gettimer(ui, opts)

    # Terminal printing can interfere with timing. So disable it.
    with ui.configoverride({(b'progress', b'disable'): True}):
        timer(d)

    fm.end()


@command(b'perf::dirs|perfdirs', formatteropts)
def perfdirs(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    dirstate = repo.dirstate
    b'a' in dirstate

    def d():
        dirstate.hasdir(b'a')
        try:
            del dirstate._map._dirs
        except AttributeError:
            pass

    timer(d)
    fm.end()


@command(
    b'perf::dirstate|perfdirstate',
    [
        (
            b'',
            b'iteration',
            None,
            b'benchmark a full iteration for the dirstate',
        ),
        (
            b'',
            b'contains',
            None,
            b'benchmark a large amount of `nf in dirstate` calls',
        ),
    ]
    + formatteropts,
)
def perfdirstate(ui, repo, **opts):
    """benchmark the time of various dirstate operations

    By default, benchmark the time necessary to load a dirstate from scratch.
    The dirstate is loaded to the point where a "contains" request can be
    answered.
    """
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    b"a" in repo.dirstate

    if opts[b'iteration'] and opts[b'contains']:
        msg = b'only specify one of --iteration or --contains'
        raise error.Abort(msg)

    if opts[b'iteration']:
        setup = None
        dirstate = repo.dirstate

        def d():
            for f in dirstate:
                pass

    elif opts[b'contains']:
        setup = None
        dirstate = repo.dirstate
        allfiles = list(dirstate)
        # also add file paths that will be "missing" from the dirstate
        allfiles.extend([f[::-1] for f in allfiles])

        def d():
            for f in allfiles:
                f in dirstate

    else:

        def setup():
            repo.dirstate.invalidate()

        def d():
            b"a" in repo.dirstate

    timer(d, setup=setup)
    fm.end()


@command(b'perf::dirstatedirs|perfdirstatedirs', formatteropts)
def perfdirstatedirs(ui, repo, **opts):
    """benchmark a 'dirstate.hasdir' call from an empty `dirs` cache"""
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    repo.dirstate.hasdir(b"a")

    def setup():
        try:
            del repo.dirstate._map._dirs
        except AttributeError:
            pass

    def d():
        repo.dirstate.hasdir(b"a")

    timer(d, setup=setup)
    fm.end()


@command(b'perf::dirstatefoldmap|perfdirstatefoldmap', formatteropts)
def perfdirstatefoldmap(ui, repo, **opts):
    """benchmark a `dirstate._map.filefoldmap.get()` request

    The dirstate filefoldmap cache is dropped between every request.
    """
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    dirstate = repo.dirstate
    dirstate._map.filefoldmap.get(b'a')

    def setup():
        del dirstate._map.filefoldmap

    def d():
        dirstate._map.filefoldmap.get(b'a')

    timer(d, setup=setup)
    fm.end()


@command(b'perf::dirfoldmap|perfdirfoldmap', formatteropts)
def perfdirfoldmap(ui, repo, **opts):
    """benchmark a `dirstate._map.dirfoldmap.get()` request

    The dirstate dirfoldmap cache is dropped between every request.
    """
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    dirstate = repo.dirstate
    dirstate._map.dirfoldmap.get(b'a')

    def setup():
        del dirstate._map.dirfoldmap
        try:
            del dirstate._map._dirs
        except AttributeError:
            pass

    def d():
        dirstate._map.dirfoldmap.get(b'a')

    timer(d, setup=setup)
    fm.end()


@command(b'perf::dirstatewrite|perfdirstatewrite', formatteropts)
def perfdirstatewrite(ui, repo, **opts):
    """benchmark the time it takes to write a dirstate to disk"""
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    ds = repo.dirstate
    b"a" in ds

    def setup():
        ds._dirty = True

    def d():
        ds.write(repo.currenttransaction())

    with repo.wlock():
        timer(d, setup=setup)
    fm.end()


def _getmergerevs(repo, opts):
    """parse command arguments to return the revisions involved in a merge

    input: options dictionary with `rev`, `from` and `base`
    output: (localctx, otherctx, basectx)
    """
    if opts[b'from']:
        fromrev = scmutil.revsingle(repo, opts[b'from'])
        wctx = repo[fromrev]
    else:
        wctx = repo[None]
        # we don't want working dir files to be stat'd in the benchmark, so
        # prime that cache
        wctx.dirty()
    rctx = scmutil.revsingle(repo, opts[b'rev'], opts[b'rev'])
    if opts[b'base']:
        fromrev = scmutil.revsingle(repo, opts[b'base'])
        ancestor = repo[fromrev]
    else:
        ancestor = wctx.ancestor(rctx)
    return (wctx, rctx, ancestor)


@command(
    b'perf::mergecalculate|perfmergecalculate',
    [
        (b'r', b'rev', b'.', b'rev to merge against'),
        (b'', b'from', b'', b'rev to merge from'),
        (b'', b'base', b'', b'the revision to use as base'),
    ]
    + formatteropts,
)
def perfmergecalculate(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)

    wctx, rctx, ancestor = _getmergerevs(repo, opts)

    def d():
        # acceptremote is True because we don't want prompts in the middle of
        # our benchmark
        merge.calculateupdates(
            repo,
            wctx,
            rctx,
            [ancestor],
            branchmerge=False,
            force=False,
            acceptremote=True,
            followcopies=True,
        )

    timer(d)
    fm.end()


@command(
    b'perf::mergecopies|perfmergecopies',
    [
        (b'r', b'rev', b'.', b'rev to merge against'),
        (b'', b'from', b'', b'rev to merge from'),
        (b'', b'base', b'', b'the revision to use as base'),
    ]
    + formatteropts,
)
def perfmergecopies(ui, repo, **opts):
    """measure runtime of `copies.mergecopies`"""
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    wctx, rctx, ancestor = _getmergerevs(repo, opts)

    def d():
        # acceptremote is True because we don't want prompts in the middle of
        # our benchmark
        copies.mergecopies(repo, wctx, rctx, ancestor)

    timer(d)
    fm.end()


@command(b'perf::pathcopies|perfpathcopies', [], b"REV REV")
def perfpathcopies(ui, repo, rev1, rev2, **opts):
    """benchmark the copy tracing logic"""
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    ctx1 = scmutil.revsingle(repo, rev1, rev1)
    ctx2 = scmutil.revsingle(repo, rev2, rev2)

    def d():
        copies.pathcopies(ctx1, ctx2)

    timer(d)
    fm.end()


@command(
    b'perf::phases|perfphases',
    [
        (b'', b'full', False, b'include file reading time too'),
    ],
    b"",
)
def perfphases(ui, repo, **opts):
    """benchmark phasesets computation"""
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    _phases = repo._phasecache
    full = opts.get(b'full')

    def d():
        phases = _phases
        if full:
            clearfilecache(repo, b'_phasecache')
            phases = repo._phasecache
        phases.invalidate()
        phases.loadphaserevs(repo)

    timer(d)
    fm.end()


@command(b'perf::phasesremote|perfphasesremote', [], b"[DEST]")
def perfphasesremote(ui, repo, dest=None, **opts):
    """benchmark time needed to analyse phases of the remote server"""
    from mercurial.node import bin
    from mercurial import (
        exchange,
        hg,
        phases,
    )

    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)

    path = ui.getpath(dest, default=(b'default-push', b'default'))
    if not path:
        raise error.Abort(
            b'default repository not configured!',
            hint=b"see 'hg help config.paths'",
        )
    if util.safehasattr(path, 'main_path'):
        path = path.get_push_variant()
        dest = path.loc
    else:
        dest = path.pushloc or path.loc
    ui.statusnoi18n(b'analysing phase of %s\n' % util.hidepassword(dest))
    other = hg.peer(repo, opts, dest)

    # easier to perform discovery through the operation
    op = exchange.pushoperation(repo, other)
    exchange._pushdiscoverychangeset(op)

    remotesubset = op.fallbackheads

    with other.commandexecutor() as e:
        remotephases = e.callcommand(
            b'listkeys', {b'namespace': b'phases'}
        ).result()
    del other
    publishing = remotephases.get(b'publishing', False)
    if publishing:
        ui.statusnoi18n(b'publishing: yes\n')
    else:
        ui.statusnoi18n(b'publishing: no\n')

    has_node = getattr(repo.changelog.index, 'has_node', None)
    if has_node is None:
        has_node = repo.changelog.nodemap.__contains__
    nonpublishroots = 0
    for nhex, phase in remotephases.iteritems():
        if nhex == b'publishing':  # ignore data related to publish option
            continue
        node = bin(nhex)
        if has_node(node) and int(phase):
            nonpublishroots += 1
    ui.statusnoi18n(b'number of roots: %d\n' % len(remotephases))
    ui.statusnoi18n(b'number of known non public roots: %d\n' % nonpublishroots)

    def d():
        phases.remotephasessummary(repo, remotesubset, remotephases)

    timer(d)
    fm.end()


@command(
    b'perf::manifest|perfmanifest',
    [
        (b'm', b'manifest-rev', False, b'Look up a manifest node revision'),
        (b'', b'clear-disk', False, b'clear on-disk caches too'),
    ]
    + formatteropts,
    b'REV|NODE',
)
def perfmanifest(ui, repo, rev, manifest_rev=False, clear_disk=False, **opts):
    """benchmark the time to read a manifest from disk and return a usable
    dict-like object

    Manifest caches are cleared before retrieval."""
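    # Example invocations (editorial sketch):
    #
    #   $ hg perf::manifest tip                # look up via a changeset
    #   $ hg perf::manifest --clear-disk tip   # also drop on-disk caches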
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    if not manifest_rev:
        ctx = scmutil.revsingle(repo, rev, rev)
        t = ctx.manifestnode()
    else:
        from mercurial.node import bin

        if len(rev) == 40:
            t = bin(rev)
        else:
            try:
                rev = int(rev)

                if util.safehasattr(repo.manifestlog, 'getstorage'):
                    t = repo.manifestlog.getstorage(b'').node(rev)
                else:
                    t = repo.manifestlog._revlog.lookup(rev)
            except ValueError:
                raise error.Abort(
                    b'manifest revision must be integer or full node'
                )

    def d():
        repo.manifestlog.clearcaches(clear_persisted_data=clear_disk)
        repo.manifestlog[t].read()

    timer(d)
    fm.end()


@command(b'perf::changeset|perfchangeset', formatteropts)
def perfchangeset(ui, repo, rev, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    n = scmutil.revsingle(repo, rev).node()

    def d():
        repo.changelog.read(n)
        # repo.changelog._cache = None

    timer(d)
    fm.end()


@command(b'perf::ignore|perfignore', formatteropts)
def perfignore(ui, repo, **opts):
    """benchmark operations related to computing ignore"""
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    dirstate = repo.dirstate

    def setupone():
        dirstate.invalidate()
        clearfilecache(dirstate, b'_ignore')

    def runone():
        dirstate._ignore

    timer(runone, setup=setupone, title=b"load")
    fm.end()


@command(
    b'perf::index|perfindex',
    [
        (b'', b'rev', [], b'revision to be looked up (default tip)'),
        (b'', b'no-lookup', None, b'do not look up revisions after creation'),
    ]
    + formatteropts,
)
def perfindex(ui, repo, **opts):
    """benchmark index creation time followed by a lookup

    The default is to look `tip` up. Depending on the index implementation,
    the revision looked up can matter. For example, an implementation
    scanning the index will have a faster lookup time for `--rev tip` than for
    `--rev 0`. The number of looked-up revisions and their order can also
    matter.

    Examples of useful sets to test:

    * tip
    * 0
    * -10:
    * :10
    * -10: + :10
    * :10: + -10:
    * -10000:
    * -10000: + 0

    It is not currently possible to check for lookup of a missing node. For
    deeper lookup benchmarking, check out the `perfnodemap` command."""
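    # Example (editorial sketch, reusing revsets from the list above):
    #
    #   $ hg perf::index --rev tip
    #   $ hg perf::index --rev '-10000:' --rev 0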
    import mercurial.revlog

    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    mercurial.revlog._prereadsize = 2 ** 24  # disable lazy parser in old hg
    if opts[b'no_lookup']:
        if opts[b'rev']:
            raise error.Abort(b'--no-lookup and --rev are mutually exclusive')
        nodes = []
    elif not opts[b'rev']:
        nodes = [repo[b"tip"].node()]
    else:
        revs = scmutil.revrange(repo, opts[b'rev'])
        cl = repo.changelog
        nodes = [cl.node(r) for r in revs]

    unfi = repo.unfiltered()
    # find the filecache func directly
    # This avoids polluting the benchmark with the filecache logic
    makecl = unfi.__class__.changelog.func

    def setup():
        # probably not necessary, but for good measure
        clearchangelog(unfi)

    def d():
        cl = makecl(unfi)
        for n in nodes:
            cl.rev(n)

    timer(d, setup=setup)
    fm.end()


@command(
    b'perf::nodemap|perfnodemap',
    [
        (b'', b'rev', [], b'revision to be looked up (default tip)'),
        (b'', b'clear-caches', True, b'clear revlog cache between calls'),
    ]
    + formatteropts,
)
def perfnodemap(ui, repo, **opts):
    """benchmark the time necessary to look up revisions from a cold nodemap

    Depending on the implementation, the number and order of revisions we
    look up can vary. Examples of useful sets to test:
    * tip
    * 0
    * -10:
    * :10
    * -10: + :10
    * :10: + -10:
    * -10000:
    * -10000: + 0

    The command currently focuses on valid binary lookups. Benchmarking for
    hexlookup, prefix lookup and missing lookup would also be valuable.
    """
    import mercurial.revlog

    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    mercurial.revlog._prereadsize = 2 ** 24  # disable lazy parser in old hg

    unfi = repo.unfiltered()
    clearcaches = opts[b'clear_caches']
    # find the filecache func directly
    # This avoids polluting the benchmark with the filecache logic
    makecl = unfi.__class__.changelog.func
    if not opts[b'rev']:
        raise error.Abort(b'use --rev to specify revisions to look up')
    revs = scmutil.revrange(repo, opts[b'rev'])
    cl = repo.changelog
    nodes = [cl.node(r) for r in revs]

    # use a list to pass reference to a nodemap from one closure to the next
    nodeget = [None]

    def setnodeget():
        # probably not necessary, but for good measure
        clearchangelog(unfi)
        cl = makecl(unfi)
        if util.safehasattr(cl.index, 'get_rev'):
            nodeget[0] = cl.index.get_rev
        else:
            nodeget[0] = cl.nodemap.get

    def d():
        get = nodeget[0]
        for n in nodes:
            get(n)

    setup = None
    if clearcaches:

        def setup():
            setnodeget()

    else:
        setnodeget()
        d()  # prewarm the data structure
    timer(d, setup=setup)
    fm.end()


@command(b'perf::startup|perfstartup', formatteropts)
def perfstartup(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)

    def d():
        if os.name != 'nt':
            os.system(
                b"HGRCPATH= %s version -q > /dev/null" % fsencode(sys.argv[0])
            )
        else:
            os.environ['HGRCPATH'] = r' '
            os.system("%s version -q > NUL" % sys.argv[0])

    timer(d)
    fm.end()


def _find_stream_generator(version):
    """find the proper generator function for this stream version"""
    import mercurial.streamclone

    available = {}

    # try to fetch a v1 generator
    generatev1 = getattr(mercurial.streamclone, "generatev1", None)
    if generatev1 is not None:

        def generate(repo):
            entries, bytes, data = generatev1(repo)
            return data

        available[b'v1'] = generate
    # try to fetch a v2 generator
    generatev2 = getattr(mercurial.streamclone, "generatev2", None)
    if generatev2 is not None:

        def generate(repo):
            entries, bytes, data = generatev2(repo, None, None, True)
            return data

        available[b'v2'] = generate
    # try to fetch a v3 generator
    generatev3 = getattr(mercurial.streamclone, "generatev3", None)
    if generatev3 is not None:

        def generate(repo):
            entries, bytes, data = generatev3(repo, None, None, True)
            return data

        available[b'v3-exp'] = generate

    # resolve the request
    if version == b"latest":
        # latest is the highest non experimental version
        latest_key = max(v for v in available if b'-exp' not in v)
        return available[latest_key]
    elif version in available:
        return available[version]
    else:
        msg = b"unknown or unavailable version: %s"
        msg %= version
        hint = b"available versions: %s"
        hint %= b', '.join(sorted(available))
        raise error.Abort(msg, hint=hint)


@command(
    b'perf::stream-locked-section',
    [
        (
            b'',
            b'stream-version',
            b'latest',
            b'stream version to use ("v1", "v2", "v3-exp" or "latest", the default)',
        ),
    ]
    + formatteropts,
)
def perf_stream_clone_scan(ui, repo, stream_version, **opts):
    """benchmark the initial, repo-locked, section of a stream-clone"""
2094
2094
2095 opts = _byteskwargs(opts)
2095 opts = _byteskwargs(opts)
2096 timer, fm = gettimer(ui, opts)
2096 timer, fm = gettimer(ui, opts)
2097
2097
2098 # deletion of the generator may trigger some cleanup that we do not want to
2098 # deletion of the generator may trigger some cleanup that we do not want to
2099 # measure
2099 # measure
2100 result_holder = [None]
2100 result_holder = [None]
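    # setupone() resets the holder, so the previous run's data is released
    # there, outside of the timed section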

    def setupone():
        result_holder[0] = None

    generate = _find_stream_generator(stream_version)

    def runone():
        # the lock is held for the duration of the initialisation
        result_holder[0] = generate(repo)

    timer(runone, setup=setupone, title=b"load")
    fm.end()


@command(
    b'perf::stream-generate',
    [
        (
            b'',
            b'stream-version',
            b'latest',
            b'stream version to use ("v1", "v2" or "latest" (the default))',
        ),
    ]
    + formatteropts,
)
def perf_stream_clone_generate(ui, repo, stream_version, **opts):
    """benchmark the full generation of a stream clone"""

    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)

    # deletion of the generator may trigger some cleanup that we do not want to
    # measure

    generate = _find_stream_generator(stream_version)

    def runone():
        # the lock is held for the duration of the initialisation
        for chunk in generate(repo):
            pass

    timer(runone, title=b"generate")
    fm.end()


@command(
    b'perf::stream-consume',
    formatteropts,
)
def perf_stream_clone_consume(ui, repo, filename, **opts):
    """benchmark the full application of a stream clone

    This includes the creation of the repository
    """
    # try except to appease check code
    msg = b"mercurial too old, missing necessary module: %s"
    try:
        from mercurial import bundle2
    except ImportError as exc:
        msg %= _bytestr(exc)
        raise error.Abort(msg)
    try:
        from mercurial import exchange
    except ImportError as exc:
        msg %= _bytestr(exc)
        raise error.Abort(msg)
    try:
        from mercurial import hg
    except ImportError as exc:
        msg %= _bytestr(exc)
        raise error.Abort(msg)
    try:
        from mercurial import localrepo
    except ImportError as exc:
        msg %= _bytestr(exc)
        raise error.Abort(msg)

    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)

    # deletion of the generator may trigger some cleanup that we do not want to
    # measure
    if not (os.path.isfile(filename) and os.access(filename, os.R_OK)):
        raise error.Abort("not a readable file: %s" % filename)

    run_variables = [None, None]
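    # context() opens the bundle and a throwaway target directory, exposing
    # them to runone() through run_variables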

    @contextlib.contextmanager
    def context():
        with open(filename, mode='rb') as bundle:
            with tempfile.TemporaryDirectory() as tmp_dir:
                tmp_dir = fsencode(tmp_dir)
                run_variables[0] = bundle
                run_variables[1] = tmp_dir
                yield
                run_variables[0] = None
                run_variables[1] = None

    def runone():
        bundle = run_variables[0]
        tmp_dir = run_variables[1]
        # only pass ui when no srcrepo
        localrepo.createrepository(
            repo.ui, tmp_dir, requirements=repo.requirements
        )
        target = hg.repository(repo.ui, tmp_dir)
        gen = exchange.readbundle(target.ui, bundle, bundle.name)
        # stream v1
        if util.safehasattr(gen, 'apply'):
            gen.apply(target)
        else:
            with target.transaction(b"perf::stream-consume") as tr:
                bundle2.applybundle(
                    target,
                    gen,
                    tr,
                    source=b'unbundle',
                    url=filename,
                )

    timer(runone, context=context, title=b"consume")
    fm.end()


@command(b'perf::parents|perfparents', formatteropts)
def perfparents(ui, repo, **opts):
    """benchmark the time necessary to fetch one changeset's parents.

    The fetch is done using the `node identifier`, traversing all object layers
    from the repository object. The first N revisions will be used for this
    benchmark. N is controlled by the ``perf.parentscount`` config option
    (default: 1000).
    """
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    # control the number of commits perfparents iterates over
    # experimental config: perf.parentscount
    count = getint(ui, b"perf", b"parentscount", 1000)
    if len(repo.changelog) < count:
        raise error.Abort(b"repo needs %d commits for this test" % count)
    repo = repo.unfiltered()
    nl = [repo.changelog.node(i) for i in _xrange(count)]

    def d():
        for n in nl:
            repo.changelog.parents(n)

    timer(d)
    fm.end()


@command(b'perf::ctxfiles|perfctxfiles', formatteropts)
def perfctxfiles(ui, repo, x, **opts):
    opts = _byteskwargs(opts)
    x = int(x)
    timer, fm = gettimer(ui, opts)

    def d():
        len(repo[x].files())

    timer(d)
    fm.end()


@command(b'perf::rawfiles|perfrawfiles', formatteropts)
def perfrawfiles(ui, repo, x, **opts):
    opts = _byteskwargs(opts)
    x = int(x)
    timer, fm = gettimer(ui, opts)
    cl = repo.changelog
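    # changelog.read() returns the parsed revision; field 3 is the file list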

    def d():
        len(cl.read(x)[3])

    timer(d)
    fm.end()


@command(b'perf::lookup|perflookup', formatteropts)
def perflookup(ui, repo, rev, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    timer(lambda: len(repo.lookup(rev)))
    fm.end()


@command(
    b'perf::linelogedits|perflinelogedits',
    [
        (b'n', b'edits', 10000, b'number of edits'),
        (b'', b'max-hunk-lines', 10, b'max lines in a hunk'),
    ],
    norepo=True,
)
def perflinelogedits(ui, **opts):
    from mercurial import linelog

    opts = _byteskwargs(opts)

    edits = opts[b'edits']
    maxhunklines = opts[b'max_hunk_lines']

    maxb1 = 100000
    random.seed(0)
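    # fixed seed: every invocation benchmarks the same pseudo-random edit
    # sequence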
    randint = random.randint
    currentlines = 0
    arglist = []
    for rev in _xrange(edits):
        a1 = randint(0, currentlines)
        a2 = randint(a1, min(currentlines, a1 + maxhunklines))
        b1 = randint(0, maxb1)
        b2 = randint(b1, b1 + maxhunklines)
        currentlines += (b2 - b1) - (a2 - a1)
        arglist.append((rev, a1, a2, b1, b2))

    def d():
        ll = linelog.linelog()
        for args in arglist:
            ll.replacelines(*args)

    timer, fm = gettimer(ui, opts)
    timer(d)
    fm.end()


@command(b'perf::revrange|perfrevrange', formatteropts)
def perfrevrange(ui, repo, *specs, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    revrange = scmutil.revrange
    timer(lambda: len(revrange(repo, specs)))
    fm.end()


@command(b'perf::nodelookup|perfnodelookup', formatteropts)
def perfnodelookup(ui, repo, rev, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    import mercurial.revlog

    mercurial.revlog._prereadsize = 2 ** 24  # disable lazy parser in old hg
    n = scmutil.revsingle(repo, rev).node()

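    # the revlog constructor signature changed over time (radix vs indexfile),
    # so fall back when running against an older Mercurial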
    try:
        cl = revlog(getsvfs(repo), radix=b"00changelog")
    except TypeError:
        cl = revlog(getsvfs(repo), indexfile=b"00changelog.i")

    def d():
        cl.rev(n)
        clearcaches(cl)

    timer(d)
    fm.end()


@command(
    b'perf::log|perflog',
    [(b'', b'rename', False, b'ask log to follow renames')] + formatteropts,
)
def perflog(ui, repo, rev=None, **opts):
    opts = _byteskwargs(opts)
    if rev is None:
        rev = []
    timer, fm = gettimer(ui, opts)
    ui.pushbuffer()
    timer(
        lambda: commands.log(
            ui, repo, rev=rev, date=b'', user=b'', copies=opts.get(b'rename')
        )
    )
    ui.popbuffer()
    fm.end()


@command(b'perf::moonwalk|perfmoonwalk', formatteropts)
def perfmoonwalk(ui, repo, **opts):
    """benchmark walking the changelog backwards

    This also loads the changelog data for each revision in the changelog.
    """
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)

    def moonwalk():
        for i in repo.changelog.revs(start=(len(repo) - 1), stop=-1):
            ctx = repo[i]
            ctx.branch()  # read changelog data (in addition to the index)

    timer(moonwalk)
    fm.end()


@command(
    b'perf::templating|perftemplating',
    [
        (b'r', b'rev', [], b'revisions to run the template on'),
    ]
    + formatteropts,
)
def perftemplating(ui, repo, testedtemplate=None, **opts):
    """test the rendering time of a given template"""
    if makelogtemplater is None:
        raise error.Abort(
            b"perftemplating not available with this Mercurial",
            hint=b"use 4.3 or later",
        )

    opts = _byteskwargs(opts)

    nullui = ui.copy()
    nullui.fout = open(os.devnull, 'wb')
    nullui.disablepager()
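    # the null ui swallows the rendered output, so the template engine is
    # measured rather than terminal I/O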
    revs = opts.get(b'rev')
    if not revs:
        revs = [b'all()']
    revs = list(scmutil.revrange(repo, revs))

    defaulttemplate = (
        b'{date|shortdate} [{rev}:{node|short}]'
        b' {author|person}: {desc|firstline}\n'
    )
    if testedtemplate is None:
        testedtemplate = defaulttemplate
    displayer = makelogtemplater(nullui, repo, testedtemplate)

    def format():
        for r in revs:
            ctx = repo[r]
            displayer.show(ctx)
            displayer.flush(ctx)

    timer, fm = gettimer(ui, opts)
    timer(format)
    fm.end()


def _displaystats(ui, opts, entries, data):
    # use a second formatter because the data are quite different, not sure
    # how it flies with the templater.
    fm = ui.formatter(b'perf-stats', opts)
    for key, title in entries:
        values = data[key]
        nbvalues = len(values)
        values.sort()
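        # each entry is a (measurement, identifiers...) tuple, so after the
        # sort percentiles can be read straight out of the list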
        stats = {
            'key': key,
            'title': title,
            'nbitems': len(values),
            'min': values[0][0],
            '10%': values[(nbvalues * 10) // 100][0],
            '25%': values[(nbvalues * 25) // 100][0],
            '50%': values[(nbvalues * 50) // 100][0],
            '75%': values[(nbvalues * 75) // 100][0],
            '80%': values[(nbvalues * 80) // 100][0],
            '85%': values[(nbvalues * 85) // 100][0],
            '90%': values[(nbvalues * 90) // 100][0],
            '95%': values[(nbvalues * 95) // 100][0],
            '99%': values[(nbvalues * 99) // 100][0],
            'max': values[-1][0],
        }
        fm.startitem()
        fm.data(**stats)
        # make the stats pretty for the human output
        fm.plain('### %s (%d items)\n' % (title, len(values)))
        lines = [
            'min',
            '10%',
            '25%',
            '50%',
            '75%',
            '80%',
            '85%',
            '90%',
            '95%',
            '99%',
            'max',
        ]
        for l in lines:
            fm.plain('%s: %s\n' % (l, stats[l]))
    fm.end()


@command(
    b'perf::helper-mergecopies|perfhelper-mergecopies',
    formatteropts
    + [
        (b'r', b'revs', [], b'restrict search to these revisions'),
        (b'', b'timing', False, b'provides extra data (costly)'),
        (b'', b'stats', False, b'provides statistics about the measured data'),
    ],
)
def perfhelpermergecopies(ui, repo, revs=[], **opts):
    """find statistics about potential parameters for `perfmergecopies`

    This command finds (base, p1, p2) triplets relevant for copytracing
    benchmarking in the context of a merge. It reports values for some of the
    parameters that impact merge copy tracing time during merge.

    If `--timing` is set, rename detection is run and the associated timing
    will be reported. The extra details come at the cost of slower command
    execution.

    Since rename detection is only run once, other factors might easily
    affect the precision of the timing. However, it should give a good
    approximation of which revision triplets are very costly.
    """
    opts = _byteskwargs(opts)
    fm = ui.formatter(b'perf', opts)
    dotiming = opts[b'timing']
    dostats = opts[b'stats']

    output_template = [
        ("base", "%(base)12s"),
        ("p1", "%(p1.node)12s"),
        ("p2", "%(p2.node)12s"),
        ("p1.nb-revs", "%(p1.nbrevs)12d"),
        ("p1.nb-files", "%(p1.nbmissingfiles)12d"),
        ("p1.renames", "%(p1.renamedfiles)12d"),
        ("p1.time", "%(p1.time)12.3f"),
        ("p2.nb-revs", "%(p2.nbrevs)12d"),
        ("p2.nb-files", "%(p2.nbmissingfiles)12d"),
        ("p2.renames", "%(p2.renamedfiles)12d"),
        ("p2.time", "%(p2.time)12.3f"),
        ("renames", "%(nbrenamedfiles)12d"),
        ("total.time", "%(time)12.3f"),
    ]
    if not dotiming:
        output_template = [
            i
            for i in output_template
            if not ('time' in i[0] or 'renames' in i[0])
        ]
    header_names = [h for (h, v) in output_template]
    output = ' '.join([v for (h, v) in output_template]) + '\n'
    header = ' '.join(['%12s'] * len(header_names)) + '\n'
    fm.plain(header % tuple(header_names))

    if not revs:
        revs = ['all()']
    revs = scmutil.revrange(repo, revs)

    if dostats:
        alldata = {
            'nbrevs': [],
            'nbmissingfiles': [],
        }
        if dotiming:
            alldata['parentnbrenames'] = []
            alldata['totalnbrenames'] = []
            alldata['parenttime'] = []
            alldata['totaltime'] = []

    roi = repo.revs('merge() and %ld', revs)
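    # only merge commits are relevant for merge copy tracing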
    for r in roi:
        ctx = repo[r]
        p1 = ctx.p1()
        p2 = ctx.p2()
        bases = repo.changelog._commonancestorsheads(p1.rev(), p2.rev())
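        # each head of the common ancestors is a candidate merge base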
        for b in bases:
            b = repo[b]
            p1missing = copies._computeforwardmissing(b, p1)
            p2missing = copies._computeforwardmissing(b, p2)
            data = {
                b'base': b.hex(),
                b'p1.node': p1.hex(),
                b'p1.nbrevs': len(repo.revs('only(%d, %d)', p1.rev(), b.rev())),
                b'p1.nbmissingfiles': len(p1missing),
                b'p2.node': p2.hex(),
                b'p2.nbrevs': len(repo.revs('only(%d, %d)', p2.rev(), b.rev())),
                b'p2.nbmissingfiles': len(p2missing),
            }
            if dostats:
                if p1missing:
                    alldata['nbrevs'].append(
                        (data['p1.nbrevs'], b.hex(), p1.hex())
                    )
                    alldata['nbmissingfiles'].append(
                        (data['p1.nbmissingfiles'], b.hex(), p1.hex())
                    )
                if p2missing:
                    alldata['nbrevs'].append(
                        (data['p2.nbrevs'], b.hex(), p2.hex())
                    )
                    alldata['nbmissingfiles'].append(
                        (data['p2.nbmissingfiles'], b.hex(), p2.hex())
                    )
            if dotiming:
                begin = util.timer()
                mergedata = copies.mergecopies(repo, p1, p2, b)
                end = util.timer()
                # not very stable timing since we did only one run
                data['time'] = end - begin
                # mergedata contains five dicts: "copy", "movewithdir",
                # "diverge", "renamedelete" and "dirmove".
                # The first 4 are about renamed files, so let's count those.
                renames = len(mergedata[0])
                renames += len(mergedata[1])
                renames += len(mergedata[2])
                renames += len(mergedata[3])
                data['nbrenamedfiles'] = renames
                begin = util.timer()
                p1renames = copies.pathcopies(b, p1)
                end = util.timer()
                data['p1.time'] = end - begin
                begin = util.timer()
                p2renames = copies.pathcopies(b, p2)
                end = util.timer()
                data['p2.time'] = end - begin
                data['p1.renamedfiles'] = len(p1renames)
                data['p2.renamedfiles'] = len(p2renames)

                if dostats:
                    if p1missing:
                        alldata['parentnbrenames'].append(
                            (data['p1.renamedfiles'], b.hex(), p1.hex())
                        )
                        alldata['parenttime'].append(
                            (data['p1.time'], b.hex(), p1.hex())
                        )
                    if p2missing:
                        alldata['parentnbrenames'].append(
                            (data['p2.renamedfiles'], b.hex(), p2.hex())
                        )
                        alldata['parenttime'].append(
                            (data['p2.time'], b.hex(), p2.hex())
                        )
                    if p1missing or p2missing:
                        alldata['totalnbrenames'].append(
                            (
                                data['nbrenamedfiles'],
                                b.hex(),
                                p1.hex(),
                                p2.hex(),
                            )
                        )
                        alldata['totaltime'].append(
                            (data['time'], b.hex(), p1.hex(), p2.hex())
                        )
            fm.startitem()
            fm.data(**data)
            # make node pretty for the human output
            out = data.copy()
            out['base'] = fm.hexfunc(b.node())
            out['p1.node'] = fm.hexfunc(p1.node())
            out['p2.node'] = fm.hexfunc(p2.node())
            fm.plain(output % out)

    fm.end()
    if dostats:
        # use a second formatter because the data are quite different, not sure
        # how it flies with the templater.
        entries = [
            ('nbrevs', 'number of revisions covered'),
            ('nbmissingfiles', 'number of missing files at head'),
        ]
        if dotiming:
            entries.append(
                ('parentnbrenames', 'renames from one parent to base')
            )
            entries.append(('totalnbrenames', 'total number of renames'))
            entries.append(('parenttime', 'time for one parent'))
            entries.append(('totaltime', 'time for both parents'))
        _displaystats(ui, opts, entries, alldata)


@command(
    b'perf::helper-pathcopies|perfhelper-pathcopies',
    formatteropts
    + [
        (b'r', b'revs', [], b'restrict search to these revisions'),
        (b'', b'timing', False, b'provides extra data (costly)'),
        (b'', b'stats', False, b'provides statistics about the measured data'),
    ],
)
def perfhelperpathcopies(ui, repo, revs=[], **opts):
    """find statistics about potential parameters for `perftracecopies`

    This command finds source-destination pairs relevant for copytracing
    testing. It reports values for some of the parameters that impact copy
    tracing time.

    If `--timing` is set, rename detection is run and the associated timing
    will be reported. The extra details come at the cost of a slower command
    execution.

    Since the rename detection is only run once, other factors might easily
    affect the precision of the timing. However, it should give a good
    approximation of which revision pairs are very costly.
    """
    opts = _byteskwargs(opts)
    fm = ui.formatter(b'perf', opts)
    dotiming = opts[b'timing']
    dostats = opts[b'stats']

    if dotiming:
        header = '%12s %12s %12s %12s %12s %12s\n'
        output = (
            "%(source)12s %(destination)12s "
            "%(nbrevs)12d %(nbmissingfiles)12d "
            "%(nbrenamedfiles)12d %(time)18.5f\n"
        )
        header_names = (
            "source",
            "destination",
            "nb-revs",
            "nb-files",
            "nb-renames",
            "time",
        )
        fm.plain(header % header_names)
    else:
        header = '%12s %12s %12s %12s\n'
        output = (
            "%(source)12s %(destination)12s "
            "%(nbrevs)12d %(nbmissingfiles)12d\n"
        )
        fm.plain(header % ("source", "destination", "nb-revs", "nb-files"))

    if not revs:
        revs = ['all()']
    revs = scmutil.revrange(repo, revs)

    if dostats:
        alldata = {
            'nbrevs': [],
            'nbmissingfiles': [],
        }
        if dotiming:
            alldata['nbrenames'] = []
            alldata['time'] = []

    roi = repo.revs('merge() and %ld', revs)
    for r in roi:
        ctx = repo[r]
        p1 = ctx.p1().rev()
        p2 = ctx.p2().rev()
        bases = repo.changelog._commonancestorsheads(p1, p2)
        for p in (p1, p2):
            for b in bases:
                base = repo[b]
                parent = repo[p]
                missing = copies._computeforwardmissing(base, parent)
                if not missing:
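                    # no files were added on this side, so there is nothing
                    # for copy tracing to detect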
                    continue
                data = {
                    b'source': base.hex(),
                    b'destination': parent.hex(),
                    b'nbrevs': len(repo.revs('only(%d, %d)', p, b)),
                    b'nbmissingfiles': len(missing),
                }
                if dostats:
                    alldata['nbrevs'].append(
                        (
                            data['nbrevs'],
                            base.hex(),
                            parent.hex(),
                        )
                    )
                    alldata['nbmissingfiles'].append(
                        (
                            data['nbmissingfiles'],
                            base.hex(),
                            parent.hex(),
                        )
                    )
                if dotiming:
                    begin = util.timer()
                    renames = copies.pathcopies(base, parent)
                    end = util.timer()
                    # not very stable timing since we did only one run
                    data['time'] = end - begin
                    data['nbrenamedfiles'] = len(renames)
                    if dostats:
                        alldata['time'].append(
                            (
                                data['time'],
                                base.hex(),
                                parent.hex(),
                            )
                        )
                        alldata['nbrenames'].append(
                            (
                                data['nbrenamedfiles'],
                                base.hex(),
                                parent.hex(),
                            )
                        )
                fm.startitem()
                fm.data(**data)
                out = data.copy()
                out['source'] = fm.hexfunc(base.node())
                out['destination'] = fm.hexfunc(parent.node())
                fm.plain(output % out)

    fm.end()
    if dostats:
        entries = [
            ('nbrevs', 'number of revisions covered'),
            ('nbmissingfiles', 'number of missing files at head'),
        ]
        if dotiming:
            entries.append(('nbrenames', 'renamed files'))
            entries.append(('time', 'time'))
        _displaystats(ui, opts, entries, alldata)


@command(b'perf::cca|perfcca', formatteropts)
def perfcca(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    timer(lambda: scmutil.casecollisionauditor(ui, False, repo.dirstate))
    fm.end()


@command(b'perf::fncacheload|perffncacheload', formatteropts)
def perffncacheload(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    s = repo.store

    def d():
        s.fncache._load()

    timer(d)
    fm.end()


@command(b'perf::fncachewrite|perffncachewrite', formatteropts)
def perffncachewrite(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    s = repo.store
    lock = repo.lock()
    s.fncache._load()
    tr = repo.transaction(b'perffncachewrite')
    tr.addbackup(b'fncache')
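    # each run flags the fncache as dirty so that write() does real work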

    def d():
        s.fncache._dirty = True
        s.fncache.write(tr)

    timer(d)
    tr.close()
    lock.release()
    fm.end()


@command(b'perf::fncacheencode|perffncacheencode', formatteropts)
def perffncacheencode(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    s = repo.store
    s.fncache._load()

    def d():
        for p in s.fncache.entries:
            s.encode(p)

    timer(d)
    fm.end()


def _bdiffworker(q, blocks, xdiff, ready, done):
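    # worker protocol: diff pairs are pulled from the queue until a None
    # sentinel shows up, then the thread parks on `ready` until the main
    # thread wakes it for the next batch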
    while not done.is_set():
        pair = q.get()
        while pair is not None:
            if xdiff:
                mdiff.bdiff.xdiffblocks(*pair)
            elif blocks:
                mdiff.bdiff.blocks(*pair)
            else:
                mdiff.textdiff(*pair)
            q.task_done()
            pair = q.get()
        q.task_done()  # for the None one
        with ready:
            ready.wait()


def _manifestrevision(repo, mnode):
    ml = repo.manifestlog

    if util.safehasattr(ml, b'getstorage'):
        store = ml.getstorage(b'')
    else:
        store = ml._revlog

    return store.revision(mnode)


@command(
    b'perf::bdiff|perfbdiff',
    revlogopts
    + formatteropts
    + [
        (
            b'',
            b'count',
            1,
            b'number of revisions to test (when using --startrev)',
        ),
        (b'', b'alldata', False, b'test bdiffs for all associated revisions'),
        (b'', b'threads', 0, b'number of threads to use (disable with 0)'),
        (b'', b'blocks', False, b'test computing diffs into blocks'),
        (b'', b'xdiff', False, b'use xdiff algorithm'),
    ],
    b'-c|-m|FILE REV',
)
def perfbdiff(ui, repo, file_, rev=None, count=None, threads=0, **opts):
    """benchmark a bdiff between revisions

    By default, benchmark a bdiff between its delta parent and itself.

    With ``--count``, benchmark bdiffs between delta parents and self for N
    revisions starting at the specified revision.

    With ``--alldata``, assume the requested revision is a changeset and
    measure bdiffs for all changes related to that changeset (manifest
    and filelogs).
    """
    opts = _byteskwargs(opts)

    if opts[b'xdiff'] and not opts[b'blocks']:
        raise error.CommandError(b'perfbdiff', b'--xdiff requires --blocks')

    if opts[b'alldata']:
        opts[b'changelog'] = True

    if opts.get(b'changelog') or opts.get(b'manifest'):
        file_, rev = None, file_
    elif rev is None:
        raise error.CommandError(b'perfbdiff', b'invalid arguments')

    blocks = opts[b'blocks']
    xdiff = opts[b'xdiff']
    textpairs = []

    r = cmdutil.openrevlog(repo, b'perfbdiff', file_, opts)

    startrev = r.rev(r.lookup(rev))
    for rev in range(startrev, min(startrev + count, len(r) - 1)):
        if opts[b'alldata']:
            # Load revisions associated with changeset.
            ctx = repo[rev]
            mtext = _manifestrevision(repo, ctx.manifestnode())
            for pctx in ctx.parents():
                pman = _manifestrevision(repo, pctx.manifestnode())
                textpairs.append((pman, mtext))

            # Load filelog revisions by iterating manifest delta.
            man = ctx.manifest()
            pman = ctx.p1().manifest()
            for filename, change in pman.diff(man).items():
                fctx = repo.file(filename)
                f1 = fctx.revision(change[0][0] or -1)
                f2 = fctx.revision(change[1][0] or -1)
                textpairs.append((f1, f2))
        else:
            dp = r.deltaparent(rev)
            textpairs.append((r.revision(dp), r.revision(rev)))

    withthreads = threads > 0
    if not withthreads:

        def d():
            for pair in textpairs:
                if xdiff:
                    mdiff.bdiff.xdiffblocks(*pair)
                elif blocks:
                    mdiff.bdiff.blocks(*pair)
                else:
                    mdiff.textdiff(*pair)

    else:
        q = queue()
        for i in _xrange(threads):
            q.put(None)
        ready = threading.Condition()
        done = threading.Event()
        for i in _xrange(threads):
            threading.Thread(
                target=_bdiffworker, args=(q, blocks, xdiff, ready, done)
            ).start()
        q.join()
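        # each worker consumes one priming None sentinel; join() returns once
        # all of them are parked on `ready`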

        def d():
            for pair in textpairs:
                q.put(pair)
            for i in _xrange(threads):
                q.put(None)
            with ready:
                ready.notify_all()
            q.join()

    timer, fm = gettimer(ui, opts)
    timer(d)
    fm.end()

    if withthreads:
        done.set()
        for i in _xrange(threads):
            q.put(None)
        with ready:
            ready.notify_all()


@command(
    b'perf::unbundle',
    formatteropts,
    b'BUNDLE_FILE',
)
def perf_unbundle(ui, repo, fname, **opts):
    """benchmark application of a bundle in a repository.

    This does not include the final transaction processing"""

    from mercurial import exchange
    from mercurial import bundle2
    from mercurial import transaction

    opts = _byteskwargs(opts)

    ### some compatibility hotfix
    #
    # the data attribute was dropped in 63edc384d3b7, a changeset introducing
    # a critical regression that broke transaction rollback for files that
    # are de-inlined.
    method = transaction.transaction._addentry
    pre_63edc384d3b7 = "data" in getargspec(method).args
    # the `detailed_exit_code` attribute is introduced in 33c0c25d0b0f,
    # a changeset that is a close descendant of 18415fc918a1, the changeset
    # that concluded the fix run for the bug introduced in 63edc384d3b7.
    args = getargspec(error.Abort.__init__).args
    post_18415fc918a1 = "detailed_exit_code" in args
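    # only the span between the regression (63edc384d3b7) and the conclusion
    # of its fix (18415fc918a1) needs inlining disabled below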

    old_max_inline = None
    try:
        if not (pre_63edc384d3b7 or post_18415fc918a1):
            # disable inlining
            old_max_inline = mercurial.revlog._maxinline
            # large enough to never happen
            mercurial.revlog._maxinline = 2 ** 50

        with repo.lock():
            bundle = [None, None]
            orig_quiet = repo.ui.quiet
            try:
                repo.ui.quiet = True
                with open(fname, mode="rb") as f:

                    def noop_report(*args, **kwargs):
                        pass

                    def setup():
                        gen, tr = bundle
                        if tr is not None:
                            tr.abort()
                        bundle[:] = [None, None]
                        f.seek(0)
                        bundle[0] = exchange.readbundle(ui, f, fname)
                        bundle[1] = repo.transaction(b'perf::unbundle')
                        # silence the transaction
                        bundle[1]._report = noop_report

                    def apply():
                        gen, tr = bundle
                        bundle2.applybundle(
                            repo,
                            gen,
                            tr,
                            source=b'perf::unbundle',
                            url=fname,
                        )

                    timer, fm = gettimer(ui, opts)
                    timer(apply, setup=setup)
                    fm.end()
            finally:
                repo.ui.quiet = orig_quiet
                gen, tr = bundle
                if tr is not None:
                    tr.abort()
    finally:
        if old_max_inline is not None:
            mercurial.revlog._maxinline = old_max_inline


@command(
    b'perf::unidiff|perfunidiff',
    revlogopts
    + formatteropts
    + [
        (
            b'',
            b'count',
            1,
            b'number of revisions to test (when using --startrev)',
        ),
        (b'', b'alldata', False, b'test unidiffs for all associated revisions'),
    ],
    b'-c|-m|FILE REV',
)
def perfunidiff(ui, repo, file_, rev=None, count=None, **opts):
    """benchmark a unified diff between revisions

    This doesn't include any copy tracing - it's just a unified diff
    of the texts.

    By default, benchmark the diff between the requested revision and its
    delta parent.

    With ``--count``, benchmark diffs between delta parents and self for N
    revisions starting at the specified revision.

    With ``--alldata``, assume the requested revision is a changeset and
    measure diffs for all changes related to that changeset (manifest
    and filelogs).
    """
    opts = _byteskwargs(opts)
    if opts[b'alldata']:
        opts[b'changelog'] = True

    if opts.get(b'changelog') or opts.get(b'manifest'):
        file_, rev = None, file_
    elif rev is None:
        raise error.CommandError(b'perfunidiff', b'invalid arguments')

    textpairs = []

    r = cmdutil.openrevlog(repo, b'perfunidiff', file_, opts)

    startrev = r.rev(r.lookup(rev))
    for rev in range(startrev, min(startrev + count, len(r) - 1)):
        if opts[b'alldata']:
            # Load revisions associated with changeset.
            ctx = repo[rev]
            mtext = _manifestrevision(repo, ctx.manifestnode())
            for pctx in ctx.parents():
                pman = _manifestrevision(repo, pctx.manifestnode())
                textpairs.append((pman, mtext))

            # Load filelog revisions by iterating manifest delta.
            man = ctx.manifest()
            pman = ctx.p1().manifest()
            for filename, change in pman.diff(man).items():
                fctx = repo.file(filename)
                f1 = fctx.revision(change[0][0] or -1)
                f2 = fctx.revision(change[1][0] or -1)
                textpairs.append((f1, f2))
        else:
            dp = r.deltaparent(rev)
            textpairs.append((r.revision(dp), r.revision(rev)))

    def d():
        for left, right in textpairs:
            # The date strings don't matter, so we pass empty strings.
            headerlines, hunks = mdiff.unidiff(
                left, b'', right, b'', b'left', b'right', binary=False
            )
            # consume iterators in roughly the way patch.py does
            b'\n'.join(headerlines)
            b''.join(sum((list(hlines) for hrange, hlines in hunks), []))

    timer, fm = gettimer(ui, opts)
    timer(d)
    fm.end()


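# Example invocations of perf::unidiff (hypothetical session; the
# repository, file name and revision numbers are made up):
#
#   hg perf::unidiff -m 1000              # manifest diff for rev 1000
#   hg perf::unidiff FILE 1000 --count 50 # 50 filelog revs from rev 1000
#   hg perf::unidiff -c 1000 --alldata    # all texts touched by cset 1000

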
@command(b'perf::diffwd|perfdiffwd', formatteropts)
def perfdiffwd(ui, repo, **opts):
    """Profile diff of working directory changes"""
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    options = {
        'w': 'ignore_all_space',
        'b': 'ignore_space_change',
        'B': 'ignore_blank_lines',
    }

    for diffopt in ('', 'w', 'b', 'B', 'wB'):
        opts = {options[c]: b'1' for c in diffopt}

        def d():
            ui.pushbuffer()
            commands.diff(ui, repo, **opts)
            ui.popbuffer()

        diffopt = diffopt.encode('ascii')
        title = b'diffopts: %s' % (diffopt and (b'-' + diffopt) or b'none')
        timer(d, title=title)
    fm.end()


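# For example, in perfdiffwd above, diffopt 'wB' expands to the keyword
# arguments {'ignore_all_space': b'1', 'ignore_blank_lines': b'1'} and is
# reported under the title 'diffopts: -wB'.

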
@command(
    b'perf::revlogindex|perfrevlogindex',
    revlogopts + formatteropts,
    b'-c|-m|FILE',
)
def perfrevlogindex(ui, repo, file_=None, **opts):
    """Benchmark operations against a revlog index.

    This tests constructing a revlog instance, reading index data,
    parsing index data, and performing various operations related to
    index data.
    """

    opts = _byteskwargs(opts)

    rl = cmdutil.openrevlog(repo, b'perfrevlogindex', file_, opts)

    opener = getattr(rl, 'opener')  # trick linter
    # compat with hg <= 5.8
    radix = getattr(rl, 'radix', None)
    indexfile = getattr(rl, '_indexfile', None)
    if indexfile is None:
        # compatibility with <= hg-5.8
        indexfile = getattr(rl, 'indexfile')
    data = opener.read(indexfile)

    header = struct.unpack(b'>I', data[0:4])[0]
    version = header & 0xFFFF
    if version == 1:
        inline = header & (1 << 16)
    else:
        raise error.Abort(b'unsupported revlog version: %d' % version)

    parse_index_v1 = getattr(mercurial.revlog, 'parse_index_v1', None)
    if parse_index_v1 is None:
        parse_index_v1 = mercurial.revlog.revlogio().parseindex

    rllen = len(rl)

    node0 = rl.node(0)
    node25 = rl.node(rllen // 4)
    node50 = rl.node(rllen // 2)
    node75 = rl.node(rllen // 4 * 3)
    node100 = rl.node(rllen - 1)

    allrevs = range(rllen)
    allrevsrev = list(reversed(allrevs))
    allnodes = [rl.node(rev) for rev in range(rllen)]
    allnodesrev = list(reversed(allnodes))

    def constructor():
        if radix is not None:
            revlog(opener, radix=radix)
        else:
            # hg <= 5.8
            revlog(opener, indexfile=indexfile)

    def read():
        with opener(indexfile) as fh:
            fh.read()

    def parseindex():
        parse_index_v1(data, inline)

    def getentry(revornode):
        index = parse_index_v1(data, inline)[0]
        index[revornode]

    def getentries(revs, count=1):
        index = parse_index_v1(data, inline)[0]

        for i in range(count):
            for rev in revs:
                index[rev]

    def resolvenode(node):
        index = parse_index_v1(data, inline)[0]
        rev = getattr(index, 'rev', None)
        if rev is None:
            nodemap = getattr(parse_index_v1(data, inline)[0], 'nodemap', None)
            # This only works for the C code.
            if nodemap is None:
                return
            rev = nodemap.__getitem__

        try:
            rev(node)
        except error.RevlogError:
            pass

    def resolvenodes(nodes, count=1):
        index = parse_index_v1(data, inline)[0]
        rev = getattr(index, 'rev', None)
        if rev is None:
            nodemap = getattr(parse_index_v1(data, inline)[0], 'nodemap', None)
            # This only works for the C code.
            if nodemap is None:
                return
            rev = nodemap.__getitem__

        for i in range(count):
            for node in nodes:
                try:
                    rev(node)
                except error.RevlogError:
                    pass

    benches = [
        (constructor, b'revlog constructor'),
        (read, b'read'),
        (parseindex, b'create index object'),
        (lambda: getentry(0), b'retrieve index entry for rev 0'),
        (lambda: resolvenode(b'a' * 20), b'look up missing node'),
        (lambda: resolvenode(node0), b'look up node at rev 0'),
        (lambda: resolvenode(node25), b'look up node at 1/4 len'),
        (lambda: resolvenode(node50), b'look up node at 1/2 len'),
        (lambda: resolvenode(node75), b'look up node at 3/4 len'),
        (lambda: resolvenode(node100), b'look up node at tip'),
        # 2x variation is to measure caching impact.
        (lambda: resolvenodes(allnodes), b'look up all nodes (forward)'),
        (lambda: resolvenodes(allnodes, 2), b'look up all nodes 2x (forward)'),
        (lambda: resolvenodes(allnodesrev), b'look up all nodes (reverse)'),
        (
            lambda: resolvenodes(allnodesrev, 2),
            b'look up all nodes 2x (reverse)',
        ),
        (lambda: getentries(allrevs), b'retrieve all index entries (forward)'),
        (
            lambda: getentries(allrevs, 2),
            b'retrieve all index entries 2x (forward)',
        ),
        (
            lambda: getentries(allrevsrev),
            b'retrieve all index entries (reverse)',
        ),
        (
            lambda: getentries(allrevsrev, 2),
            b'retrieve all index entries 2x (reverse)',
        ),
    ]

    for fn, title in benches:
        timer, fm = gettimer(ui, opts)
        timer(fn, title=title)
        fm.end()


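# A self-contained sketch of the v1 header layout parsed by perfrevlogindex
# above (illustrative only; `_example_revlog_v1_header` is hypothetical and
# never called): the first 4 bytes, big-endian, carry the version in the
# low 16 bits and the inline flag at bit 16.
def _example_revlog_v1_header():
    header = struct.unpack(b'>I', struct.pack(b'>I', (1 << 16) | 1))[0]
    assert header & 0xFFFF == 1  # revlog version 1
    assert header & (1 << 16)  # inline flag is set

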
@command(
    b'perf::revlogrevisions|perfrevlogrevisions',
    revlogopts
    + formatteropts
    + [
        (b'd', b'dist', 100, b'distance between the revisions'),
        (b's', b'startrev', 0, b'revision to start reading at'),
        (b'', b'reverse', False, b'read in reverse'),
    ],
    b'-c|-m|FILE',
)
def perfrevlogrevisions(
    ui, repo, file_=None, startrev=0, reverse=False, **opts
):
    """Benchmark reading a series of revisions from a revlog.

    By default, we read every ``-d/--dist`` revision from 0 to tip of
    the specified revlog.

    The start revision can be defined via ``-s/--startrev``.
    """
    opts = _byteskwargs(opts)

    rl = cmdutil.openrevlog(repo, b'perfrevlogrevisions', file_, opts)
    rllen = getlen(ui)(rl)

    if startrev < 0:
        startrev = rllen + startrev

    def d():
        rl.clearcaches()

        beginrev = startrev
        endrev = rllen
        dist = opts[b'dist']

        if reverse:
            beginrev, endrev = endrev - 1, beginrev - 1
            dist = -1 * dist

        for x in _xrange(beginrev, endrev, dist):
            # Old Mercurial versions don't support passing an int here.
            n = rl.node(x)
            rl.revision(n)

    timer, fm = gettimer(ui, opts)
    timer(d)
    fm.end()


@command(
    b'perf::revlogwrite|perfrevlogwrite',
    revlogopts
    + formatteropts
    + [
        (b's', b'startrev', 1000, b'revision to start writing at'),
        (b'', b'stoprev', -1, b'last revision to write'),
        (b'', b'count', 3, b'number of passes to perform'),
        (b'', b'details', False, b'print timing for every revision tested'),
        (b'', b'source', b'full', b'the kind of data fed into the revlog'),
        (b'', b'lazydeltabase', True, b'try the provided delta first'),
        (b'', b'clear-caches', True, b'clear revlog cache between calls'),
    ],
    b'-c|-m|FILE',
)
def perfrevlogwrite(ui, repo, file_=None, startrev=1000, stoprev=-1, **opts):
    """Benchmark writing a series of revisions to a revlog.

    Possible source values are:
    * `full`: add from a full text (default).
    * `parent-1`: add from a delta to the first parent
    * `parent-2`: add from a delta to the second parent if it exists
      (use a delta from the first parent otherwise)
    * `parent-smallest`: add from the smallest delta (either p1 or p2)
    * `storage`: add from the existing precomputed deltas

    Note: This performance command measures performance in a custom way. As a
    result some of the global configuration of the 'perf' command does not
    apply to it:

    * ``pre-run``: disabled

    * ``profile-benchmark``: disabled

    * ``run-limits``: disabled, use --count instead
    """
    opts = _byteskwargs(opts)

    rl = cmdutil.openrevlog(repo, b'perfrevlogwrite', file_, opts)
    rllen = getlen(ui)(rl)
    if startrev < 0:
        startrev = rllen + startrev
    if stoprev < 0:
        stoprev = rllen + stoprev

    lazydeltabase = opts['lazydeltabase']
    source = opts['source']
    clearcaches = opts['clear_caches']
    validsource = (
        b'full',
        b'parent-1',
        b'parent-2',
        b'parent-smallest',
        b'storage',
    )
    if source not in validsource:
        raise error.Abort('invalid source type: %s' % source)

    ### actually gather results
    count = opts['count']
    if count <= 0:
        raise error.Abort('invalid run count: %d' % count)
    allresults = []
    for c in range(count):
        timing = _timeonewrite(
            ui,
            rl,
            source,
            startrev,
            stoprev,
            c + 1,
            lazydeltabase=lazydeltabase,
            clearcaches=clearcaches,
        )
        allresults.append(timing)

    ### consolidate the results in a single list
    results = []
    for idx, (rev, t) in enumerate(allresults[0]):
        ts = [t]
        for other in allresults[1:]:
            orev, ot = other[idx]
            assert orev == rev
            ts.append(ot)
        results.append((rev, ts))
    resultcount = len(results)

    ### Compute and display relevant statistics

    # get a formatter
    fm = ui.formatter(b'perf', opts)
    displayall = ui.configbool(b"perf", b"all-timing", True)

    # print individual details if requested
    if opts['details']:
        for idx, item in enumerate(results, 1):
            rev, data = item
            title = 'revisions #%d of %d, rev %d' % (idx, resultcount, rev)
            formatone(fm, data, title=title, displayall=displayall)

    # sort results by median time
    results.sort(key=lambda x: sorted(x[1])[len(x[1]) // 2])
    # list of (name, index) to display
    relevants = [
        ("min", 0),
        ("10%", resultcount * 10 // 100),
        ("25%", resultcount * 25 // 100),
        ("50%", resultcount * 50 // 100),
        ("75%", resultcount * 75 // 100),
        ("90%", resultcount * 90 // 100),
        ("95%", resultcount * 95 // 100),
        ("99%", resultcount * 99 // 100),
        ("99.9%", resultcount * 999 // 1000),
        ("99.99%", resultcount * 9999 // 10000),
        ("99.999%", resultcount * 99999 // 100000),
        ("max", -1),
    ]
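    # For example (floor indexing, no interpolation): with 200 timed
    # revisions, "90%" selects sorted index 200 * 90 // 100 == 180, and
    # "max" selects index -1, the slowest entry.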
    if not ui.quiet:
        for name, idx in relevants:
            data = results[idx]
            title = '%s of %d, rev %d' % (name, resultcount, data[0])
            formatone(fm, data[1], title=title, displayall=displayall)

    # XXX summing that many floats will not be very precise, we ignore this
    # fact for now
    totaltime = []
    for item in allresults:
        totaltime.append(
            (
                sum(x[1][0] for x in item),
                sum(x[1][1] for x in item),
                sum(x[1][2] for x in item),
            )
        )
    formatone(
        fm,
        totaltime,
        title="total time (%d revs)" % resultcount,
        displayall=displayall,
    )
    fm.end()


class _faketr:
    # Minimal transaction stand-in: the only transaction method exercised
    # by the revlog writes below is add(), and ignoring it is fine for
    # timing purposes.
    def add(s, x, y, z=None):
        return None


def _timeonewrite(
    ui,
    orig,
    source,
    startrev,
    stoprev,
    runidx=None,
    lazydeltabase=True,
    clearcaches=True,
):
    timings = []
    tr = _faketr()
    with _temprevlog(ui, orig, startrev) as dest:
        if hasattr(dest, "delta_config"):
            dest.delta_config.lazy_delta_base = lazydeltabase
        else:
            dest._lazydeltabase = lazydeltabase
        revs = list(orig.revs(startrev, stoprev))
        total = len(revs)
        topic = 'adding'
        if runidx is not None:
            topic += ' (run #%d)' % runidx
        # Support both old and new progress API
        if util.safehasattr(ui, 'makeprogress'):
            progress = ui.makeprogress(topic, unit='revs', total=total)

            def updateprogress(pos):
                progress.update(pos)

            def completeprogress():
                progress.complete()

        else:

            def updateprogress(pos):
                ui.progress(topic, pos, unit='revs', total=total)

            def completeprogress():
                ui.progress(topic, None, unit='revs', total=total)

        for idx, rev in enumerate(revs):
            updateprogress(idx)
            addargs, addkwargs = _getrevisionseed(orig, rev, tr, source)
            if clearcaches:
                dest.index.clearcaches()
                dest.clearcaches()
            with timeone() as r:
                dest.addrawrevision(*addargs, **addkwargs)
            timings.append((rev, r[0]))
        updateprogress(total)
        completeprogress()
    return timings


def _getrevisionseed(orig, rev, tr, source):
    from mercurial.node import nullid

    linkrev = orig.linkrev(rev)
    node = orig.node(rev)
    p1, p2 = orig.parents(node)
    flags = orig.flags(rev)
    cachedelta = None
    text = None

    if source == b'full':
        text = orig.revision(rev)
    elif source == b'parent-1':
        baserev = orig.rev(p1)
        cachedelta = (baserev, orig.revdiff(p1, rev))
    elif source == b'parent-2':
        parent = p2
        if p2 == nullid:
            parent = p1
        baserev = orig.rev(parent)
        cachedelta = (baserev, orig.revdiff(parent, rev))
    elif source == b'parent-smallest':
        p1diff = orig.revdiff(p1, rev)
        parent = p1
        diff = p1diff
        if p2 != nullid:
            p2diff = orig.revdiff(p2, rev)
            if len(p1diff) > len(p2diff):
                parent = p2
                diff = p2diff
        baserev = orig.rev(parent)
        cachedelta = (baserev, diff)
    elif source == b'storage':
        baserev = orig.deltaparent(rev)
        cachedelta = (baserev, orig.revdiff(orig.node(baserev), rev))

    return (
        (text, tr, linkrev, p1, p2),
        {'node': node, 'flags': flags, 'cachedelta': cachedelta},
    )


@contextlib.contextmanager
def _temprevlog(ui, orig, truncaterev):
    from mercurial import vfs as vfsmod

    if orig._inline:
        raise error.Abort('not supporting inline revlog (yet)')
    revlogkwargs = {}
    k = 'upperboundcomp'
    if util.safehasattr(orig, k):
        revlogkwargs[k] = getattr(orig, k)

    indexfile = getattr(orig, '_indexfile', None)
    if indexfile is None:
        # compatibility with <= hg-5.8
        indexfile = getattr(orig, 'indexfile')
    origindexpath = orig.opener.join(indexfile)

    datafile = getattr(orig, '_datafile', getattr(orig, 'datafile'))
    origdatapath = orig.opener.join(datafile)
    radix = b'revlog'
    indexname = b'revlog.i'
    dataname = b'revlog.d'

    tmpdir = tempfile.mkdtemp(prefix='tmp-hgperf-')
    try:
        # copy the data file in a temporary directory
        ui.debug('copying data in %s\n' % tmpdir)
        destindexpath = os.path.join(tmpdir, 'revlog.i')
        destdatapath = os.path.join(tmpdir, 'revlog.d')
        shutil.copyfile(origindexpath, destindexpath)
        shutil.copyfile(origdatapath, destdatapath)

        # remove the data we want to add again: index entries are fixed
        # size, so the index can be cut at `truncaterev * entry_size`,
        # and the data file at the start offset of `truncaterev`
        ui.debug('truncating data to be rewritten\n')
        with open(destindexpath, 'ab') as index:
            index.seek(0)
            index.truncate(truncaterev * orig._io.size)
        with open(destdatapath, 'ab') as data:
            data.seek(0)
            data.truncate(orig.start(truncaterev))

        # instantiate a new revlog from the temporary copy
        ui.debug('instantiating revlog from the truncated copy\n')
        vfs = vfsmod.vfs(tmpdir)
        vfs.options = getattr(orig.opener, 'options', None)

        try:
            dest = revlog(vfs, radix=radix, **revlogkwargs)
        except TypeError:
            dest = revlog(
                vfs, indexfile=indexname, datafile=dataname, **revlogkwargs
            )
        if dest._inline:
            raise error.Abort('not supporting inline revlog (yet)')
        # make sure internals are initialized
        dest.revision(len(dest) - 1)
        yield dest
        del dest, vfs
    finally:
        shutil.rmtree(tmpdir, True)


@command(
    b'perf::revlogchunks|perfrevlogchunks',
    revlogopts
    + formatteropts
    + [
        (b'e', b'engines', b'', b'compression engines to use'),
        (b's', b'startrev', 0, b'revision to start at'),
    ],
    b'-c|-m|FILE',
)
def perfrevlogchunks(ui, repo, file_=None, engines=None, startrev=0, **opts):
    """Benchmark operations on revlog chunks.

    Logically, each revlog is a collection of fulltext revisions. However,
    stored within each revlog are "chunks" of possibly compressed data. This
    data needs to be read and decompressed or compressed and written.

    This command measures the time it takes to read+decompress and recompress
    chunks in a revlog. It effectively isolates I/O and compression
    performance. For measurements of higher-level operations like resolving
    revisions, see ``perfrevlogrevisions`` and ``perfrevlogrevision``.
    """
    opts = _byteskwargs(opts)

    rl = cmdutil.openrevlog(repo, b'perfrevlogchunks', file_, opts)

    # - _chunkraw was renamed to _getsegmentforrevs
    # - _getsegmentforrevs was moved onto the inner object
    try:
        segmentforrevs = rl._inner.get_segment_for_revs
    except AttributeError:
        try:
            segmentforrevs = rl._getsegmentforrevs
        except AttributeError:
            segmentforrevs = rl._chunkraw

    # Verify engines argument.
    if engines:
        engines = {e.strip() for e in engines.split(b',')}
        for engine in engines:
            try:
                util.compressionengines[engine]
            except KeyError:
                raise error.Abort(b'unknown compression engine: %s' % engine)
    else:
        engines = []
        for e in util.compengines:
            engine = util.compengines[e]
            try:
                if engine.available():
                    engine.revlogcompressor().compress(b'dummy')
                    engines.append(e)
            except NotImplementedError:
                pass

    revs = list(rl.revs(startrev, len(rl) - 1))

    @contextlib.contextmanager
    def reading(rl):
        if getattr(rl, 'reading', None) is not None:
            with rl.reading():
                yield None
        elif rl._inline:
            indexfile = getattr(rl, '_indexfile', None)
            if indexfile is None:
                # compatibility with <= hg-5.8
                indexfile = getattr(rl, 'indexfile')
            yield getsvfs(repo)(indexfile)
        else:
            datafile = getattr(rl, '_datafile', getattr(rl, 'datafile'))
            yield getsvfs(repo)(datafile)

    if getattr(rl, 'reading', None) is not None:

        @contextlib.contextmanager
        def lazy_reading(rl):
            with rl.reading():
                yield

    else:

        @contextlib.contextmanager
        def lazy_reading(rl):
            yield

    def doread():
        rl.clearcaches()
        for rev in revs:
            with lazy_reading(rl):
                segmentforrevs(rev, rev)

    def doreadcachedfh():
        rl.clearcaches()
        with reading(rl) as fh:
            if fh is not None:
                for rev in revs:
                    segmentforrevs(rev, rev, df=fh)
            else:
                for rev in revs:
                    segmentforrevs(rev, rev)

    def doreadbatch():
        rl.clearcaches()
        with lazy_reading(rl):
            segmentforrevs(revs[0], revs[-1])

    def doreadbatchcachedfh():
        rl.clearcaches()
        with reading(rl) as fh:
            if fh is not None:
                segmentforrevs(revs[0], revs[-1], df=fh)
            else:
                segmentforrevs(revs[0], revs[-1])

    def dochunk():
        rl.clearcaches()
        with reading(rl) as fh:
            if fh is not None:
                for rev in revs:
                    rl._chunk(rev, df=fh)
            else:
                for rev in revs:
                    rl._chunk(rev)

    chunks = [None]

    def dochunkbatch():
        rl.clearcaches()
        with reading(rl) as fh:
            if fh is not None:
                # Save chunks as a side-effect.
                chunks[0] = rl._chunks(revs, df=fh)
            else:
                # Save chunks as a side-effect.
                chunks[0] = rl._chunks(revs)

    def docompress(compressor):
        rl.clearcaches()

        try:
            # Swap in the requested compression engine.
            oldcompressor = rl._compressor
            rl._compressor = compressor
            for chunk in chunks[0]:
                rl.compress(chunk)
        finally:
            rl._compressor = oldcompressor

    benches = [
        (lambda: doread(), b'read'),
        (lambda: doreadcachedfh(), b'read w/ reused fd'),
        (lambda: doreadbatch(), b'read batch'),
        (lambda: doreadbatchcachedfh(), b'read batch w/ reused fd'),
        (lambda: dochunk(), b'chunk'),
        (lambda: dochunkbatch(), b'chunk batch'),
    ]

    for engine in sorted(engines):
        compressor = util.compengines[engine].revlogcompressor()
        benches.append(
            (
                functools.partial(docompress, compressor),
                b'compress w/ %s' % engine,
            )
        )

    for fn, title in benches:
        timer, fm = gettimer(ui, opts)
        timer(fn, title=title)
        fm.end()


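# A sketch of the engine-swapping pattern used by docompress() above, with
# a stdlib compressor standing in for a revlog compression engine
# (illustrative only; `_example_compress_timing` is hypothetical and never
# called by any command):
def _example_compress_timing(chunks, timer):
    import zlib

    def run():
        for chunk in chunks:
            zlib.compress(chunk)

    timer(run, title=b'compress w/ stdlib zlib')

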
@command(
    b'perf::revlogrevision|perfrevlogrevision',
    revlogopts
    + formatteropts
    + [(b'', b'cache', False, b'use caches instead of clearing')],
    b'-c|-m|FILE REV',
)
def perfrevlogrevision(ui, repo, file_, rev=None, cache=None, **opts):
    """Benchmark obtaining a revlog revision.

    Obtaining a revlog revision consists of roughly the following steps:

    1. Compute the delta chain
    2. Slice the delta chain if applicable
    3. Obtain the raw chunks for that delta chain
    4. Decompress each raw chunk
    5. Apply binary patches to obtain fulltext
    6. Verify hash of fulltext

    This command measures the time spent in each of these phases.
    """
    opts = _byteskwargs(opts)

    if opts.get(b'changelog') or opts.get(b'manifest'):
        file_, rev = None, file_
    elif rev is None:
        raise error.CommandError(b'perfrevlogrevision', b'invalid arguments')

    r = cmdutil.openrevlog(repo, b'perfrevlogrevision', file_, opts)

    # - _chunkraw was renamed to _getsegmentforrevs
    # - _getsegmentforrevs was moved onto the inner object
    try:
        segmentforrevs = r._inner.get_segment_for_revs
    except AttributeError:
        try:
            segmentforrevs = r._getsegmentforrevs
        except AttributeError:
            segmentforrevs = r._chunkraw

    node = r.lookup(rev)
    rev = r.rev(node)

    if getattr(r, 'reading', None) is not None:

        @contextlib.contextmanager
        def lazy_reading(r):
            with r.reading():
                yield

    else:

        @contextlib.contextmanager
        def lazy_reading(r):
            yield

    def getrawchunks(data, chain):
        start = r.start
        length = r.length
        inline = r._inline
        try:
            iosize = r.index.entry_size
        except AttributeError:
            iosize = r._io.size
        buffer = util.buffer

        chunks = []
        ladd = chunks.append
        for idx, item in enumerate(chain):
            offset = start(item[0])
            bits = data[idx]
            for rev in item:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(buffer(bits, chunkstart - offset, chunklength))

        return chunks

    def dodeltachain(rev):
        if not cache:
            r.clearcaches()
        r._deltachain(rev)

    def doread(chain):
        if not cache:
            r.clearcaches()
        for item in slicedchain:
            with lazy_reading(r):
                segmentforrevs(item[0], item[-1])

    def doslice(r, chain, size):
        for s in slicechunk(r, chain, targetsize=size):
            pass

    def dorawchunks(data, chain):
        if not cache:
            r.clearcaches()
        getrawchunks(data, chain)

    def dodecompress(chunks):
        decomp = r.decompress
        for chunk in chunks:
            decomp(chunk)

    def dopatch(text, bins):
        if not cache:
            r.clearcaches()
        mdiff.patches(text, bins)

    def dohash(text):
        if not cache:
            r.clearcaches()
        r.checkhash(text, node, rev=rev)

    def dorevision():
        if not cache:
            r.clearcaches()
        r.revision(node)

    try:
        from mercurial.revlogutils.deltas import slicechunk
    except ImportError:
        slicechunk = getattr(revlog, '_slicechunk', None)

    size = r.length(rev)
    chain = r._deltachain(rev)[0]

    with_sparse_read = False
    if hasattr(r, 'data_config'):
        with_sparse_read = r.data_config.with_sparse_read
    elif hasattr(r, '_withsparseread'):
        with_sparse_read = r._withsparseread
    if not with_sparse_read:
        slicedchain = (chain,)
    else:
        slicedchain = tuple(slicechunk(r, chain, targetsize=size))
    data = [segmentforrevs(seg[0], seg[-1])[1] for seg in slicedchain]
    rawchunks = getrawchunks(data, slicedchain)
    bins = r._chunks(chain)
    text = bytes(bins[0])
    bins = bins[1:]
    text = mdiff.patches(text, bins)

    benches = [
        (lambda: dorevision(), b'full'),
        (lambda: dodeltachain(rev), b'deltachain'),
        (lambda: doread(chain), b'read'),
    ]

    if with_sparse_read:
        slicing = (lambda: doslice(r, chain, size), b'slice-sparse-chain')
        benches.append(slicing)

    benches.extend(
        [
            (lambda: dorawchunks(data, slicedchain), b'rawchunks'),
            (lambda: dodecompress(rawchunks), b'decompress'),
            (lambda: dopatch(text, bins), b'patch'),
            (lambda: dohash(text), b'hash'),
        ]
    )

    timer, fm = gettimer(ui, opts)
    for fn, title in benches:
        timer(fn, title=title)
    fm.end()


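# A condensed sketch of steps 1, 3, 4 and 5 from the docstring above,
# mirroring the setup code in perfrevlogrevision (illustrative only;
# `_example_rebuild_fulltext` is hypothetical and relies on private revlog
# APIs that vary across hg versions):
def _example_rebuild_fulltext(r, rev):
    chain = r._deltachain(rev)[0]  # 1. compute the delta chain
    bins = r._chunks(chain)  # 3. + 4. read and decompress the chunks
    # 5. apply the binary patches on top of the base fulltext
    return mdiff.patches(bytes(bins[0]), bins[1:])

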
@command(
    b'perf::revset|perfrevset',
    [
        (b'C', b'clear', False, b'clear volatile cache between each call.'),
        (b'', b'contexts', False, b'obtain changectx for each revision'),
    ]
    + formatteropts,
    b"REVSET",
)
def perfrevset(ui, repo, expr, clear=False, contexts=False, **opts):
    """benchmark the execution time of a revset

    Use the --clear option if you need to evaluate the impact of the volatile
    revision set caches on revset execution. The volatile caches hold
    filtering and obsolescence related data."""
    opts = _byteskwargs(opts)

    timer, fm = gettimer(ui, opts)

    def d():
        if clear:
            repo.invalidatevolatilesets()
        if contexts:
            for ctx in repo.set(expr):
                pass
        else:
            for r in repo.revs(expr):
                pass

    timer(d)
    fm.end()


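# Example invocations of perf::revset (hypothetical session): compare
# revset evaluation with and without the volatile caches, and with
# changectx objects built:
#
#   hg perf::revset 'draft()'
#   hg perf::revset --clear 'draft()'
#   hg perf::revset --contexts 'author(alice)'

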
@command(
    b'perf::volatilesets|perfvolatilesets',
    [
        (b'', b'clear-obsstore', False, b'drop obsstore between each call.'),
    ]
    + formatteropts,
)
def perfvolatilesets(ui, repo, *names, **opts):
    """benchmark the computation of various volatile sets

    Volatile sets compute elements related to filtering and obsolescence."""
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    repo = repo.unfiltered()

    def getobs(name):
        def d():
            repo.invalidatevolatilesets()
            if opts[b'clear_obsstore']:
                clearfilecache(repo, b'obsstore')
            obsolete.getrevs(repo, name)

        return d

    allobs = sorted(obsolete.cachefuncs)
    if names:
        allobs = [n for n in allobs if n in names]

    for name in allobs:
        timer(getobs(name), title=name)

    def getfiltered(name):
        def d():
            repo.invalidatevolatilesets()
            if opts[b'clear_obsstore']:
                clearfilecache(repo, b'obsstore')
            repoview.filterrevs(repo, name)

        return d

    allfilter = sorted(repoview.filtertable)
    if names:
        allfilter = [n for n in allfilter if n in names]

    for name in allfilter:
        timer(getfiltered(name), title=name)
    fm.end()


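# Usage sketch (illustrative; the exact set names depend on the Mercurial
# version):
#
#   $ hg perf::volatilesets                      # benchmark every known set
#   $ hg perf::volatilesets obsolete --clear-obsstore
#
# Positional names restrict the run to matching obsolescence sets and
# repoview filters.

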
@command(
    b'perf::branchmap|perfbranchmap',
    [
        (b'f', b'full', False, b'Includes build time of subset'),
        (
            b'',
            b'clear-revbranch',
            False,
            b'purge the revbranch cache between computation',
        ),
    ]
    + formatteropts,
)
def perfbranchmap(ui, repo, *filternames, **opts):
    """benchmark the update of a branchmap

    This benchmarks the full repo.branchmap() call with read and write disabled
    """
    opts = _byteskwargs(opts)
    full = opts.get(b"full", False)
    clear_revbranch = opts.get(b"clear_revbranch", False)
    timer, fm = gettimer(ui, opts)

    def getbranchmap(filtername):
        """generate a benchmark function for the filtername"""
        if filtername is None:
            view = repo
        else:
            view = repo.filtered(filtername)
        if util.safehasattr(view._branchcaches, '_per_filter'):
            filtered = view._branchcaches._per_filter
        else:
            # older versions
            filtered = view._branchcaches

        def d():
            if clear_revbranch:
                repo.revbranchcache()._clear()
            if full:
                view._branchcaches.clear()
            else:
                filtered.pop(filtername, None)
            view.branchmap()

        return d

    # order filters from smaller subsets to bigger ones
    possiblefilters = set(repoview.filtertable)
    if filternames:
        possiblefilters &= set(filternames)
    subsettable = getbranchmapsubsettable()
    allfilters = []
    while possiblefilters:
        for name in possiblefilters:
            subset = subsettable.get(name)
            if subset not in possiblefilters:
                break
        else:
            assert False, b'subset cycle %s!' % possiblefilters
        allfilters.append(name)
        possiblefilters.remove(name)

    # warm the cache
    if not full:
        for name in allfilters:
            repo.filtered(name).branchmap()
    if not filternames or b'unfiltered' in filternames:
        # add unfiltered
        allfilters.append(None)

    if util.safehasattr(branchmap.branchcache, 'fromfile'):
        branchcacheread = safeattrsetter(branchmap.branchcache, b'fromfile')
        branchcacheread.set(classmethod(lambda *args: None))
    else:
        # older versions
        branchcacheread = safeattrsetter(branchmap, b'read')
        branchcacheread.set(lambda *args: None)
    branchcachewrite = safeattrsetter(branchmap.branchcache, b'write')
    branchcachewrite.set(lambda *args: None)
    try:
        for name in allfilters:
            printname = name
            if name is None:
                printname = b'unfiltered'
            timer(getbranchmap(name), title=printname)
    finally:
        branchcacheread.restore()
        branchcachewrite.restore()
    fm.end()


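# Usage sketch (illustrative): benchmark one or more repoview filters,
# optionally including the build time of their subsets:
#
#   $ hg perf::branchmap visible served
#   $ hg perf::branchmap --full
#
# Filter names come from repoview.filtertable ('visible', 'served', ...).

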
@command(
    b'perf::branchmapupdate|perfbranchmapupdate',
    [
        (b'', b'base', [], b'subset of revisions to start from'),
        (b'', b'target', [], b'subset of revisions to end with'),
        (b'', b'clear-caches', False, b'clear caches between each run'),
    ]
    + formatteropts,
)
def perfbranchmapupdate(ui, repo, base=(), target=(), **opts):
    """benchmark branchmap update from <base> revs to <target> revs

    If `--clear-caches` is passed, the following items will be reset before
    each update:
    * the changelog instance and associated indexes
    * the rev-branch-cache instance

    Examples:

    # update for the last revision
    $ hg perfbranchmapupdate --base 'not tip' --target 'tip'

    # update for a change coming with a new branch
    $ hg perfbranchmapupdate --base 'stable' --target 'default'
    """
    from mercurial import branchmap
    from mercurial import repoview

    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    clearcaches = opts[b'clear_caches']
    unfi = repo.unfiltered()
    x = [None]  # used to pass data between closures

    # we use a `list` here to avoid possible side effects from smartset
    baserevs = list(scmutil.revrange(repo, base))
    targetrevs = list(scmutil.revrange(repo, target))
    if not baserevs:
        raise error.Abort(b'no revisions selected for --base')
    if not targetrevs:
        raise error.Abort(b'no revisions selected for --target')

    # make sure the target branchmap also contains the one in the base
    targetrevs = list(set(baserevs) | set(targetrevs))
    targetrevs.sort()

    cl = repo.changelog
    allbaserevs = list(cl.ancestors(baserevs, inclusive=True))
    allbaserevs.sort()
    alltargetrevs = frozenset(cl.ancestors(targetrevs, inclusive=True))

    newrevs = list(alltargetrevs.difference(allbaserevs))
    newrevs.sort()

    allrevs = frozenset(unfi.changelog.revs())
    basefilterrevs = frozenset(allrevs.difference(allbaserevs))
    targetfilterrevs = frozenset(allrevs.difference(alltargetrevs))

    def basefilter(repo, visibilityexceptions=None):
        return basefilterrevs

    def targetfilter(repo, visibilityexceptions=None):
        return targetfilterrevs

    msg = b'benchmark of branchmap with %d revisions with %d new ones\n'
    ui.status(msg % (len(allbaserevs), len(newrevs)))
    if targetfilterrevs:
        msg = b'(%d revisions still filtered)\n'
        ui.status(msg % len(targetfilterrevs))

    try:
        repoview.filtertable[b'__perf_branchmap_update_base'] = basefilter
        repoview.filtertable[b'__perf_branchmap_update_target'] = targetfilter

        baserepo = repo.filtered(b'__perf_branchmap_update_base')
        targetrepo = repo.filtered(b'__perf_branchmap_update_target')

        # try to find an existing branchmap to reuse
        subsettable = getbranchmapsubsettable()
        candidatefilter = subsettable.get(None)
        while candidatefilter is not None:
            candidatebm = repo.filtered(candidatefilter).branchmap()
            if candidatebm.validfor(baserepo):
                filtered = repoview.filterrevs(repo, candidatefilter)
                missing = [r for r in allbaserevs if r in filtered]
                base = candidatebm.copy()
                base.update(baserepo, missing)
                break
            candidatefilter = subsettable.get(candidatefilter)
        else:
            # no suitable subset was found
            base = branchmap.branchcache()
            base.update(baserepo, allbaserevs)

        def setup():
            x[0] = base.copy()
            if clearcaches:
                unfi._revbranchcache = None
                clearchangelog(repo)

        def bench():
            x[0].update(targetrepo, newrevs)

        timer(bench, setup=setup)
        fm.end()
    finally:
        repoview.filtertable.pop(b'__perf_branchmap_update_base', None)
        repoview.filtertable.pop(b'__perf_branchmap_update_target', None)


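# Note on the approach above (a sketch of the mechanics, not extra API):
# the command registers two throw-away entries in repoview.filtertable so
# that `repo.filtered(...)` can expose exactly the <base> and <target>
# subsets, then times `branchcache.update()` for the revisions in between.

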
@command(
    b'perf::branchmapload|perfbranchmapload',
    [
        (b'f', b'filter', b'', b'Specify repoview filter'),
        (b'', b'list', False, b'List branchmap filter caches'),
        (b'', b'clear-revlogs', False, b'refresh changelog and manifest'),
    ]
    + formatteropts,
)
def perfbranchmapload(ui, repo, filter=b'', list=False, **opts):
    """benchmark reading the branchmap"""
    opts = _byteskwargs(opts)
    clearrevlogs = opts[b'clear_revlogs']

    if list:
        for name, kind, st in repo.cachevfs.readdir(stat=True):
            if name.startswith(b'branch2'):
                filtername = name.partition(b'-')[2] or b'unfiltered'
                ui.status(
                    b'%s - %s\n' % (filtername, util.bytecount(st.st_size))
                )
        return
    if not filter:
        filter = None
    subsettable = getbranchmapsubsettable()
    if filter is None:
        repo = repo.unfiltered()
    else:
        repo = repoview.repoview(repo, filter)

    repo.branchmap()  # make sure we have a relevant, up to date branchmap

    try:
        fromfile = branchmap.branchcache.fromfile
    except AttributeError:
        # older versions
        fromfile = branchmap.read

    currentfilter = filter
    # try once without timer, the filter may not be cached
    while fromfile(repo) is None:
        currentfilter = subsettable.get(currentfilter)
        if currentfilter is None:
            raise error.Abort(
                b'No branchmap cached for %s repo' % (filter or b'unfiltered')
            )
        repo = repo.filtered(currentfilter)
    timer, fm = gettimer(ui, opts)

    def setup():
        if clearrevlogs:
            clearchangelog(repo)

    def bench():
        fromfile(repo)

    timer(bench, setup=setup)
    fm.end()


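# Usage sketch (illustrative):
#
#   $ hg perf::branchmapload --list              # show on-disk caches
#   $ hg perf::branchmapload --filter served --clear-revlogs
#
# When the requested filter has no cache on disk, the command falls back
# to the nearest cached subset, as implemented above.

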
@command(b'perf::loadmarkers|perfloadmarkers')
def perfloadmarkers(ui, repo):
    """benchmark the time to parse the on-disk markers for a repo

    Result is the number of markers in the repo."""
    timer, fm = gettimer(ui)
    svfs = getsvfs(repo)
    timer(lambda: len(obsolete.obsstore(repo, svfs)))
    fm.end()


@command(
    b'perf::lrucachedict|perflrucachedict',
    formatteropts
    + [
        (b'', b'costlimit', 0, b'maximum total cost of items in cache'),
        (b'', b'mincost', 0, b'smallest cost of items in cache'),
        (b'', b'maxcost', 100, b'maximum cost of items in cache'),
        (b'', b'size', 4, b'size of cache'),
        (b'', b'gets', 10000, b'number of key lookups'),
        (b'', b'sets', 10000, b'number of key sets'),
        (b'', b'mixed', 10000, b'number of mixed mode operations'),
        (
            b'',
            b'mixedgetfreq',
            50,
            b'frequency of get vs set ops in mixed mode',
        ),
    ],
    norepo=True,
)
def perflrucache(
    ui,
    mincost=0,
    maxcost=100,
    costlimit=0,
    size=4,
    gets=10000,
    sets=10000,
    mixed=10000,
    mixedgetfreq=50,
    **opts
):
    opts = _byteskwargs(opts)

    def doinit():
        for i in _xrange(10000):
            util.lrucachedict(size)

    costrange = list(range(mincost, maxcost + 1))

    values = []
    for i in _xrange(size):
        values.append(random.randint(0, _maxint))

    # Get mode fills the cache and tests raw lookup performance with no
    # eviction.
    getseq = []
    for i in _xrange(gets):
        getseq.append(random.choice(values))

    def dogets():
        d = util.lrucachedict(size)
        for v in values:
            d[v] = v
        for key in getseq:
            value = d[key]
            value  # silence pyflakes warning

    def dogetscost():
        d = util.lrucachedict(size, maxcost=costlimit)
        for i, v in enumerate(values):
            d.insert(v, v, cost=costs[i])
        for key in getseq:
            try:
                value = d[key]
                value  # silence pyflakes warning
            except KeyError:
                pass

    # Set mode tests insertion speed with cache eviction.
    setseq = []
    costs = []
    for i in _xrange(sets):
        setseq.append(random.randint(0, _maxint))
        costs.append(random.choice(costrange))

    def doinserts():
        d = util.lrucachedict(size)
        for v in setseq:
            d.insert(v, v)

    def doinsertscost():
        d = util.lrucachedict(size, maxcost=costlimit)
        for i, v in enumerate(setseq):
            d.insert(v, v, cost=costs[i])

    def dosets():
        d = util.lrucachedict(size)
        for v in setseq:
            d[v] = v

    # Mixed mode randomly performs gets and sets with eviction.
    mixedops = []
    for i in _xrange(mixed):
        r = random.randint(0, 100)
        if r < mixedgetfreq:
            op = 0
        else:
            op = 1

        mixedops.append(
            (op, random.randint(0, size * 2), random.choice(costrange))
        )

    def domixed():
        d = util.lrucachedict(size)

        for op, v, cost in mixedops:
            if op == 0:
                try:
                    d[v]
                except KeyError:
                    pass
            else:
                d[v] = v

    def domixedcost():
        d = util.lrucachedict(size, maxcost=costlimit)

        for op, v, cost in mixedops:
            if op == 0:
                try:
                    d[v]
                except KeyError:
                    pass
            else:
                d.insert(v, v, cost=cost)

    benches = [
        (doinit, b'init'),
    ]

    if costlimit:
        benches.extend(
            [
                (dogetscost, b'gets w/ cost limit'),
                (doinsertscost, b'inserts w/ cost limit'),
                (domixedcost, b'mixed w/ cost limit'),
            ]
        )
    else:
        benches.extend(
            [
                (dogets, b'gets'),
                (doinserts, b'inserts'),
                (dosets, b'sets'),
                (domixed, b'mixed'),
            ]
        )

    for fn, title in benches:
        timer, fm = gettimer(ui, opts)
        timer(fn, title=title)
        fm.end()


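# A minimal sketch of the cache semantics being benchmarked above (assuming
# the mercurial.util module is importable; sizes and keys are arbitrary):
#
#   from mercurial import util
#   d = util.lrucachedict(2)
#   d[b'a'] = 1
#   d[b'b'] = 2
#   d[b'c'] = 3  # evicts b'a', the least recently used entry
#   d.insert(b'd', 4, cost=10)  # cost only matters when maxcost is set

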
@command(
    b'perf::write|perfwrite',
    formatteropts
    + [
        (b'', b'write-method', b'write', b'ui write method'),
        (b'', b'nlines', 100, b'number of lines'),
        (b'', b'nitems', 100, b'number of items (per line)'),
        (b'', b'item', b'x', b'item that is written'),
        (b'', b'batch-line', None, b'pass whole line to write method at once'),
        (b'', b'flush-line', None, b'flush after each line'),
    ],
)
def perfwrite(ui, repo, **opts):
    """microbenchmark ui.write (and others)"""
    opts = _byteskwargs(opts)

    write = getattr(ui, _sysstr(opts[b'write_method']))
    nlines = int(opts[b'nlines'])
    nitems = int(opts[b'nitems'])
    item = opts[b'item']
    batch_line = opts.get(b'batch_line')
    flush_line = opts.get(b'flush_line')

    if batch_line:
        line = item * nitems + b'\n'

    def benchmark():
        for i in pycompat.xrange(nlines):
            if batch_line:
                write(line)
            else:
                for i in pycompat.xrange(nitems):
                    write(item)
                write(b'\n')
            if flush_line:
                ui.flush()
        ui.flush()

    timer, fm = gettimer(ui, opts)
    timer(benchmark)
    fm.end()


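# Usage sketch (illustrative):
#
#   $ hg perf::write --nlines 1000 --nitems 50 --batch-line
#   $ hg perf::write --write-method status
#
# --write-method accepts the name of any ui method that takes bytes, for
# example 'write' or 'status'.

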
def uisetup(ui):
    if util.safehasattr(cmdutil, b'openrevlog') and not util.safehasattr(
        commands, b'debugrevlogopts'
    ):
        # for "historical portability":
        # In this case, Mercurial should be 1.9 (or a79fea6b3e77) -
        # 3.7 (or 5606f7d0d063). Therefore, '--dir' option for
        # openrevlog() should cause failure, because it has been
        # available since 3.5 (or 49c583ca48c4).
        def openrevlog(orig, repo, cmd, file_, opts):
            if opts.get(b'dir') and not util.safehasattr(repo, b'dirlog'):
                raise error.Abort(
                    b"This version doesn't support --dir option",
                    hint=b"use 3.5 or later",
                )
            return orig(repo, cmd, file_, opts)

        name = _sysstr(b'openrevlog')
        extensions.wrapfunction(cmdutil, name, openrevlog)


@command(
    b'perf::progress|perfprogress',
    formatteropts
    + [
        (b'', b'topic', b'topic', b'topic for progress messages'),
        (b'c', b'total', 1000000, b'total value we are progressing to'),
    ],
    norepo=True,
)
def perfprogress(ui, topic=None, total=None, **opts):
    """printing of progress bars"""
    opts = _byteskwargs(opts)

    timer, fm = gettimer(ui, opts)

    def doprogress():
        with ui.makeprogress(topic, total=total) as progress:
            for i in _xrange(total):
                progress.increment()

    timer(doprogress)
    fm.end()
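

# Usage sketch (illustrative):
#
#   $ hg perf::progress --total 100000 --topic rewinding
#
# The timer covers one full progress bar life-cycle per run.
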
@@ -1,3901 +1,3907 b''
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""


import binascii
import collections
import contextlib
import io
import os
import struct
import weakref
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    DELTA_BASE_REUSE_NO,
    DELTA_BASE_REUSE_TRY,
    ENTRY_RANK,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    KIND_FILELOG,
    RANK_UNKNOWN,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    rewrite,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
CHANGELOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of inline data embedded into a revlog
_maxinline = 131072

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
    parsers, 'BaseIndexObject'
)


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta:
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem:
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
    return index, cache


if hasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)

hexdigits = b'0123456789abcdefABCDEF'


class _Config:
    def copy(self):
        return self.__class__(**self.__dict__)


@attr.s()
class FeatureConfig(_Config):
    """Hold configuration values about the available revlog features"""

    # the default compression engine
    compression_engine = attr.ib(default=b'zlib')
    # compression engines options
    compression_engine_options = attr.ib(default=attr.Factory(dict))

    # can we use censor on this revlog
    censorable = attr.ib(default=False)
    # does this revlog use the "side data" feature
    has_side_data = attr.ib(default=False)
    # might remove rank configuration once the computation has no impact
    compute_rank = attr.ib(default=False)
    # parent order is supposed to be semantically irrelevant, so we
    # normally resort parents to ensure that the first parent is non-null,
    # if there is a non-null parent at all.
    # filelog abuses the parent order as flag to mark some instances of
    # meta-encoded files, so allow it to disable this behavior.
    canonical_parent_order = attr.ib(default=False)
    # can ellipsis commit be used
    enable_ellipsis = attr.ib(default=False)

    def copy(self):
        new = super().copy()
        new.compression_engine_options = self.compression_engine_options.copy()
        return new


@attr.s()
class DataConfig(_Config):
    """Hold configuration values about how the revlog data are read"""

    # should we try to open the "pending" version of the revlog
    try_pending = attr.ib(default=False)
    # should we try to open the "split" version of the revlog
    try_split = attr.ib(default=False)
    # When True, indexfile should be opened with checkambig=True at writing,
    # to avoid file stat ambiguity.
    check_ambig = attr.ib(default=False)

    # If true, use mmap instead of reading to deal with large index
    mmap_large_index = attr.ib(default=False)
    # how much data is large
    mmap_index_threshold = attr.ib(default=None)
    # How much data to read and cache into the raw revlog data cache.
    chunk_cache_size = attr.ib(default=65536)

    # Allow sparse reading of the revlog data
    with_sparse_read = attr.ib(default=False)
    # minimal density of a sparse read chunk
    sr_density_threshold = attr.ib(default=0.50)
    # minimal size of data we skip when performing sparse read
    sr_min_gap_size = attr.ib(default=262144)

    # are deltas encoded against arbitrary bases.
    generaldelta = attr.ib(default=False)


@attr.s()
class DeltaConfig(_Config):
    """Hold configuration values about how new deltas are computed

    Some attributes are duplicated from DataConfig to help having each object
    self contained.
    """

    # can deltas be encoded against arbitrary bases.
    general_delta = attr.ib(default=False)
    # Allow sparse writing of the revlog data
    sparse_revlog = attr.ib(default=False)
    # maximum length of a delta chain
    max_chain_len = attr.ib(default=None)
    # Maximum distance between delta chain base start and end
    max_deltachain_span = attr.ib(default=-1)
    # If `upper_bound_comp` is not None, this is the expected maximal gain from
    # compression for the data content.
    upper_bound_comp = attr.ib(default=None)
    # Should we try a delta against both parents
    delta_both_parents = attr.ib(default=True)
    # Test delta base candidate groups by chunks of this maximal size.
    candidate_group_chunk_size = attr.ib(default=0)
    # Should we display debug information about delta computation
    debug_delta = attr.ib(default=False)
    # trust incoming deltas by default
    lazy_delta = attr.ib(default=True)
    # trust the base of incoming deltas by default
    lazy_delta_base = attr.ib(default=False)


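# Illustrative sketch only (the values are arbitrary assumptions, not
# recommended defaults): the config classes above are plain attrs
# containers, so tuning the reading and delta behavior could look like:
#
#   data_cfg = DataConfig(mmap_large_index=True, mmap_index_threshold=10**8)
#   delta_cfg = DeltaConfig(general_delta=True, max_chain_len=1000)
#   independent = data_cfg.copy()  # _Config.copy() returns a fresh clone

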
class _InnerRevlog:
    """An inner layer of the revlog object

    That layer exists to be able to delegate some operations to Rust; its
    boundaries are arbitrary and based on what we can delegate to Rust.
    """

    def __init__(
        self,
        opener,
        index,
        index_file,
        data_file,
        sidedata_file,
        inline,
        data_config,
        chunk_cache,
    ):
        self.opener = opener
        self.index = index

        self.index_file = index_file
        self.data_file = data_file
        self.sidedata_file = sidedata_file
        self.inline = inline
        self.data_config = data_config

        # index

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self.index_file if self.inline else self.data_file),
            self.data_config.chunk_cache_size,
            chunk_cache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self.sidedata_file,
            self.data_config.chunk_cache_size,
        )

    # Derived from index values.

    def start(self, rev):
        """the offset of the data chunk for this revision"""
        return int(self.index[rev][0] >> 16)

    def length(self, rev):
        """the length of the data chunk for this revision"""
        return self.index[rev][1]

    def end(self, rev):
        """the end of the data chunk for this revision"""
        return self.start(rev) + self.length(rev)

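    # Layout note (a sketch inferred from start() above, not extra API):
    # the first field of an index entry packs the data offset and the
    # revision flags into a single integer:
    #
    #   entry = self.index[rev]
    #   offset = entry[0] >> 16  # what start() returns
    #   flags = entry[0] & 0xFFFF  # the low 16 bits hold the flags
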
398 @contextlib.contextmanager
398 @contextlib.contextmanager
399 def reading(self):
399 def reading(self):
400 """Context manager that keeps data and sidedata files open for reading"""
400 """Context manager that keeps data and sidedata files open for reading"""
401 if len(self.index) == 0:
401 if len(self.index) == 0:
402 yield # nothing to be read
402 yield # nothing to be read
403 else:
403 else:
404 with self._segmentfile.reading():
404 with self._segmentfile.reading():
405 with self._segmentfile_sidedata.reading():
405 with self._segmentfile_sidedata.reading():
406 yield
406 yield
407
407
408 @property
408 @property
409 def is_writing(self):
    def is_writing(self):
        """True if a writing context is open"""
        return self._writinghandles is not None

    @contextlib.contextmanager
    def writing(self, transaction, data_end=None, sidedata_end=None):
        """Open the revlog files for writing

        Adding content to a revlog should be done within such a context.
        """
        if self.is_writing:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self.index)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self.inline:
                    try:
                        dfh = self.opener(self.data_file, mode=b"r+")
                        if data_end is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(data_end, os.SEEK_SET)
                    except FileNotFoundError:
                        dfh = self.opener(self.data_file, mode=b"w+")
                    transaction.add(self.data_file, dsize)
                if self.sidedata_file is not None:
                    assert sidedata_end is not None
                    # revlog-v2 does not inline, help Pytype
                    assert dfh is not None
                    try:
                        sdfh = self.opener(self.sidedata_file, mode=b"r+")
                        dfh.seek(sidedata_end, os.SEEK_SET)
                    except FileNotFoundError:
                        sdfh = self.opener(self.sidedata_file, mode=b"w+")
                    transaction.add(self.sidedata_file, sidedata_end)

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self.inline:
                    transaction.add(self.index_file, dsize + isize)
                else:
                    transaction.add(self.index_file, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self.inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing references to
                # potentially unflushed data content.
                if ifh is not None:
                    ifh.close()

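    # Editorial aside: a minimal usage sketch (not part of this change;
    # `inner` and `tr` are hypothetical names for an _InnerRevlog and an
    # open transaction). All appends are expected to happen inside the
    # context manager above, which opens and later closes the handles:
    #
    #     with inner.writing(tr):
    #         assert inner.is_writing
    #         # ... append index/data/sidedata entries here ...
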
    def __index_write_fp(self, index_end=None):
        """internal method to open the index file for writing

        You should not use this directly; use `writing` instead.
        """
        try:
            f = self.opener(
                self.index_file,
                mode=b"r+",
                checkambig=self.data_config.check_ambig,
            )
            if index_end is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(index_end, os.SEEK_SET)
            return f
        except FileNotFoundError:
            return self.opener(
                self.index_file,
                mode=b"w+",
                checkambig=self.data_config.check_ambig,
            )

    def __index_new_fp(self):
        """internal method to create a new index file for writing

        You should not use this unless you are upgrading from an inline revlog
        """
        return self.opener(
            self.index_file,
            mode=b"w",
            checkambig=self.data_config.check_ambig,
            atomictemp=True,
        )

    def get_segment_for_revs(self, startrev, endrev):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.

        API: we should consider making this a private part of the InnerRevlog
        at some point.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self.inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length)

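    # Editorial aside: a hypothetical caller sketch (names illustrative,
    # not from this change), showing how the returned (offset, data) pair
    # is sliced per revision with ``start()``/``length()``:
    #
    #     offset, data = inner.get_segment_for_revs(startrev, endrev)
    #     for rev in range(startrev, endrev + 1):
    #         chunkstart = inner.start(rev)
    #         if inner.inline:
    #             chunkstart += (rev + 1) * inner.index.entry_size
    #         begin = chunkstart - offset
    #         chunk = data[begin : begin + inner.length(rev)]
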
class revlog:
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    See mercurial/revlogutils/constants.py for details about the content of an
    index entry.
    """

    _flagserrorclass = error.RevlogError

    @staticmethod
    def is_inline_index(header_bytes):
        """Determine if a revlog is inline from the initial bytes of the index"""
        header = INDEX_HEADER.unpack(header_bytes)[0]

        _format_flags = header & ~0xFFFF
        _format_version = header & 0xFFFF

        features = FEATURES_BY_VERSION[_format_version]
        return features[b'inline'](_format_flags)

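    # Editorial aside: an illustrative example (constant values assumed from
    # the v1 format, not defined in this excerpt). The first 4 bytes of an
    # index pack the format flags (high 16 bits) and version (low 16 bits):
    #
    #     >>> import struct
    #     >>> hdr = struct.pack(">I", 0x00010001)  # FLAG_INLINE_DATA | REVLOGV1
    #     >>> bool(revlog.is_inline_index(hdr))
    #     True
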
    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
        try_split=False,
        canonical_parent_order=True,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note that this must be reliably set by normal
        code, but test, debug, or performance measurement code might not
        set this to an accurate value.
        """

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self._try_split = try_split
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        if b'feature-config' in self.opener.options:
            self.feature_config = self.opener.options[b'feature-config'].copy()
        else:
            self.feature_config = FeatureConfig()
        self.feature_config.censorable = censorable
        self.feature_config.canonical_parent_order = canonical_parent_order
        if b'data-config' in self.opener.options:
            self.data_config = self.opener.options[b'data-config'].copy()
        else:
            self.data_config = DataConfig()
        self.data_config.check_ambig = checkambig
        self.data_config.mmap_large_index = mmaplargeindex
        if b'delta-config' in self.opener.options:
            self.delta_config = self.opener.options[b'delta-config'].copy()
        else:
            self.delta_config = DeltaConfig()
        self.delta_config.upper_bound_comp = upperboundcomp

        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)

        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}

        # other optional features

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)
        # prevent nesting of addgroup
        self._adding_group = None

        chunk_cache = self._loadindex()
        self._load_inner(chunk_cache)

        self._concurrencychecker = concurrencychecker

    @property
    def _generaldelta(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.general_delta", b"6.6", stacklevel=2
        )
        return self.delta_config.general_delta

    @property
    def _checkambig(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.checkambig", b"6.6", stacklevel=2
        )
        return self.data_config.check_ambig

    @property
    def _mmaplargeindex(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.mmap_large_index", b"6.6", stacklevel=2
        )
        return self.data_config.mmap_large_index

    @property
    def _censorable(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.censorable", b"6.6", stacklevel=2
        )
        return self.feature_config.censorable

    @property
    def _chunkcachesize(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.chunk_cache_size", b"6.6", stacklevel=2
        )
        return self.data_config.chunk_cache_size

    @property
    def _maxchainlen(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2
        )
        return self.delta_config.max_chain_len

    @property
    def _deltabothparents(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.delta_both_parents", b"6.6", stacklevel=2
        )
        return self.delta_config.delta_both_parents

    @property
    def _candidate_group_chunk_size(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.candidate_group_chunk_size",
            b"6.6",
            stacklevel=2,
        )
        return self.delta_config.candidate_group_chunk_size

    @property
    def _debug_delta(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.debug_delta", b"6.6", stacklevel=2
        )
        return self.delta_config.debug_delta

    @property
    def _compengine(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.compression_engine",
            b"6.6",
            stacklevel=2,
        )
        return self.feature_config.compression_engine

    @property
    def upperboundcomp(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.upper_bound_comp",
            b"6.6",
            stacklevel=2,
        )
        return self.delta_config.upper_bound_comp

    @property
    def _compengineopts(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.compression_engine_options",
            b"6.6",
            stacklevel=2,
        )
        return self.feature_config.compression_engine_options

    @property
    def _maxdeltachainspan(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.max_deltachain_span", b"6.6", stacklevel=2
        )
        return self.delta_config.max_deltachain_span

    @property
    def _withsparseread(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.with_sparse_read", b"6.6", stacklevel=2
        )
        return self.data_config.with_sparse_read

    @property
    def _sparserevlog(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.sparse_revlog", b"6.6", stacklevel=2
        )
        return self.delta_config.sparse_revlog

    @property
    def hassidedata(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2
        )
        return self.feature_config.has_side_data

    @property
    def _srdensitythreshold(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.sr_density_threshold",
            b"6.6",
            stacklevel=2,
        )
        return self.data_config.sr_density_threshold

    @property
    def _srmingapsize(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2
        )
        return self.data_config.sr_min_gap_size

    @property
    def _compute_rank(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2
        )
        return self.feature_config.compute_rank

    @property
    def canonical_parent_order(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.canonical_parent_order",
            b"6.6",
            stacklevel=2,
        )
        return self.feature_config.canonical_parent_order

    @property
    def _lazydelta(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2
        )
        return self.delta_config.lazy_delta

    @property
    def _lazydeltabase(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2
        )
        return self.delta_config.lazy_delta_base

    def _init_opts(self):
        """process options (from above/config) to set up the associated
        default revlog mode

        These values might be affected when actually reading on-disk
        information.

        The relevant values are returned for use in _loadindex().

        * new_header:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
            compute_rank = opts.get(b'changelogv2.compute-rank', True)
            self.feature_config.compute_rank = compute_rank
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        mmapindexthreshold = None
        if self.data_config.mmap_large_index:
            mmapindexthreshold = self.data_config.mmap_index_threshold
        if self.feature_config.enable_ellipsis:
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).items():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        chunk_cache_size = self.data_config.chunk_cache_size
        if chunk_cache_size <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % chunk_cache_size
            )
        elif chunk_cache_size & (chunk_cache_size - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % chunk_cache_size
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

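    # Editorial aside: the power-of-two validation above relies on the
    # classic bit trick that ``x & (x - 1)`` clears the lowest set bit,
    # hence is zero exactly for powers of two:
    #
    #     >>> [x for x in range(1, 20) if not (x & (x - 1))]
    #     [1, 2, 4, 8, 16]
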
    def _get_data(self, filepath, mmap_threshold, size=None):
        """return the file content, with or without mmap

        If the file is missing, return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except FileNotFoundError:
            return b''

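    # Editorial aside: a hypothetical call sketch (threshold value assumed).
    # With mmap_threshold=1024, a 4096-byte file comes back as a buffer over
    # an mmap region, a 100-byte file as plain bytes from read(), and a
    # missing file as b'':
    #
    #     data = rl._get_data(rl._indexfile, 1024)
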
    def get_streams(self, max_linkrev, force_inline=False):
        """return a list of streams that represent this revlog

        This is used by stream-clone to do bytes-to-bytes copies of a
        repository.

        This streams data for all revisions that refer to a changelog revision
        up to `max_linkrev`.

        If `force_inline` is set, it enforces that the stream will represent
        an inline revlog.

        It returns a list of three-tuples:

            [
                (filename, bytes_stream, stream_size),
                …
            ]
        """
        n = len(self)
        index = self.index
        while n > 0:
            linkrev = index[n - 1][4]
            if linkrev < max_linkrev:
                break
            # note: this loop will rarely go through multiple iterations, since
            # it only traverses commits created during the current streaming
            # pull operation.
            #
            # If this becomes a problem, using a binary search should cap the
            # runtime of this.
            n = n - 1
        if n == 0:
            # no data to send
            return []
        index_size = n * index.entry_size
        data_size = self.end(n - 1)

        # XXX we might have been split (or stripped) since the object
        # initialization. We need to close this race too, e.g. by having a way
        # to pre-open the files we feed to the revlog and never closing them
        # before we are done streaming.

        if self._inline:

            def get_stream():
                with self.opener(self._indexfile, mode=b"r") as fp:
                    yield None
                    size = index_size + data_size
                    if size <= 65536:
                        yield fp.read(size)
                    else:
                        yield from util.filechunkiter(fp, limit=size)

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        elif force_inline:

            def get_stream():
                with self.reading():
                    yield None

                    for rev in range(n):
                        idx = self.index.entry_binary(rev)
                        if rev == 0 and self._docket is None:
                            # re-inject the inline flag
                            header = self._format_flags
                            header |= self._format_version
                            header |= FLAG_INLINE_DATA
                            header = self.index.pack_header(header)
                            idx = header + idx
                        yield idx
                        yield self._inner.get_segment_for_revs(rev, rev)[1]

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        else:

            def get_index_stream():
                with self.opener(self._indexfile, mode=b"r") as fp:
                    yield None
                    if index_size <= 65536:
                        yield fp.read(index_size)
                    else:
                        yield from util.filechunkiter(fp, limit=index_size)

            def get_data_stream():
                with self._datafp() as fp:
                    yield None
                    if data_size <= 65536:
                        yield fp.read(data_size)
                    else:
                        yield from util.filechunkiter(fp, limit=data_size)

            index_stream = get_index_stream()
            next(index_stream)
            data_stream = get_data_stream()
            next(data_stream)
            return [
                (self._datafile, data_stream, data_size),
                (self._indexfile, index_stream, index_size),
            ]

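    # Editorial aside: a hypothetical consumer sketch (names illustrative).
    # Each returned entry is (filename, bytes_stream, stream_size); the
    # generators are already primed and hold open file handles, so they are
    # meant to be consumed promptly:
    #
    #     for name, stream, size in rl.get_streams(max_linkrev):
    #         copied = 0
    #         for chunk in stream:
    #             copied += len(chunk)
    #         assert copied == size
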
    def _loadindex(self, docket=None):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        elif self._try_split and self.opener.exists(self._split_index_file):
            entry_point = self._split_index_file
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            self._docket = docket
            self._docket_file = entry_point
        else:
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

            self._format_flags = header & ~0xFFFF
            self._format_version = header & 0xFFFF

            supported_flags = SUPPORTED_FLAGS.get(self._format_version)
            if supported_flags is None:
                msg = _(b'unknown version (%d) in revlog %s')
                msg %= (self._format_version, self.display_id)
                raise error.RevlogError(msg)
            elif self._format_flags & ~supported_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            features = FEATURES_BY_VERSION[self._format_version]
            self._inline = features[b'inline'](self._format_flags)
            self.delta_config.general_delta = features[b'generaldelta'](
                self._format_flags
            )
            self.feature_config.has_side_data = features[b'sidedata']

            if not features[b'docket']:
                self._indexfile = entry_point
                index_data = entry_data
            else:
                self._docket_file = entry_point
                if self._initempty:
                    self._docket = docketutil.default_docket(self, header)
                else:
                    self._docket = docketutil.parse_docket(
                        self, entry_data, use_pending=self._trypending
                    )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self.delta_config.general_delta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self.delta_config.general_delta:
            self.delta_config.sparse_revlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and hasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

        return chunkcache

    def _load_inner(self, chunk_cache):
        self._inner = _InnerRevlog(
            opener=self.opener,
            index=self.index,
            index_file=self._indexfile,
            data_file=self._datafile,
            sidedata_file=self._sidedatafile,
            inline=self._inline,
            data_config=self.data_config,
            chunk_cache=chunk_cache,
        )

    def get_revlog(self):
        """simple function to mirror the API of other not-really-revlog
        classes"""
        return self

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public-facing "ID" of the revlog that we use in messages"""
        if self.revlog_kind == KIND_FILELOG:
            # Reference the file without the "data/" prefix, so it is familiar
            # to the user.
            return self.target[1]
        else:
            return self.radix

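    # Editorial aside: illustrative values (assumed, not from this change).
    # For a filelog target like (KIND_FILELOG, b'foo.txt'), display_id is
    # b'foo.txt'; for a changelog it falls back to the radix, e.g.
    # b'00changelog'.
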
    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(
                    self.feature_config.compression_engine_options
                )
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

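    # Editorial aside: a hypothetical usage sketch (the b'x' header byte is
    # assumed here; it is how zlib-compressed chunks conventionally start).
    # Decompressors are cached per compression header byte:
    #
    #     decomp = rl._get_decompressor(b'x')
    #     text = decomp.decompress(rawchunk)
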
    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self.feature_config.compression_engine]
        return engine.revlogcompressor(
            self.feature_config.compression_engine_options
        )

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(range(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def _candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        """update the on-disk cache

        If a transaction is passed, the update may be delayed to transaction
        commit."""
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        """Clear in-memory caches"""
        self._revisioncache = None
        self._chainbasecache.clear()
        self._inner._segmentfile.clear_cache()
        self._inner._segmentfile_sidedata.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket, so
        # we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and hasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        """return the revision number associated with a <nodeid>"""
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset (instead of previous-offset +
        # previous-size).
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

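As the comment above start() notes, the first index-entry field packs a 48-bit offset and 16 flag bits into one integer. A minimal sketch of that packing; the helper names are hypothetical, not revlog API:

def pack_offset_flags(offset, flags):
    assert 0 <= flags <= 0xFFFF
    return (offset << 16) | flags

def unpack_offset_flags(field):
    # mirrors start() and flags() above
    return field >> 16, field & 0xFFFF
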
    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.feature_config.has_side_data:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev))

    def fast_rank(self, rev):
        """Return the rank of a revision if already known, or None otherwise.

        The rank of a revision is the size of the sub-graph it defines as a
        head. Equivalently, the rank of a revision `r` is the size of the set
        `ancestors(r)`, `r` included.

        This method returns the rank retrieved from the revlog in constant
        time. It makes no attempt at computing unknown values for versions of
        the revlog which do not persist the rank.
        """
        rank = self.index[rev][ENTRY_RANK]
        if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
            return None
        if rev == nullrev:
            return 0  # convention
        return rank

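For contrast with the constant-time lookup above, here is the naive O(n) computation of the same quantity; `parentrevs` is any callable with the parentrevs() signature and the helper name is hypothetical:

def slow_rank(parentrevs, rev):
    seen = set()
    stack = [rev]
    while stack:
        r = stack.pop()
        if r >= 0 and r not in seen:  # skip nullrev (-1)
            seen.add(r)
            stack.extend(parentrevs(r))
    return len(seen)  # size of ancestors(rev), rev included
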
    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        if self.feature_config.canonical_parent_order and entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if self.feature_config.canonical_parent_order and d[5] == self.nullid:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self.delta_config.general_delta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

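A usage sketch of the tuple returned above; `rl` is a hypothetical revlog instance. Tooling in the spirit of `hg debugdeltachain` combines both numbers to reason about the read cost of a revision:

chain_length, compressed_bytes = rl._chaininfo(42)
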
    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        generaldelta = self.delta_config.general_delta
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

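A usage sketch of the contract in the docstring above; `rl` is a hypothetical revlog instance:

chain, stopped = rl._deltachain(42)
# `chain` is base-first; applying each delta in order rebuilds rev 42.
# With stoprev given, `stopped` reports whether the walk hit it early:
partial, stopped = rl._deltachain(42, stoprev=40)
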
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset:
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

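The revset identity in the docstring above, replayed on a toy DAG with plain sets of revision numbers; this is illustration only, nothing here is revlog API:

toy_parents = {0: [], 1: [0], 2: [1], 3: [1]}  # revs 2 and 3 are heads

def toy_ancestors(rev):
    out, todo = set(), [rev]
    while todo:
        r = todo.pop()
        if r not in out:
            out.add(r)
            todo.extend(toy_parents[r])
    return out

has = toy_ancestors(2)            # ::common, for common=[2]
missing = toy_ancestors(3) - has  # (::heads) - (::common)
assert sorted(missing) == [3]
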
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.items() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

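The marking trick above, replayed on a toy DAG (illustration only): every rev starts as a candidate head and loses that status as soon as it is seen as a parent:

toy_parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (1, -1)}
ishead = [1] * 4
for r, ps in toy_parents.items():
    for p in ps:
        if p >= 0:
            ishead[p] = 0
assert [r for r, v in enumerate(ishead) if v] == [2, 3]
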
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

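Because revision numbers are topologically ordered, an ancestor always carries a smaller rev than its descendant, which is what justifies the early `a > b` exit above. A usage sketch with a hypothetical instance `rl`:

assert not rl.isancestorrev(10, 5)  # 10 can never be an ancestor of 5
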
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (binascii.Error, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key is not hex
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            # hex(node)[:...]
            l = len(id) // 2 * 2  # grab an even number of digits
            try:
                # we're dropping the last digit, so let's check that it's hex,
                # to avoid the expensive computation below if it's not
                if len(id) % 2 > 0:
                    if not (id[-1] in hexdigits):
                        return None
                prefix = bin(id[:l])
            except binascii.Error:
                pass
            else:
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

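A usage sketch tying shortest() back to lookup(); `rl` and `node` are hypothetical stand-ins:

prefix = rl.shortest(node, minlength=4)  # hex prefix, e.g. b'1f2e'
assert rl.lookup(prefix) == node         # unambiguous by construction
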
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

-    def _getsegmentforrevs(self, startrev, endrev):
-        """Obtain a segment of raw data corresponding to a range of revisions.
-
-        Accepts the start and end revisions and an optional already-open
-        file handle to be used for reading. If the file handle is read, its
-        seek position will not be preserved.
-
-        Requests for data may be satisfied by a cache.
-
-        Returns a 2-tuple of (offset, data) for the requested range of
-        revisions. Offset is the integer offset from the beginning of the
-        revlog and data is a str or buffer of the raw byte data.
-
-        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
-        to determine where each revision's data begins and ends.
-        """
-        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
-        # (functions are expensive).
-        index = self.index
-        istart = index[startrev]
-        start = int(istart[0] >> 16)
-        if startrev == endrev:
-            end = start + istart[1]
-        else:
-            iend = index[endrev]
-            end = int(iend[0] >> 16) + iend[1]
-
-        if self._inline:
-            start += (startrev + 1) * self.index.entry_size
-            end += (endrev + 1) * self.index.entry_size
-        length = end - start
-
-        return start, self._inner._segmentfile.read_chunk(start, length)
-
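The `(rev + 1) * entry_size` correction in the method removed above (which, per this changeset, now lives on the inner object as get_segment_for_revs) exists because an inline revlog interleaves index entries with data: revision r's data sits after the index entries for revisions 0..r. A hypothetical standalone helper showing just that shift:

def inline_data_offset(logical_start, rev, entry_size):
    # physical file offset = logical data offset + the index entries
    # for revisions 0..rev that precede the data in an inline revlog
    return logical_start + (rev + 1) * entry_size
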
    def _chunk(self, rev):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
-        data = self._getsegmentforrevs(rev, rev)[1]
+        data = self._inner.get_segment_for_revs(rev, rev)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

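A sketch of the three-way dispatch above as a standalone function; the numeric constant values are assumed from mercurial.revlogutils.constants and the function name is hypothetical:

COMP_MODE_PLAIN, COMP_MODE_DEFAULT, COMP_MODE_INLINE = 0, 1, 2

def decode_chunk(mode, data, default_decomp, inline_decomp):
    if mode == COMP_MODE_PLAIN:      # stored verbatim
        return data
    elif mode == COMP_MODE_DEFAULT:  # revlog-wide default engine
        return default_decomp(data)
    elif mode == COMP_MODE_INLINE:   # per-chunk header selects the engine
        return inline_decomp(data)
    raise ValueError('unknown compression mode %d' % mode)
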
2212 def _chunks(self, revs, targetsize=None):
2215 def _chunks(self, revs, targetsize=None):
2213 """Obtain decompressed chunks for the specified revisions.
2216 """Obtain decompressed chunks for the specified revisions.
2214
2217
2215 Accepts an iterable of numeric revisions that are assumed to be in
2218 Accepts an iterable of numeric revisions that are assumed to be in
2216 ascending order. Also accepts an optional already-open file handle
2219 ascending order. Also accepts an optional already-open file handle
2217 to be used for reading. If used, the seek position of the file will
2220 to be used for reading. If used, the seek position of the file will
2218 not be preserved.
2221 not be preserved.
2219
2222
2220 This function is similar to calling ``self._chunk()`` multiple times,
2223 This function is similar to calling ``self._chunk()`` multiple times,
2221 but is faster.
2224 but is faster.
2222
2225
2223 Returns a list with decompressed data for each requested revision.
2226 Returns a list with decompressed data for each requested revision.
2224 """
2227 """
2225 if not revs:
2228 if not revs:
2226 return []
2229 return []
2227 start = self.start
2230 start = self.start
2228 length = self.length
2231 length = self.length
2229 inline = self._inline
2232 inline = self._inline
2230 iosize = self.index.entry_size
2233 iosize = self.index.entry_size
2231 buffer = util.buffer
2234 buffer = util.buffer
2232
2235
2233 l = []
2236 l = []
2234 ladd = l.append
2237 ladd = l.append
2235
2238
2236 if not self.data_config.with_sparse_read:
2239 if not self.data_config.with_sparse_read:
2237 slicedchunks = (revs,)
2240 slicedchunks = (revs,)
2238 else:
2241 else:
2239 slicedchunks = deltautil.slicechunk(
2242 slicedchunks = deltautil.slicechunk(
2240 self, revs, targetsize=targetsize
2243 self, revs, targetsize=targetsize
2241 )
2244 )
2242
2245
2243 for revschunk in slicedchunks:
2246 for revschunk in slicedchunks:
2244 firstrev = revschunk[0]
2247 firstrev = revschunk[0]
2245 # Skip trailing revisions with empty diff
2248 # Skip trailing revisions with empty diff
2246 for lastrev in revschunk[::-1]:
2249 for lastrev in revschunk[::-1]:
2247 if length(lastrev) != 0:
2250 if length(lastrev) != 0:
2248 break
2251 break
2249
2252
2250 try:
2253 try:
2251 offset, data = self._getsegmentforrevs(firstrev, lastrev)
2254 offset, data = self._inner.get_segment_for_revs(
2255 firstrev,
2256 lastrev,
2257 )
2252 except OverflowError:
2258 except OverflowError:
2253 # issue4215 - we can't cache a run of chunks greater than
2259 # issue4215 - we can't cache a run of chunks greater than
2254 # 2G on Windows
2260 # 2G on Windows
2255 return [self._chunk(rev) for rev in revschunk]
2261 return [self._chunk(rev) for rev in revschunk]
2256
2262
2257 decomp = self.decompress
2263 decomp = self.decompress
2258 # self._decompressor might be None, but will not be used in that case
2264 # self._decompressor might be None, but will not be used in that case
2259 def_decomp = self._decompressor
2265 def_decomp = self._decompressor
2260 for rev in revschunk:
2266 for rev in revschunk:
2261 chunkstart = start(rev)
2267 chunkstart = start(rev)
2262 if inline:
2268 if inline:
2263 chunkstart += (rev + 1) * iosize
2269 chunkstart += (rev + 1) * iosize
2264 chunklength = length(rev)
2270 chunklength = length(rev)
2265 comp_mode = self.index[rev][10]
2271 comp_mode = self.index[rev][10]
2266 c = buffer(data, chunkstart - offset, chunklength)
2272 c = buffer(data, chunkstart - offset, chunklength)
2267 if comp_mode == COMP_MODE_PLAIN:
2273 if comp_mode == COMP_MODE_PLAIN:
2268 ladd(c)
2274 ladd(c)
2269 elif comp_mode == COMP_MODE_INLINE:
2275 elif comp_mode == COMP_MODE_INLINE:
2270 ladd(decomp(c))
2276 ladd(decomp(c))
2271 elif comp_mode == COMP_MODE_DEFAULT:
2277 elif comp_mode == COMP_MODE_DEFAULT:
2272 ladd(def_decomp(c))
2278 ladd(def_decomp(c))
2273 else:
2279 else:
2274 msg = b'unknown compression mode %d'
2280 msg = b'unknown compression mode %d'
2275 msg %= comp_mode
2281 msg %= comp_mode
2276 raise error.RevlogError(msg)
2282 raise error.RevlogError(msg)
2277
2283
2278 return l
2284 return l
2279
2285
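The per-chunk arithmetic above is worth spelling out: one segment is read for the whole `[firstrev, lastrev]` span, and each chunk is then located inside it relative to the segment's start offset. A sketch of just that bookkeeping, where plain callables stand in for the bound methods cached at the top of `_chunks`:

```python
def chunk_positions(revs, segment_offset, start, length, inline, entry_size):
    """Yield (position-in-segment, size) for each revision's chunk.

    For inline revlogs, data chunks are interleaved with index entries in
    the same file, hence the (rev + 1) * entry_size shift.
    """
    for rev in revs:
        chunkstart = start(rev)
        if inline:
            chunkstart += (rev + 1) * entry_size
        yield chunkstart - segment_offset, length(rev)
```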
2280 def deltaparent(self, rev):
2286 def deltaparent(self, rev):
2281 """return deltaparent of the given revision"""
2287 """return deltaparent of the given revision"""
2282 base = self.index[rev][3]
2288 base = self.index[rev][3]
2283 if base == rev:
2289 if base == rev:
2284 return nullrev
2290 return nullrev
2285 elif self.delta_config.general_delta:
2291 elif self.delta_config.general_delta:
2286 return base
2292 return base
2287 else:
2293 else:
2288 return rev - 1
2294 return rev - 1
2289
2295
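`deltaparent()` encodes the storage rule that a full snapshot is its own base (reported as nullrev). A hedged sketch of walking a chain using nothing but that contract:

```python
def delta_chain(deltaparent, rev, nullrev=-1):
    """Collect the delta chain ending at ``rev``, oldest first.

    ``deltaparent`` is any callable honoring the contract above: it
    returns nullrev when the revision stores a full text.
    """
    chain = [rev]
    parent = deltaparent(rev)
    while parent != nullrev:
        chain.append(parent)
        parent = deltaparent(parent)
    chain.reverse()  # the full-text base comes first, as in _deltachain()
    return chain
```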
2290 def issnapshot(self, rev):
2296 def issnapshot(self, rev):
2291 """tells whether rev is a snapshot"""
2297 """tells whether rev is a snapshot"""
2292 if not self.delta_config.sparse_revlog:
2298 if not self.delta_config.sparse_revlog:
2293 return self.deltaparent(rev) == nullrev
2299 return self.deltaparent(rev) == nullrev
2294 elif hasattr(self.index, 'issnapshot'):
2300 elif hasattr(self.index, 'issnapshot'):
2295 # directly assign the method to cache both the hasattr test and the access
2301 # directly assign the method to cache both the hasattr test and the access
2296 self.issnapshot = self.index.issnapshot
2302 self.issnapshot = self.index.issnapshot
2297 return self.issnapshot(rev)
2303 return self.issnapshot(rev)
2298 if rev == nullrev:
2304 if rev == nullrev:
2299 return True
2305 return True
2300 entry = self.index[rev]
2306 entry = self.index[rev]
2301 base = entry[3]
2307 base = entry[3]
2302 if base == rev:
2308 if base == rev:
2303 return True
2309 return True
2304 if base == nullrev:
2310 if base == nullrev:
2305 return True
2311 return True
2306 p1 = entry[5]
2312 p1 = entry[5]
2307 while self.length(p1) == 0:
2313 while self.length(p1) == 0:
2308 b = self.deltaparent(p1)
2314 b = self.deltaparent(p1)
2309 if b == p1:
2315 if b == p1:
2310 break
2316 break
2311 p1 = b
2317 p1 = b
2312 p2 = entry[6]
2318 p2 = entry[6]
2313 while self.length(p2) == 0:
2319 while self.length(p2) == 0:
2314 b = self.deltaparent(p2)
2320 b = self.deltaparent(p2)
2315 if b == p2:
2321 if b == p2:
2316 break
2322 break
2317 p2 = b
2323 p2 = b
2318 if base == p1 or base == p2:
2324 if base == p1 or base == p2:
2319 return False
2325 return False
2320 return self.issnapshot(base)
2326 return self.issnapshot(base)
2321
2327
2322 def snapshotdepth(self, rev):
2328 def snapshotdepth(self, rev):
2323 """number of snapshot in the chain before this one"""
2329 """number of snapshot in the chain before this one"""
2324 if not self.issnapshot(rev):
2330 if not self.issnapshot(rev):
2325 raise error.ProgrammingError(b'revision %d not a snapshot')
2331 raise error.ProgrammingError(b'revision %d not a snapshot')
2326 return len(self._deltachain(rev)[0]) - 1
2332 return len(self._deltachain(rev)[0]) - 1
2327
2333
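Because a sparse-revlog snapshot deltas against another snapshot (see the recursion at the end of `issnapshot`), `snapshotdepth()` is just the chain length minus one. The same quantity written recursively, under that assumption:

```python
def snapshot_depth(issnapshot, deltaparent, rev, nullrev=-1):
    """Number of snapshots before ``rev`` in its delta chain.

    Assumes sparse-revlog semantics: every base of a snapshot is itself a
    snapshot, so the whole chain consists of snapshots.
    """
    if not issnapshot(rev):
        raise ValueError('revision %d is not a snapshot' % rev)
    base = deltaparent(rev)
    if base == nullrev:
        return 0
    return 1 + snapshot_depth(issnapshot, deltaparent, base, nullrev)
```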
2328 def revdiff(self, rev1, rev2):
2334 def revdiff(self, rev1, rev2):
2329 """return or calculate a delta between two revisions
2335 """return or calculate a delta between two revisions
2330
2336
2331 The delta calculated is in binary form and is intended to be written to
2337 The delta calculated is in binary form and is intended to be written to
2332 revlog data directly. So this function needs raw revision data.
2338 revlog data directly. So this function needs raw revision data.
2333 """
2339 """
2334 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2340 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2335 return bytes(self._chunk(rev2))
2341 return bytes(self._chunk(rev2))
2336
2342
2337 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2343 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2338
2344
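The fast path in `revdiff()` relies on the stored chunk being exactly the binary delta that, when applied to rev1's raw data, reproduces rev2's. That round-trip property is easy to check with mdiff directly (requires the mercurial package):

```python
from mercurial import mdiff

old = b'a\nb\nc\n'
new = b'a\nB\nc\nd\n'

# textdiff produces the binary delta format revlogs store; patches applies
# a list of such deltas in order.
delta = mdiff.textdiff(old, new)
assert mdiff.patches(old, [delta]) == new
```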
2339 def revision(self, nodeorrev):
2345 def revision(self, nodeorrev):
2340 """return an uncompressed revision of a given node or revision
2346 """return an uncompressed revision of a given node or revision
2341 number.
2347 number.
2342 """
2348 """
2343 return self._revisiondata(nodeorrev)
2349 return self._revisiondata(nodeorrev)
2344
2350
2345 def sidedata(self, nodeorrev):
2351 def sidedata(self, nodeorrev):
2346 """a map of extra data related to the changeset but not part of the hash
2352 """a map of extra data related to the changeset but not part of the hash
2347
2353
2348 This function currently returns a dictionary. However, a more
2354 This function currently returns a dictionary. However, a more
2349 advanced mapping object will likely be used in the future for more
2355 advanced mapping object will likely be used in the future for more
2350 efficient/lazy code.
2356 efficient/lazy code.
2351 """
2357 """
2352 # deal with <nodeorrev> argument type
2358 # deal with <nodeorrev> argument type
2353 if isinstance(nodeorrev, int):
2359 if isinstance(nodeorrev, int):
2354 rev = nodeorrev
2360 rev = nodeorrev
2355 else:
2361 else:
2356 rev = self.rev(nodeorrev)
2362 rev = self.rev(nodeorrev)
2357 return self._sidedata(rev)
2363 return self._sidedata(rev)
2358
2364
2359 def _revisiondata(self, nodeorrev, raw=False):
2365 def _revisiondata(self, nodeorrev, raw=False):
2360 # deal with <nodeorrev> argument type
2366 # deal with <nodeorrev> argument type
2361 if isinstance(nodeorrev, int):
2367 if isinstance(nodeorrev, int):
2362 rev = nodeorrev
2368 rev = nodeorrev
2363 node = self.node(rev)
2369 node = self.node(rev)
2364 else:
2370 else:
2365 node = nodeorrev
2371 node = nodeorrev
2366 rev = None
2372 rev = None
2367
2373
2368 # fast path the special `nullid` rev
2374 # fast path the special `nullid` rev
2369 if node == self.nullid:
2375 if node == self.nullid:
2370 return b""
2376 return b""
2371
2377
2372 # ``rawtext`` is the text as stored inside the revlog. Might be the
2378 # ``rawtext`` is the text as stored inside the revlog. Might be the
2373 # revision or might need to be processed to retrieve the revision.
2379 # revision or might need to be processed to retrieve the revision.
2374 rev, rawtext, validated = self._rawtext(node, rev)
2380 rev, rawtext, validated = self._rawtext(node, rev)
2375
2381
2376 if raw and validated:
2382 if raw and validated:
2377 # if we don't need to process the raw text and the raw
2383 # if we don't need to process the raw text and the raw
2378 # text is already cached, we can exit early.
2384 # text is already cached, we can exit early.
2379 return rawtext
2385 return rawtext
2380 if rev is None:
2386 if rev is None:
2381 rev = self.rev(node)
2387 rev = self.rev(node)
2382 # the revlog's flags for this revision
2388 # the revlog's flags for this revision
2383 # (they usually alter its state or content)
2389 # (they usually alter its state or content)
2384 flags = self.flags(rev)
2390 flags = self.flags(rev)
2385
2391
2386 if validated and flags == REVIDX_DEFAULT_FLAGS:
2392 if validated and flags == REVIDX_DEFAULT_FLAGS:
2387 # no extra flags set, no flag processor runs, text = rawtext
2393 # no extra flags set, no flag processor runs, text = rawtext
2388 return rawtext
2394 return rawtext
2389
2395
2390 if raw:
2396 if raw:
2391 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2397 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2392 text = rawtext
2398 text = rawtext
2393 else:
2399 else:
2394 r = flagutil.processflagsread(self, rawtext, flags)
2400 r = flagutil.processflagsread(self, rawtext, flags)
2395 text, validatehash = r
2401 text, validatehash = r
2396 if validatehash:
2402 if validatehash:
2397 self.checkhash(text, node, rev=rev)
2403 self.checkhash(text, node, rev=rev)
2398 if not validated:
2404 if not validated:
2399 self._revisioncache = (node, rev, rawtext)
2405 self._revisioncache = (node, rev, rawtext)
2400
2406
2401 return text
2407 return text
2402
2408
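On the read side, flag processing is a chain of per-flag transforms plus a hash-validation verdict. A toy version of what `flagutil.processflagsread()` does, with a hypothetical processor table (the real registry and calling convention live in `mercurial.revlogutils.flagutil`):

```python
def apply_read_transforms(rawtext, flags, processors):
    """Apply read-side transforms for every flag bit set on a revision.

    ``processors`` maps a flag bit to (readfn, validates_hash); this is a
    simplified stand-in for the real flag-processor registry.
    """
    text = rawtext
    validatehash = True
    for bit, (readfn, validates) in sorted(processors.items()):
        if flags & bit:
            text = readfn(text)
            validatehash = validatehash and validates
    return text, validatehash
```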
2403 def _rawtext(self, node, rev):
2409 def _rawtext(self, node, rev):
2404 """return the possibly unvalidated rawtext for a revision
2410 """return the possibly unvalidated rawtext for a revision
2405
2411
2406 returns (rev, rawtext, validated)
2412 returns (rev, rawtext, validated)
2407 """
2413 """
2408
2414
2409 # revision in the cache (could be useful to apply delta)
2415 # revision in the cache (could be useful to apply delta)
2410 cachedrev = None
2416 cachedrev = None
2411 # An intermediate text to apply deltas to
2417 # An intermediate text to apply deltas to
2412 basetext = None
2418 basetext = None
2413
2419
2414 # Check if we have the entry in cache
2420 # Check if we have the entry in cache
2415 # The cache entry looks like (node, rev, rawtext)
2421 # The cache entry looks like (node, rev, rawtext)
2416 if self._revisioncache:
2422 if self._revisioncache:
2417 if self._revisioncache[0] == node:
2423 if self._revisioncache[0] == node:
2418 return (rev, self._revisioncache[2], True)
2424 return (rev, self._revisioncache[2], True)
2419 cachedrev = self._revisioncache[1]
2425 cachedrev = self._revisioncache[1]
2420
2426
2421 if rev is None:
2427 if rev is None:
2422 rev = self.rev(node)
2428 rev = self.rev(node)
2423
2429
2424 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2430 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2425 if stopped:
2431 if stopped:
2426 basetext = self._revisioncache[2]
2432 basetext = self._revisioncache[2]
2427
2433
2428 # drop the cache to save memory; the caller is expected to
2434 # drop the cache to save memory; the caller is expected to
2429 # update self._revisioncache after validating the text
2435 # update self._revisioncache after validating the text
2430 self._revisioncache = None
2436 self._revisioncache = None
2431
2437
2432 targetsize = None
2438 targetsize = None
2433 rawsize = self.index[rev][2]
2439 rawsize = self.index[rev][2]
2434 if 0 <= rawsize:
2440 if 0 <= rawsize:
2435 targetsize = 4 * rawsize
2441 targetsize = 4 * rawsize
2436
2442
2437 bins = self._chunks(chain, targetsize=targetsize)
2443 bins = self._chunks(chain, targetsize=targetsize)
2438 if basetext is None:
2444 if basetext is None:
2439 basetext = bytes(bins[0])
2445 basetext = bytes(bins[0])
2440 bins = bins[1:]
2446 bins = bins[1:]
2441
2447
2442 rawtext = mdiff.patches(basetext, bins)
2448 rawtext = mdiff.patches(basetext, bins)
2443 del basetext # let us have a chance to free memory early
2449 del basetext # let us have a chance to free memory early
2444 return (rev, rawtext, False)
2450 return (rev, rawtext, False)
2445
2451
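The reconstruction at the end of `_rawtext()` is compact enough to miss: the first decompressed chunk of the chain is a full text and every later chunk is a binary delta. Isolated, and assuming the mercurial package for mdiff:

```python
from mercurial import mdiff

def build_rawtext(chunks):
    """Rebuild a revision from its decompressed delta chain.

    ``chunks[0]`` must be the full-text base; the rest are binary deltas
    applied on top in one pass by mdiff.patches().
    """
    basetext = bytes(chunks[0])
    return mdiff.patches(basetext, list(chunks[1:]))
```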
2446 def _sidedata(self, rev):
2452 def _sidedata(self, rev):
2447 """Return the sidedata for a given revision number."""
2453 """Return the sidedata for a given revision number."""
2448 index_entry = self.index[rev]
2454 index_entry = self.index[rev]
2449 sidedata_offset = index_entry[8]
2455 sidedata_offset = index_entry[8]
2450 sidedata_size = index_entry[9]
2456 sidedata_size = index_entry[9]
2451
2457
2452 if self._inline:
2458 if self._inline:
2453 sidedata_offset += self.index.entry_size * (1 + rev)
2459 sidedata_offset += self.index.entry_size * (1 + rev)
2454 if sidedata_size == 0:
2460 if sidedata_size == 0:
2455 return {}
2461 return {}
2456
2462
2457 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2463 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2458 filename = self._sidedatafile
2464 filename = self._sidedatafile
2459 end = self._docket.sidedata_end
2465 end = self._docket.sidedata_end
2460 offset = sidedata_offset
2466 offset = sidedata_offset
2461 length = sidedata_size
2467 length = sidedata_size
2462 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2468 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2463 raise error.RevlogError(m)
2469 raise error.RevlogError(m)
2464
2470
2465 comp_segment = self._inner._segmentfile_sidedata.read_chunk(
2471 comp_segment = self._inner._segmentfile_sidedata.read_chunk(
2466 sidedata_offset, sidedata_size
2472 sidedata_offset, sidedata_size
2467 )
2473 )
2468
2474
2469 comp = self.index[rev][11]
2475 comp = self.index[rev][11]
2470 if comp == COMP_MODE_PLAIN:
2476 if comp == COMP_MODE_PLAIN:
2471 segment = comp_segment
2477 segment = comp_segment
2472 elif comp == COMP_MODE_DEFAULT:
2478 elif comp == COMP_MODE_DEFAULT:
2473 segment = self._decompressor(comp_segment)
2479 segment = self._decompressor(comp_segment)
2474 elif comp == COMP_MODE_INLINE:
2480 elif comp == COMP_MODE_INLINE:
2475 segment = self.decompress(comp_segment)
2481 segment = self.decompress(comp_segment)
2476 else:
2482 else:
2477 msg = b'unknown compression mode %d'
2483 msg = b'unknown compression mode %d'
2478 msg %= comp
2484 msg %= comp
2479 raise error.RevlogError(msg)
2485 raise error.RevlogError(msg)
2480
2486
2481 sidedata = sidedatautil.deserialize_sidedata(segment)
2487 sidedata = sidedatautil.deserialize_sidedata(segment)
2482 return sidedata
2488 return sidedata
2483
2489
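The only subtle step in `_sidedata()` is the inline adjustment: in an inline revlog the index entries and revision data share one file, so a recorded offset must be shifted past the (rev + 1) index entries that precede the data. In isolation:

```python
def absolute_sidedata_offset(recorded_offset, rev, entry_size, inline):
    """Translate a recorded sidedata offset into an absolute file position."""
    if inline:
        # (rev + 1) fixed-size index entries precede this revision's data
        recorded_offset += entry_size * (1 + rev)
    return recorded_offset
```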
2484 def rawdata(self, nodeorrev):
2490 def rawdata(self, nodeorrev):
2485 """return an uncompressed raw data of a given node or revision number."""
2491 """return an uncompressed raw data of a given node or revision number."""
2486 return self._revisiondata(nodeorrev, raw=True)
2492 return self._revisiondata(nodeorrev, raw=True)
2487
2493
2488 def hash(self, text, p1, p2):
2494 def hash(self, text, p1, p2):
2489 """Compute a node hash.
2495 """Compute a node hash.
2490
2496
2491 Available as a function so that subclasses can replace the hash
2497 Available as a function so that subclasses can replace the hash
2492 as needed.
2498 as needed.
2493 """
2499 """
2494 return storageutil.hashrevisionsha1(text, p1, p2)
2500 return storageutil.hashrevisionsha1(text, p1, p2)
2495
2501
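As I read `storageutil.hashrevisionsha1()`, the node is a SHA-1 over the two parent nodes in sorted order followed by the text, which makes it independent of parent ordering. A hedged re-implementation, for illustration only:

```python
import hashlib

def hashrevision_sha1(text, p1, p2):
    """Presumed equivalent of storageutil.hashrevisionsha1().

    Sorting the parents first means hash(text, p1, p2) == hash(text, p2, p1).
    """
    a, b = sorted((p1, p2))
    return hashlib.sha1(a + b + text).digest()
```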
2496 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2502 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2497 """Check node hash integrity.
2503 """Check node hash integrity.
2498
2504
2499 Available as a function so that subclasses can extend hash mismatch
2505 Available as a function so that subclasses can extend hash mismatch
2500 behaviors as needed.
2506 behaviors as needed.
2501 """
2507 """
2502 try:
2508 try:
2503 if p1 is None and p2 is None:
2509 if p1 is None and p2 is None:
2504 p1, p2 = self.parents(node)
2510 p1, p2 = self.parents(node)
2505 if node != self.hash(text, p1, p2):
2511 if node != self.hash(text, p1, p2):
2506 # Clear the revision cache on hash failure. The revision cache
2512 # Clear the revision cache on hash failure. The revision cache
2507 # only stores the raw revision and clearing the cache does have
2513 # only stores the raw revision and clearing the cache does have
2508 # the side-effect that we won't have a cache hit when the raw
2514 # the side-effect that we won't have a cache hit when the raw
2509 # revision data is accessed. But this case should be rare and
2515 # revision data is accessed. But this case should be rare and
2510 # it is extra work to teach the cache about the hash
2516 # it is extra work to teach the cache about the hash
2511 # verification state.
2517 # verification state.
2512 if self._revisioncache and self._revisioncache[0] == node:
2518 if self._revisioncache and self._revisioncache[0] == node:
2513 self._revisioncache = None
2519 self._revisioncache = None
2514
2520
2515 revornode = rev
2521 revornode = rev
2516 if revornode is None:
2522 if revornode is None:
2517 revornode = templatefilters.short(hex(node))
2523 revornode = templatefilters.short(hex(node))
2518 raise error.RevlogError(
2524 raise error.RevlogError(
2519 _(b"integrity check failed on %s:%s")
2525 _(b"integrity check failed on %s:%s")
2520 % (self.display_id, pycompat.bytestr(revornode))
2526 % (self.display_id, pycompat.bytestr(revornode))
2521 )
2527 )
2522 except error.RevlogError:
2528 except error.RevlogError:
2523 if self.feature_config.censorable and storageutil.iscensoredtext(
2529 if self.feature_config.censorable and storageutil.iscensoredtext(
2524 text
2530 text
2525 ):
2531 ):
2526 raise error.CensoredNodeError(self.display_id, node, text)
2532 raise error.CensoredNodeError(self.display_id, node, text)
2527 raise
2533 raise
2528
2534
2529 @property
2535 @property
2530 def _split_index_file(self):
2536 def _split_index_file(self):
2531 """the path where to expect the index of an ongoing splitting operation
2537 """the path where to expect the index of an ongoing splitting operation
2532
2538
2533 The file will only exist if a splitting operation is in progress, but
2539 The file will only exist if a splitting operation is in progress, but
2534 it is always expected at the same location."""
2540 it is always expected at the same location."""
2535 parts = self.radix.split(b'/')
2541 parts = self.radix.split(b'/')
2536 if len(parts) > 1:
2542 if len(parts) > 1:
2537 # adds a '-s' prefix to the `data/` or `meta/` base
2543 # adds a '-s' prefix to the `data/` or `meta/` base
2538 head = parts[0] + b'-s'
2544 head = parts[0] + b'-s'
2539 mids = parts[1:-1]
2545 mids = parts[1:-1]
2540 tail = parts[-1] + b'.i'
2546 tail = parts[-1] + b'.i'
2541 pieces = [head] + mids + [tail]
2547 pieces = [head] + mids + [tail]
2542 return b'/'.join(pieces)
2548 return b'/'.join(pieces)
2543 else:
2549 else:
2544 # the revlog is stored at the root of the store (changelog or
2550 # the revlog is stored at the root of the store (changelog or
2545 # manifest), no risk of collision.
2551 # manifest), no risk of collision.
2546 return self.radix + b'.i.s'
2552 return self.radix + b'.i.s'
2547
2553
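A standalone version of the property above makes the naming scheme concrete; the example radixes are illustrative:

```python
def split_index_path(radix):
    """Compute the temporary index path used while splitting an inline revlog."""
    parts = radix.split(b'/')
    if len(parts) > 1:
        head = parts[0] + b'-s'  # 'data' -> 'data-s', avoiding collisions
        return b'/'.join([head] + parts[1:-1] + [parts[-1] + b'.i'])
    return radix + b'.i.s'       # store-root revlogs: plain suffix

assert split_index_path(b'data/some/dir/file') == b'data-s/some/dir/file.i'
assert split_index_path(b'00changelog') == b'00changelog.i.s'
```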
2548 def _enforceinlinesize(self, tr, side_write=True):
2554 def _enforceinlinesize(self, tr, side_write=True):
2549 """Check if the revlog is too big for inline and convert if so.
2555 """Check if the revlog is too big for inline and convert if so.
2550
2556
2551 This should be called after revisions are added to the revlog. If the
2557 This should be called after revisions are added to the revlog. If the
2552 revlog has grown too large to be an inline revlog, it will convert it
2558 revlog has grown too large to be an inline revlog, it will convert it
2553 to use multiple index and data files.
2559 to use multiple index and data files.
2554 """
2560 """
2555 tiprev = len(self) - 1
2561 tiprev = len(self) - 1
2556 total_size = self.start(tiprev) + self.length(tiprev)
2562 total_size = self.start(tiprev) + self.length(tiprev)
2557 if not self._inline or total_size < _maxinline:
2563 if not self._inline or total_size < _maxinline:
2558 return
2564 return
2559
2565
2560 troffset = tr.findoffset(self._indexfile)
2566 troffset = tr.findoffset(self._indexfile)
2561 if troffset is None:
2567 if troffset is None:
2562 raise error.RevlogError(
2568 raise error.RevlogError(
2563 _(b"%s not found in the transaction") % self._indexfile
2569 _(b"%s not found in the transaction") % self._indexfile
2564 )
2570 )
2565 if troffset:
2571 if troffset:
2566 tr.addbackup(self._indexfile, for_offset=True)
2572 tr.addbackup(self._indexfile, for_offset=True)
2567 tr.add(self._datafile, 0)
2573 tr.add(self._datafile, 0)
2568
2574
2569 existing_handles = False
2575 existing_handles = False
2570 if self._inner._writinghandles is not None:
2576 if self._inner._writinghandles is not None:
2571 existing_handles = True
2577 existing_handles = True
2572 fp = self._inner._writinghandles[0]
2578 fp = self._inner._writinghandles[0]
2573 fp.flush()
2579 fp.flush()
2574 fp.close()
2580 fp.close()
2575 # We can't use the cached file handle after close(). So prevent
2581 # We can't use the cached file handle after close(). So prevent
2576 # its usage.
2582 # its usage.
2577 self._inner._writinghandles = None
2583 self._inner._writinghandles = None
2578 self._inner._segmentfile.writing_handle = None
2584 self._inner._segmentfile.writing_handle = None
2579 # No need to deal with sidedata writing handle as it is only
2585 # No need to deal with sidedata writing handle as it is only
2580 # relevant with revlog-v2 which is never inline, not reaching
2586 # relevant with revlog-v2 which is never inline, not reaching
2581 # this code
2587 # this code
2582 if side_write:
2588 if side_write:
2583 old_index_file_path = self._indexfile
2589 old_index_file_path = self._indexfile
2584 new_index_file_path = self._split_index_file
2590 new_index_file_path = self._split_index_file
2585 opener = self.opener
2591 opener = self.opener
2586 weak_self = weakref.ref(self)
2592 weak_self = weakref.ref(self)
2587
2593
2588 # the "split" index replace the real index when the transaction is finalized
2594 # the "split" index replace the real index when the transaction is finalized
2589 def finalize_callback(tr):
2595 def finalize_callback(tr):
2590 opener.rename(
2596 opener.rename(
2591 new_index_file_path,
2597 new_index_file_path,
2592 old_index_file_path,
2598 old_index_file_path,
2593 checkambig=True,
2599 checkambig=True,
2594 )
2600 )
2595 maybe_self = weak_self()
2601 maybe_self = weak_self()
2596 if maybe_self is not None:
2602 if maybe_self is not None:
2597 maybe_self._indexfile = old_index_file_path
2603 maybe_self._indexfile = old_index_file_path
2598 maybe_self._inner.index_file = maybe_self._indexfile
2604 maybe_self._inner.index_file = maybe_self._indexfile
2599
2605
2600 def abort_callback(tr):
2606 def abort_callback(tr):
2601 maybe_self = weak_self()
2607 maybe_self = weak_self()
2602 if maybe_self is not None:
2608 if maybe_self is not None:
2603 maybe_self._indexfile = old_index_file_path
2609 maybe_self._indexfile = old_index_file_path
2604 maybe_self._inner.index_file = old_index_file_path
2610 maybe_self._inner.index_file = old_index_file_path
2605
2611
2606 tr.registertmp(new_index_file_path)
2612 tr.registertmp(new_index_file_path)
2607 if self.target[1] is not None:
2613 if self.target[1] is not None:
2608 callback_id = b'000-revlog-split-%d-%s' % self.target
2614 callback_id = b'000-revlog-split-%d-%s' % self.target
2609 else:
2615 else:
2610 callback_id = b'000-revlog-split-%d' % self.target[0]
2616 callback_id = b'000-revlog-split-%d' % self.target[0]
2611 tr.addfinalize(callback_id, finalize_callback)
2617 tr.addfinalize(callback_id, finalize_callback)
2612 tr.addabort(callback_id, abort_callback)
2618 tr.addabort(callback_id, abort_callback)
2613
2619
2614 new_dfh = self._datafp(b'w+')
2620 new_dfh = self._datafp(b'w+')
2615 new_dfh.truncate(0) # drop any potentially existing data
2621 new_dfh.truncate(0) # drop any potentially existing data
2616 try:
2622 try:
2617 with self.reading():
2623 with self.reading():
2618 for r in self:
2624 for r in self:
2619 new_dfh.write(self._getsegmentforrevs(r, r)[1])
2625 new_dfh.write(self._inner.get_segment_for_revs(r, r)[1])
2620 new_dfh.flush()
2626 new_dfh.flush()
2621
2627
2622 if side_write:
2628 if side_write:
2623 self._indexfile = new_index_file_path
2629 self._indexfile = new_index_file_path
2624 self._inner.index_file = self._indexfile
2630 self._inner.index_file = self._indexfile
2625 with self._inner._InnerRevlog__index_new_fp() as fp:
2631 with self._inner._InnerRevlog__index_new_fp() as fp:
2626 self._format_flags &= ~FLAG_INLINE_DATA
2632 self._format_flags &= ~FLAG_INLINE_DATA
2627 self._inline = False
2633 self._inline = False
2628 self._inner.inline = False
2634 self._inner.inline = False
2629 for i in self:
2635 for i in self:
2630 e = self.index.entry_binary(i)
2636 e = self.index.entry_binary(i)
2631 if i == 0 and self._docket is None:
2637 if i == 0 and self._docket is None:
2632 header = self._format_flags | self._format_version
2638 header = self._format_flags | self._format_version
2633 header = self.index.pack_header(header)
2639 header = self.index.pack_header(header)
2634 e = header + e
2640 e = header + e
2635 fp.write(e)
2641 fp.write(e)
2636 if self._docket is not None:
2642 if self._docket is not None:
2637 self._docket.index_end = fp.tell()
2643 self._docket.index_end = fp.tell()
2638
2644
2639 # If we don't use side-write, the temp file replaces the real
2645 # If we don't use side-write, the temp file replaces the real
2640 # index when we exit the context manager
2646 # index when we exit the context manager
2641
2647
2642 nodemaputil.setup_persistent_nodemap(tr, self)
2648 nodemaputil.setup_persistent_nodemap(tr, self)
2643 self._inner._segmentfile = randomaccessfile.randomaccessfile(
2649 self._inner._segmentfile = randomaccessfile.randomaccessfile(
2644 self.opener,
2650 self.opener,
2645 self._datafile,
2651 self._datafile,
2646 self.data_config.chunk_cache_size,
2652 self.data_config.chunk_cache_size,
2647 )
2653 )
2648
2654
2649 if existing_handles:
2655 if existing_handles:
2650 # switched from inline to conventional; reopen the index
2656 # switched from inline to conventional; reopen the index
2651 index_end = None
2657 index_end = None
2652 if self._docket is not None:
2658 if self._docket is not None:
2653 index_end = self._docket.index_end
2659 index_end = self._docket.index_end
2654 ifh = self._inner._InnerRevlog__index_write_fp(
2660 ifh = self._inner._InnerRevlog__index_write_fp(
2655 index_end=index_end
2661 index_end=index_end
2656 )
2662 )
2657 self._inner._writinghandles = (ifh, new_dfh, None)
2663 self._inner._writinghandles = (ifh, new_dfh, None)
2658 self._inner._segmentfile.writing_handle = new_dfh
2664 self._inner._segmentfile.writing_handle = new_dfh
2659 new_dfh = None
2665 new_dfh = None
2660 # No need to deal with sidedata writing handle as it is only
2666 # No need to deal with sidedata writing handle as it is only
2661 # relevant with revlog-v2 which is never inline, not reaching
2667 # relevant with revlog-v2 which is never inline, not reaching
2662 # this code
2668 # this code
2663 finally:
2669 finally:
2664 if new_dfh is not None:
2670 if new_dfh is not None:
2665 new_dfh.close()
2671 new_dfh.close()
2666
2672
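The trigger condition at the top of `_enforceinlinesize()` is simply "total inline payload reached `_maxinline`". A sketch, with the threshold given as an assumption (the real `_maxinline` is defined elsewhere in this module):

```python
def needs_split(inline, start, length, tiprev, maxinline=131072):
    """True when an inline revlog has outgrown the inline threshold.

    ``start``/``length`` stand in for the bound revlog methods; 131072 is
    an assumed value for _maxinline, not read from the source here.
    """
    total_size = start(tiprev) + length(tiprev)
    return inline and total_size >= maxinline
```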
2667 def _nodeduplicatecallback(self, transaction, node):
2673 def _nodeduplicatecallback(self, transaction, node):
2668 """called when trying to add a node already stored."""
2674 """called when trying to add a node already stored."""
2669
2675
2670 @contextlib.contextmanager
2676 @contextlib.contextmanager
2671 def reading(self):
2677 def reading(self):
2672 with self._inner.reading():
2678 with self._inner.reading():
2673 yield
2679 yield
2674
2680
2675 @contextlib.contextmanager
2681 @contextlib.contextmanager
2676 def _writing(self, transaction):
2682 def _writing(self, transaction):
2677 if self._trypending:
2683 if self._trypending:
2678 msg = b'try to write in a `trypending` revlog: %s'
2684 msg = b'try to write in a `trypending` revlog: %s'
2679 msg %= self.display_id
2685 msg %= self.display_id
2680 raise error.ProgrammingError(msg)
2686 raise error.ProgrammingError(msg)
2681 if self._inner.is_writing:
2687 if self._inner.is_writing:
2682 yield
2688 yield
2683 else:
2689 else:
2684 data_end = None
2690 data_end = None
2685 sidedata_end = None
2691 sidedata_end = None
2686 if self._docket is not None:
2692 if self._docket is not None:
2687 data_end = self._docket.data_end
2693 data_end = self._docket.data_end
2688 sidedata_end = self._docket.sidedata_end
2694 sidedata_end = self._docket.sidedata_end
2689 with self._inner.writing(
2695 with self._inner.writing(
2690 transaction,
2696 transaction,
2691 data_end=data_end,
2697 data_end=data_end,
2692 sidedata_end=sidedata_end,
2698 sidedata_end=sidedata_end,
2693 ):
2699 ):
2694 yield
2700 yield
2695 if self._docket is not None:
2701 if self._docket is not None:
2696 self._write_docket(transaction)
2702 self._write_docket(transaction)
2697
2703
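Callers are expected to wrap batches of writes in one `_writing()` context so handles, docket offsets, and the final docket write are managed once per transaction. A hypothetical caller, for shape only:

```python
# Hypothetical usage; ``rl``, ``tr``, ``linkrev`` and ``incoming`` are
# placeholders, not names from this module.
def add_many(rl, tr, linkrev, incoming):
    with rl._writing(tr):  # reentrant: a nested call just yields
        for text, p1, p2 in incoming:
            rl.addrevision(text, tr, linkrev, p1, p2)
```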
2698 def _write_docket(self, transaction):
2704 def _write_docket(self, transaction):
2699 """write the current docket on disk
2705 """write the current docket on disk
2700
2706
2701 Exists as a method to help the changelog implement transaction logic
2707 Exists as a method to help the changelog implement transaction logic
2702
2708
2703 We could also imagine using the same transaction logic for all revlogs
2709 We could also imagine using the same transaction logic for all revlogs
2704 since dockets are cheap."""
2710 since dockets are cheap."""
2705 self._docket.write(transaction)
2711 self._docket.write(transaction)
2706
2712
2707 def addrevision(
2713 def addrevision(
2708 self,
2714 self,
2709 text,
2715 text,
2710 transaction,
2716 transaction,
2711 link,
2717 link,
2712 p1,
2718 p1,
2713 p2,
2719 p2,
2714 cachedelta=None,
2720 cachedelta=None,
2715 node=None,
2721 node=None,
2716 flags=REVIDX_DEFAULT_FLAGS,
2722 flags=REVIDX_DEFAULT_FLAGS,
2717 deltacomputer=None,
2723 deltacomputer=None,
2718 sidedata=None,
2724 sidedata=None,
2719 ):
2725 ):
2720 """add a revision to the log
2726 """add a revision to the log
2721
2727
2722 text - the revision data to add
2728 text - the revision data to add
2723 transaction - the transaction object used for rollback
2729 transaction - the transaction object used for rollback
2724 link - the linkrev data to add
2730 link - the linkrev data to add
2725 p1, p2 - the parent nodeids of the revision
2731 p1, p2 - the parent nodeids of the revision
2726 cachedelta - an optional precomputed delta
2732 cachedelta - an optional precomputed delta
2727 node - nodeid of revision; typically node is not specified, and it is
2733 node - nodeid of revision; typically node is not specified, and it is
2728 computed by default as hash(text, p1, p2); however, subclasses might
2734 computed by default as hash(text, p1, p2); however, subclasses might
2729 use a different hashing method (and override checkhash() in that case)
2735 use a different hashing method (and override checkhash() in that case)
2730 flags - the known flags to set on the revision
2736 flags - the known flags to set on the revision
2731 deltacomputer - an optional deltacomputer instance shared between
2737 deltacomputer - an optional deltacomputer instance shared between
2732 multiple calls
2738 multiple calls
2733 """
2739 """
2734 if link == nullrev:
2740 if link == nullrev:
2735 raise error.RevlogError(
2741 raise error.RevlogError(
2736 _(b"attempted to add linkrev -1 to %s") % self.display_id
2742 _(b"attempted to add linkrev -1 to %s") % self.display_id
2737 )
2743 )
2738
2744
2739 if sidedata is None:
2745 if sidedata is None:
2740 sidedata = {}
2746 sidedata = {}
2741 elif sidedata and not self.feature_config.has_side_data:
2747 elif sidedata and not self.feature_config.has_side_data:
2742 raise error.ProgrammingError(
2748 raise error.ProgrammingError(
2743 _(b"trying to add sidedata to a revlog who don't support them")
2749 _(b"trying to add sidedata to a revlog who don't support them")
2744 )
2750 )
2745
2751
2746 if flags:
2752 if flags:
2747 node = node or self.hash(text, p1, p2)
2753 node = node or self.hash(text, p1, p2)
2748
2754
2749 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2755 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2750
2756
2751 # If the flag processor modifies the revision data, ignore any provided
2757 # If the flag processor modifies the revision data, ignore any provided
2752 # cachedelta.
2758 # cachedelta.
2753 if rawtext != text:
2759 if rawtext != text:
2754 cachedelta = None
2760 cachedelta = None
2755
2761
2756 if len(rawtext) > _maxentrysize:
2762 if len(rawtext) > _maxentrysize:
2757 raise error.RevlogError(
2763 raise error.RevlogError(
2758 _(
2764 _(
2759 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2765 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2760 )
2766 )
2761 % (self.display_id, len(rawtext))
2767 % (self.display_id, len(rawtext))
2762 )
2768 )
2763
2769
2764 node = node or self.hash(rawtext, p1, p2)
2770 node = node or self.hash(rawtext, p1, p2)
2765 rev = self.index.get_rev(node)
2771 rev = self.index.get_rev(node)
2766 if rev is not None:
2772 if rev is not None:
2767 return rev
2773 return rev
2768
2774
2769 if validatehash:
2775 if validatehash:
2770 self.checkhash(rawtext, node, p1=p1, p2=p2)
2776 self.checkhash(rawtext, node, p1=p1, p2=p2)
2771
2777
2772 return self.addrawrevision(
2778 return self.addrawrevision(
2773 rawtext,
2779 rawtext,
2774 transaction,
2780 transaction,
2775 link,
2781 link,
2776 p1,
2782 p1,
2777 p2,
2783 p2,
2778 node,
2784 node,
2779 flags,
2785 flags,
2780 cachedelta=cachedelta,
2786 cachedelta=cachedelta,
2781 deltacomputer=deltacomputer,
2787 deltacomputer=deltacomputer,
2782 sidedata=sidedata,
2788 sidedata=sidedata,
2783 )
2789 )
2784
2790
2785 def addrawrevision(
2791 def addrawrevision(
2786 self,
2792 self,
2787 rawtext,
2793 rawtext,
2788 transaction,
2794 transaction,
2789 link,
2795 link,
2790 p1,
2796 p1,
2791 p2,
2797 p2,
2792 node,
2798 node,
2793 flags,
2799 flags,
2794 cachedelta=None,
2800 cachedelta=None,
2795 deltacomputer=None,
2801 deltacomputer=None,
2796 sidedata=None,
2802 sidedata=None,
2797 ):
2803 ):
2798 """add a raw revision with known flags, node and parents
2804 """add a raw revision with known flags, node and parents
2799 useful when reusing a revision not stored in this revlog (e.g. received
2805 useful when reusing a revision not stored in this revlog (e.g. received
2800 over the wire, or read from an external bundle).
2806 over the wire, or read from an external bundle).
2801 """
2807 """
2802 with self._writing(transaction):
2808 with self._writing(transaction):
2803 return self._addrevision(
2809 return self._addrevision(
2804 node,
2810 node,
2805 rawtext,
2811 rawtext,
2806 transaction,
2812 transaction,
2807 link,
2813 link,
2808 p1,
2814 p1,
2809 p2,
2815 p2,
2810 flags,
2816 flags,
2811 cachedelta,
2817 cachedelta,
2812 deltacomputer=deltacomputer,
2818 deltacomputer=deltacomputer,
2813 sidedata=sidedata,
2819 sidedata=sidedata,
2814 )
2820 )
2815
2821
2816 def compress(self, data):
2822 def compress(self, data):
2817 """Generate a possibly-compressed representation of data."""
2823 """Generate a possibly-compressed representation of data."""
2818 if not data:
2824 if not data:
2819 return b'', data
2825 return b'', data
2820
2826
2821 compressed = self._compressor.compress(data)
2827 compressed = self._compressor.compress(data)
2822
2828
2823 if compressed:
2829 if compressed:
2824 # The revlog compressor added the header in the returned data.
2830 # The revlog compressor added the header in the returned data.
2825 return b'', compressed
2831 return b'', compressed
2826
2832
2827 if data[0:1] == b'\0':
2833 if data[0:1] == b'\0':
2828 return b'', data
2834 return b'', data
2829 return b'u', data
2835 return b'u', data
2830
2836
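The empty-header case works because common engines are self-identifying: zlib output begins with b'x', which `decompress()` below uses to route the chunk. A quick check of that assumption:

```python
import zlib

data = b'some revision text ' * 64
comp = zlib.compress(data)

# default zlib streams start with 0x78 == b'x', so no extra marker is needed
assert comp[0:1] == b'x'
assert zlib.decompress(comp) == data
```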
2831 def decompress(self, data):
2837 def decompress(self, data):
2832 """Decompress a revlog chunk.
2838 """Decompress a revlog chunk.
2833
2839
2834 The chunk is expected to begin with a header identifying the
2840 The chunk is expected to begin with a header identifying the
2835 format type so it can be routed to an appropriate decompressor.
2841 format type so it can be routed to an appropriate decompressor.
2836 """
2842 """
2837 if not data:
2843 if not data:
2838 return data
2844 return data
2839
2845
2840 # Revlogs are read much more frequently than they are written and many
2846 # Revlogs are read much more frequently than they are written and many
2841 # chunks only take microseconds to decompress, so performance is
2847 # chunks only take microseconds to decompress, so performance is
2842 # important here.
2848 # important here.
2843 #
2849 #
2844 # We can make a few assumptions about revlogs:
2850 # We can make a few assumptions about revlogs:
2845 #
2851 #
2846 # 1) the majority of chunks will be compressed (as opposed to inline
2852 # 1) the majority of chunks will be compressed (as opposed to inline
2847 # raw data).
2853 # raw data).
2848 # 2) decompressing *any* data will likely be at least 10x slower than
2854 # 2) decompressing *any* data will likely be at least 10x slower than
2849 # returning raw inline data.
2855 # returning raw inline data.
2850 # 3) we want to prioritize common and officially supported compression
2856 # 3) we want to prioritize common and officially supported compression
2851 # engines
2857 # engines
2852 #
2858 #
2853 # It follows that we want to optimize for "decompress compressed data
2859 # It follows that we want to optimize for "decompress compressed data
2854 # when encoded with common and officially supported compression engines"
2860 # when encoded with common and officially supported compression engines"
2855 # case over "raw data" and "data encoded by less common or non-official
2861 # case over "raw data" and "data encoded by less common or non-official
2856 # compression engines." That is why we have the inline lookup first
2862 # compression engines." That is why we have the inline lookup first
2857 # followed by the compengines lookup.
2863 # followed by the compengines lookup.
2858 #
2864 #
2859 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2865 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2860 # compressed chunks. And this matters for changelog and manifest reads.
2866 # compressed chunks. And this matters for changelog and manifest reads.
2861 t = data[0:1]
2867 t = data[0:1]
2862
2868
2863 if t == b'x':
2869 if t == b'x':
2864 try:
2870 try:
2865 return _zlibdecompress(data)
2871 return _zlibdecompress(data)
2866 except zlib.error as e:
2872 except zlib.error as e:
2867 raise error.RevlogError(
2873 raise error.RevlogError(
2868 _(b'revlog decompress error: %s')
2874 _(b'revlog decompress error: %s')
2869 % stringutil.forcebytestr(e)
2875 % stringutil.forcebytestr(e)
2870 )
2876 )
2871 # '\0' is more common than 'u' so it goes first.
2877 # '\0' is more common than 'u' so it goes first.
2872 elif t == b'\0':
2878 elif t == b'\0':
2873 return data
2879 return data
2874 elif t == b'u':
2880 elif t == b'u':
2875 return util.buffer(data, 1)
2881 return util.buffer(data, 1)
2876
2882
2877 compressor = self._get_decompressor(t)
2883 compressor = self._get_decompressor(t)
2878
2884
2879 return compressor.decompress(data)
2885 return compressor.decompress(data)
2880
2886
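Putting `compress()` and `decompress()` side by side, the on-disk format is just header + payload with a one-byte sniff on the way back. A minimal sketch of that convention (zlib only; other engines would be looked up by their own header byte):

```python
import zlib

def store(data):
    """Return the bytes written to disk for one non-empty chunk (sketch)."""
    comp = zlib.compress(data)
    if len(comp) < len(data):
        return comp        # self-identifying: starts with b'x'
    if data[0:1] == b'\0':
        return data        # b'\0' is already unambiguous on read
    return b'u' + data     # mark as stored-uncompressed

def load(blob):
    """Invert store() by sniffing the first byte, like decompress()."""
    t = blob[0:1]
    if t == b'x':
        return zlib.decompress(blob)
    if t == b'\0':
        return blob
    if t == b'u':
        return blob[1:]
    raise ValueError('unknown chunk header %r' % t)
```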
2881 def _addrevision(
2887 def _addrevision(
2882 self,
2888 self,
2883 node,
2889 node,
2884 rawtext,
2890 rawtext,
2885 transaction,
2891 transaction,
2886 link,
2892 link,
2887 p1,
2893 p1,
2888 p2,
2894 p2,
2889 flags,
2895 flags,
2890 cachedelta,
2896 cachedelta,
2891 alwayscache=False,
2897 alwayscache=False,
2892 deltacomputer=None,
2898 deltacomputer=None,
2893 sidedata=None,
2899 sidedata=None,
2894 ):
2900 ):
2895 """internal function to add revisions to the log
2901 """internal function to add revisions to the log
2896
2902
2897 see addrevision for argument descriptions.
2903 see addrevision for argument descriptions.
2898
2904
2899 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2905 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2900
2906
2901 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2907 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2902 be used.
2908 be used.
2903
2909
2904 invariants:
2910 invariants:
2905 - rawtext is optional (can be None); if not set, cachedelta must be set.
2911 - rawtext is optional (can be None); if not set, cachedelta must be set.
2906 if both are set, they must correspond to each other.
2912 if both are set, they must correspond to each other.
2907 """
2913 """
2908 if node == self.nullid:
2914 if node == self.nullid:
2909 raise error.RevlogError(
2915 raise error.RevlogError(
2910 _(b"%s: attempt to add null revision") % self.display_id
2916 _(b"%s: attempt to add null revision") % self.display_id
2911 )
2917 )
2912 if (
2918 if (
2913 node == self.nodeconstants.wdirid
2919 node == self.nodeconstants.wdirid
2914 or node in self.nodeconstants.wdirfilenodeids
2920 or node in self.nodeconstants.wdirfilenodeids
2915 ):
2921 ):
2916 raise error.RevlogError(
2922 raise error.RevlogError(
2917 _(b"%s: attempt to add wdir revision") % self.display_id
2923 _(b"%s: attempt to add wdir revision") % self.display_id
2918 )
2924 )
2919 if self._inner._writinghandles is None:
2925 if self._inner._writinghandles is None:
2920 msg = b'adding revision outside `revlog._writing` context'
2926 msg = b'adding revision outside `revlog._writing` context'
2921 raise error.ProgrammingError(msg)
2927 raise error.ProgrammingError(msg)
2922
2928
2923 btext = [rawtext]
2929 btext = [rawtext]
2924
2930
2925 curr = len(self)
2931 curr = len(self)
2926 prev = curr - 1
2932 prev = curr - 1
2927
2933
2928 offset = self._get_data_offset(prev)
2934 offset = self._get_data_offset(prev)
2929
2935
2930 if self._concurrencychecker:
2936 if self._concurrencychecker:
2931 ifh, dfh, sdfh = self._inner._writinghandles
2937 ifh, dfh, sdfh = self._inner._writinghandles
2932 # XXX no checking for the sidedata file
2938 # XXX no checking for the sidedata file
2933 if self._inline:
2939 if self._inline:
2934 # offset is "as if" it were in the .d file, so we need to add on
2940 # offset is "as if" it were in the .d file, so we need to add on
2935 # the size of the entry metadata.
2941 # the size of the entry metadata.
2936 self._concurrencychecker(
2942 self._concurrencychecker(
2937 ifh, self._indexfile, offset + curr * self.index.entry_size
2943 ifh, self._indexfile, offset + curr * self.index.entry_size
2938 )
2944 )
2939 else:
2945 else:
2940 # Entries in the .i are a consistent size.
2946 # Entries in the .i are a consistent size.
2941 self._concurrencychecker(
2947 self._concurrencychecker(
2942 ifh, self._indexfile, curr * self.index.entry_size
2948 ifh, self._indexfile, curr * self.index.entry_size
2943 )
2949 )
2944 self._concurrencychecker(dfh, self._datafile, offset)
2950 self._concurrencychecker(dfh, self._datafile, offset)
2945
2951
2946 p1r, p2r = self.rev(p1), self.rev(p2)
2952 p1r, p2r = self.rev(p1), self.rev(p2)
2947
2953
2948 # full versions are inserted when the needed deltas
2954 # full versions are inserted when the needed deltas
2949 # become comparable to the uncompressed text
2955 # become comparable to the uncompressed text
2950 if rawtext is None:
2956 if rawtext is None:
2951 # need the rawtext size before it is changed by flag processors, which
2957 # need the rawtext size before it is changed by flag processors, which
2952 # is the non-raw size. use revlog explicitly to avoid filelog's extra
2958 # is the non-raw size. use revlog explicitly to avoid filelog's extra
2953 # logic that might remove metadata size.
2959 # logic that might remove metadata size.
2954 textlen = mdiff.patchedsize(
2960 textlen = mdiff.patchedsize(
2955 revlog.size(self, cachedelta[0]), cachedelta[1]
2961 revlog.size(self, cachedelta[0]), cachedelta[1]
2956 )
2962 )
2957 else:
2963 else:
2958 textlen = len(rawtext)
2964 textlen = len(rawtext)
2959
2965
2960 if deltacomputer is None:
2966 if deltacomputer is None:
2961 write_debug = None
2967 write_debug = None
2962 if self.delta_config.debug_delta:
2968 if self.delta_config.debug_delta:
2963 write_debug = transaction._report
2969 write_debug = transaction._report
2964 deltacomputer = deltautil.deltacomputer(
2970 deltacomputer = deltautil.deltacomputer(
2965 self, write_debug=write_debug
2971 self, write_debug=write_debug
2966 )
2972 )
2967
2973
2968 if cachedelta is not None and len(cachedelta) == 2:
2974 if cachedelta is not None and len(cachedelta) == 2:
2969 # If the cached delta has no information about how it should be
2975 # If the cached delta has no information about how it should be
2970 # reused, add the default reuse instruction according to the
2976 # reused, add the default reuse instruction according to the
2971 # revlog's configuration.
2977 # revlog's configuration.
2972 if (
2978 if (
2973 self.delta_config.general_delta
2979 self.delta_config.general_delta
2974 and self.delta_config.lazy_delta_base
2980 and self.delta_config.lazy_delta_base
2975 ):
2981 ):
2976 delta_base_reuse = DELTA_BASE_REUSE_TRY
2982 delta_base_reuse = DELTA_BASE_REUSE_TRY
2977 else:
2983 else:
2978 delta_base_reuse = DELTA_BASE_REUSE_NO
2984 delta_base_reuse = DELTA_BASE_REUSE_NO
2979 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2985 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2980
2986
2981 revinfo = revlogutils.revisioninfo(
2987 revinfo = revlogutils.revisioninfo(
2982 node,
2988 node,
2983 p1,
2989 p1,
2984 p2,
2990 p2,
2985 btext,
2991 btext,
2986 textlen,
2992 textlen,
2987 cachedelta,
2993 cachedelta,
2988 flags,
2994 flags,
2989 )
2995 )
2990
2996
2991 deltainfo = deltacomputer.finddeltainfo(revinfo)
2997 deltainfo = deltacomputer.finddeltainfo(revinfo)
2992
2998
2993 compression_mode = COMP_MODE_INLINE
2999 compression_mode = COMP_MODE_INLINE
2994 if self._docket is not None:
3000 if self._docket is not None:
2995 default_comp = self._docket.default_compression_header
3001 default_comp = self._docket.default_compression_header
2996 r = deltautil.delta_compression(default_comp, deltainfo)
3002 r = deltautil.delta_compression(default_comp, deltainfo)
2997 compression_mode, deltainfo = r
3003 compression_mode, deltainfo = r
2998
3004
2999 sidedata_compression_mode = COMP_MODE_INLINE
3005 sidedata_compression_mode = COMP_MODE_INLINE
3000 if sidedata and self.feature_config.has_side_data:
3006 if sidedata and self.feature_config.has_side_data:
3001 sidedata_compression_mode = COMP_MODE_PLAIN
3007 sidedata_compression_mode = COMP_MODE_PLAIN
3002 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3008 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3003 sidedata_offset = self._docket.sidedata_end
3009 sidedata_offset = self._docket.sidedata_end
3004 h, comp_sidedata = self.compress(serialized_sidedata)
3010 h, comp_sidedata = self.compress(serialized_sidedata)
3005 if (
3011 if (
3006 h != b'u'
3012 h != b'u'
3007 and comp_sidedata[0:1] != b'\0'
3013 and comp_sidedata[0:1] != b'\0'
3008 and len(comp_sidedata) < len(serialized_sidedata)
3014 and len(comp_sidedata) < len(serialized_sidedata)
3009 ):
3015 ):
3010 assert not h
3016 assert not h
3011 if (
3017 if (
3012 comp_sidedata[0:1]
3018 comp_sidedata[0:1]
3013 == self._docket.default_compression_header
3019 == self._docket.default_compression_header
3014 ):
3020 ):
3015 sidedata_compression_mode = COMP_MODE_DEFAULT
3021 sidedata_compression_mode = COMP_MODE_DEFAULT
3016 serialized_sidedata = comp_sidedata
3022 serialized_sidedata = comp_sidedata
3017 else:
3023 else:
3018 sidedata_compression_mode = COMP_MODE_INLINE
3024 sidedata_compression_mode = COMP_MODE_INLINE
3019 serialized_sidedata = comp_sidedata
3025 serialized_sidedata = comp_sidedata
3020 else:
3026 else:
3021 serialized_sidedata = b""
3027 serialized_sidedata = b""
3022 # Don't store the offset if the sidedata is empty; that way
3028 # Don't store the offset if the sidedata is empty; that way
3023 # we can easily detect empty sidedata, and it will be no different
3029 # we can easily detect empty sidedata, and it will be no different
3024 # from sidedata we add manually.
3030 # from sidedata we add manually.
3025 sidedata_offset = 0
3031 sidedata_offset = 0
3026
3032
3027 rank = RANK_UNKNOWN
3033 rank = RANK_UNKNOWN
3028 if self.feature_config.compute_rank:
3034 if self.feature_config.compute_rank:
3029 if (p1r, p2r) == (nullrev, nullrev):
3035 if (p1r, p2r) == (nullrev, nullrev):
3030 rank = 1
3036 rank = 1
3031 elif p1r != nullrev and p2r == nullrev:
3037 elif p1r != nullrev and p2r == nullrev:
3032 rank = 1 + self.fast_rank(p1r)
3038 rank = 1 + self.fast_rank(p1r)
3033 elif p1r == nullrev and p2r != nullrev:
3039 elif p1r == nullrev and p2r != nullrev:
3034 rank = 1 + self.fast_rank(p2r)
3040 rank = 1 + self.fast_rank(p2r)
3035 else: # merge node
3041 else: # merge node
3036 if rustdagop is not None and self.index.rust_ext_compat:
3042 if rustdagop is not None and self.index.rust_ext_compat:
3037 rank = rustdagop.rank(self.index, p1r, p2r)
3043 rank = rustdagop.rank(self.index, p1r, p2r)
3038 else:
3044 else:
3039 pmin, pmax = sorted((p1r, p2r))
3045 pmin, pmax = sorted((p1r, p2r))
3040 rank = 1 + self.fast_rank(pmax)
3046 rank = 1 + self.fast_rank(pmax)
3041 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
3047 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
3042
3048
3043 e = revlogutils.entry(
3049 e = revlogutils.entry(
3044 flags=flags,
3050 flags=flags,
3045 data_offset=offset,
3051 data_offset=offset,
3046 data_compressed_length=deltainfo.deltalen,
3052 data_compressed_length=deltainfo.deltalen,
3047 data_uncompressed_length=textlen,
3053 data_uncompressed_length=textlen,
3048 data_compression_mode=compression_mode,
3054 data_compression_mode=compression_mode,
3049 data_delta_base=deltainfo.base,
3055 data_delta_base=deltainfo.base,
3050 link_rev=link,
3056 link_rev=link,
3051 parent_rev_1=p1r,
3057 parent_rev_1=p1r,
3052 parent_rev_2=p2r,
3058 parent_rev_2=p2r,
3053 node_id=node,
3059 node_id=node,
3054 sidedata_offset=sidedata_offset,
3060 sidedata_offset=sidedata_offset,
3055 sidedata_compressed_length=len(serialized_sidedata),
3061 sidedata_compressed_length=len(serialized_sidedata),
3056 sidedata_compression_mode=sidedata_compression_mode,
3062 sidedata_compression_mode=sidedata_compression_mode,
3057 rank=rank,
3063 rank=rank,
3058 )
3064 )
3059
3065
3060 self.index.append(e)
3066 self.index.append(e)
3061 entry = self.index.entry_binary(curr)
3067 entry = self.index.entry_binary(curr)
3062 if curr == 0 and self._docket is None:
3068 if curr == 0 and self._docket is None:
3063 header = self._format_flags | self._format_version
3069 header = self._format_flags | self._format_version
3064 header = self.index.pack_header(header)
3070 header = self.index.pack_header(header)
3065 entry = header + entry
3071 entry = header + entry
3066 self._writeentry(
3072 self._writeentry(
3067 transaction,
3073 transaction,
3068 entry,
3074 entry,
3069 deltainfo.data,
3075 deltainfo.data,
3070 link,
3076 link,
3071 offset,
3077 offset,
3072 serialized_sidedata,
3078 serialized_sidedata,
3073 sidedata_offset,
3079 sidedata_offset,
3074 )
3080 )
3075
3081
3076 rawtext = btext[0]
3082 rawtext = btext[0]
3077
3083
3078 if alwayscache and rawtext is None:
3084 if alwayscache and rawtext is None:
3079 rawtext = deltacomputer.buildtext(revinfo)
3085 rawtext = deltacomputer.buildtext(revinfo)
3080
3086
3081 if type(rawtext) == bytes: # only accept immutable objects
3087 if type(rawtext) == bytes: # only accept immutable objects
3082 self._revisioncache = (node, curr, rawtext)
3088 self._revisioncache = (node, curr, rawtext)
3083 self._chainbasecache[curr] = deltainfo.chainbase
3089 self._chainbasecache[curr] = deltainfo.chainbase
3084 return curr
3090 return curr
3085
3091
3086 def _get_data_offset(self, prev):
3092 def _get_data_offset(self, prev):
3087 """Returns the current offset in the (in-transaction) data file.
3093 """Returns the current offset in the (in-transaction) data file.
3088 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
3094 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
3089 file to store that information: since sidedata can be rewritten to the
3095 file to store that information: since sidedata can be rewritten to the
3090 end of the data file within a transaction, you can have cases where, for
3096 end of the data file within a transaction, you can have cases where, for
3091 example, rev `n` does not have sidedata while rev `n - 1` does, leading
3097 example, rev `n` does not have sidedata while rev `n - 1` does, leading
3092 to `n - 1`'s sidedata being written after `n`'s data.
3098 to `n - 1`'s sidedata being written after `n`'s data.
3093
3099
3094 TODO cache this in a docket file before getting out of experimental."""
3100 TODO cache this in a docket file before getting out of experimental."""
3095 if self._docket is None:
3101 if self._docket is None:
3096 return self.end(prev)
3102 return self.end(prev)
3097 else:
3103 else:
3098 return self._docket.data_end
3104 return self._docket.data_end
3099
3105
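The same decision in isolation, for contrast with the docstring above:

```python
def next_data_offset(docket, end, prev):
    """Where the next revision's data will be appended.

    Pre-v2 revlogs derive it in O(1) from the previous entry; revlog-v2
    must trust the docket because in-transaction sidedata rewrites can
    reorder the tail of the data file.
    """
    if docket is None:
        return end(prev)
    return docket.data_end
```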
3100 def _writeentry(
3106 def _writeentry(
3101 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
3107 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
3102 ):
3108 ):
3103 # Files opened in a+ mode have inconsistent behavior on various
3109 # Files opened in a+ mode have inconsistent behavior on various
3104 # platforms. Windows requires that a file positioning call be made
3110 # platforms. Windows requires that a file positioning call be made
3105 # when the file handle transitions between reads and writes. See
3111 # when the file handle transitions between reads and writes. See
3106 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
3112 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
3107 # platforms, Python or the platform itself can be buggy. Some versions
3113 # platforms, Python or the platform itself can be buggy. Some versions
3108 # of Solaris have been observed to not append at the end of the file
3114 # of Solaris have been observed to not append at the end of the file
3109 # if the file was seeked to before the end. See issue4943 for more.
3115 # if the file was seeked to before the end. See issue4943 for more.
3110 #
3116 #
3111 # We work around this issue by inserting a seek() before writing.
3117 # We work around this issue by inserting a seek() before writing.
3112 # Note: This is likely not necessary on Python 3. However, because
3118 # Note: This is likely not necessary on Python 3. However, because
3113 # the file handle is reused for reads and may be seeked there, we need
3119 # the file handle is reused for reads and may be seeked there, we need
3114 # to be careful before changing this.
3120 # to be careful before changing this.
        if self._inner._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._inner._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            # revlog-v2 always has 3 writing handles, help Pytype
            wh1 = self._inner._writinghandles[0]
            wh2 = self._inner._writinghandles[1]
            wh3 = self._inner._writinghandles[2]
            assert wh1 is not None
            assert wh2 is not None
            assert wh3 is not None
            self._docket.index_end = wh1.tell()
            self._docket.data_end = wh2.tell()
            self._docket.sidedata_end = wh3.tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """
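        # Each item of ``deltas`` is unpacked below as an 8-tuple:
        #
        #     (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
        #
        # where ``deltabase`` names an already-stored node the delta
        # applies against.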

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        # read the default delta-base reuse policy from revlog config if the
        # group did not specify one.
        if delta_base_reuse_policy is None:
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse_policy = DELTA_BASE_REUSE_NO

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self.delta_config.debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                    debug_info=debug_info,
                )
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta, delta_base_reuse_policy),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self.feature_config.censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self.feature_config.censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
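        # Illustrative example (hypothetical values): with minlink=10, if
        # rev 5 is the first revision that must go, everything from rev 5
        # up is truncated; stripped revs whose own linkrev is still < 10
        # are reported in the returned set so the caller can save and
        # re-add them (see ``strip`` below).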
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it
            # is not powerful enough at the time of this comment.
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._inner._segmentfile.clear_cache()
        self._inner._segmentfile_sidedata.clear_cache()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
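        # Reading the result (an illustration with hypothetical numbers):
        # (0, 4) would mean the data file is consistent but the index file
        # carries 4 trailing bytes beyond the last complete entry, e.g.
        # from a partial write.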
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)

    def files(self):
        """return list of files that compose this revlog"""
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
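        # ``nodesorder`` accepts b'nodes', b'storage', b'linear', or None,
        # as validated below. The precise ordering semantics are defined by
        # ``storageutil.emitrevisions`` (roughly: as-requested, storage
        # order, or a linearized order; this gloss is an assumption, see
        # that helper for the authoritative definition).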
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self.delta_config.general_delta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self._candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force computing deltas against both
        parents for merges. When not set, the destination revlog's existing
        configuration is used.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
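        # A hypothetical call forcing every delta to be recomputed during
        # the copy (e.g. after a delta algorithm change) could look like:
        #
        #     src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)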
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        old_delta_config = destrevlog.delta_config
        destrevlog.delta_config = destrevlog.delta_config.copy()

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog.delta_config.lazy_delta_base = True
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = False

            delta_both_parents = (
                forcedeltabothparents or old_delta_config.delta_both_parents
            )
            destrevlog.delta_config.delta_both_parents = delta_both_parents

            with self.reading(), destrevlog._writing(tr):
                self._clone(
                    tr,
                    destrevlog,
                    addrevisioncb,
                    deltareuse,
                    forcedeltabothparents,
                    sidedata_helpers,
                )

        finally:
            destrevlog.delta_config = old_delta_config

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self.delta_config.debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog.delta_config.lazy_delta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    try:
                        rawtext = self._revisiondata(rev)
                    except error.CensoredNodeError as censored:
                        assert flags & REVIDX_ISCENSORED
                        rawtext = censored.tombstone
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog._addrevision(
                    node,
                    rawtext,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    flags,
                    cachedelta,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            # 1. length check: L1 == L2, in all cases.
            # 2. hash check: depending on flag processor, we may need to
            #    use either "text" (external), or "rawtext" (in revlog).
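            # For instance (hypothetical numbers): if the index records
            # rawsize (L1) 1024 for a revision whose stored rawtext (L2)
            # unpacks to 1000 bytes, the length check below reports
            # "unpacked size is 1000, 1024 expected".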

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.feature_config.has_side_data:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._inner._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

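                # Sketch of the mode selection (assuming the revlog-v2
                # conventions): COMP_MODE_PLAIN stores the chunk
                # uncompressed, COMP_MODE_DEFAULT stores it compressed with
                # the docket's default engine (header omitted), and
                # COMP_MODE_INLINE keeps the compression header inline with
                # the chunk; the same three modes are rendered by name in
                # revlogutils/debug.py.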
                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.feature_config.has_side_data:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)
@@ -1,943 +1,943 @@
# revlogutils/debug.py - utility used for revlog debugging
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
# Copyright 2022 Octobus <contact@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

import collections
import string

from .. import (
    mdiff,
    node as nodemod,
    revlogutils,
)

from . import (
    constants,
    deltas as deltautil,
)

INDEX_ENTRY_DEBUG_COLUMN = []

NODE_SIZE = object()
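# NODE_SIZE is a sentinel: a column declared with ``size=NODE_SIZE`` is
# sized at display time to the node-hash width (see _column_base.get_size).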


class _column_base:
    """contains the definition of a revlog column

    name: the column header,
    value_func: the function called to get a value,
    size: the width of the column,
    verbose_only: only include the column in verbose mode.
    """

    def __init__(self, name, value_func, size=None, verbose=False):
        self.name = name
        self.value_func = value_func
        if size is not NODE_SIZE:
            if size is None:
                size = 8  # arbitrary default
            size = max(len(name), size)
        self._size = size
        self.verbose_only = verbose

    def get_size(self, node_size):
        if self._size is NODE_SIZE:
            return node_size
        else:
            return self._size


def debug_column(name, size=None, verbose=False):
    """decorated function is registered as a column

    name: the name of the column,
    size: the expected size of the column.
    """

    def register(func):
        entry = _column_base(
            name=name,
            value_func=func,
            size=size,
            verbose=verbose,
        )
        INDEX_ENTRY_DEBUG_COLUMN.append(entry)
        return entry

    return register


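# The registrations below show the intended usage: decorating a function
# with ``@debug_column(...)`` appends a ``_column_base`` entry to
# INDEX_ENTRY_DEBUG_COLUMN, which ``debug_index`` later walks in
# registration order.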
@debug_column(b"rev", size=6)
def _rev(index, rev, entry, hexfn):
    return b"%d" % rev


@debug_column(b"rank", size=6, verbose=True)
def rank(index, rev, entry, hexfn):
    return b"%d" % entry[constants.ENTRY_RANK]


@debug_column(b"linkrev", size=6)
def _linkrev(index, rev, entry, hexfn):
    return b"%d" % entry[constants.ENTRY_LINK_REV]


@debug_column(b"nodeid", size=NODE_SIZE)
def _nodeid(index, rev, entry, hexfn):
    return hexfn(entry[constants.ENTRY_NODE_ID])


@debug_column(b"p1-rev", size=6, verbose=True)
def _p1_rev(index, rev, entry, hexfn):
    return b"%d" % entry[constants.ENTRY_PARENT_1]


@debug_column(b"p1-nodeid", size=NODE_SIZE)
def _p1_node(index, rev, entry, hexfn):
    parent = entry[constants.ENTRY_PARENT_1]
    p_entry = index[parent]
    return hexfn(p_entry[constants.ENTRY_NODE_ID])


@debug_column(b"p2-rev", size=6, verbose=True)
def _p2_rev(index, rev, entry, hexfn):
    return b"%d" % entry[constants.ENTRY_PARENT_2]


@debug_column(b"p2-nodeid", size=NODE_SIZE)
def _p2_node(index, rev, entry, hexfn):
    parent = entry[constants.ENTRY_PARENT_2]
    p_entry = index[parent]
    return hexfn(p_entry[constants.ENTRY_NODE_ID])


@debug_column(b"full-size", size=20, verbose=True)
def full_size(index, rev, entry, hexfn):
    return b"%d" % entry[constants.ENTRY_DATA_UNCOMPRESSED_LENGTH]


@debug_column(b"delta-base", size=6, verbose=True)
def delta_base(index, rev, entry, hexfn):
    return b"%d" % entry[constants.ENTRY_DELTA_BASE]


@debug_column(b"flags", size=2, verbose=True)
def flags(index, rev, entry, hexfn):
    field = entry[constants.ENTRY_DATA_OFFSET]
    field &= 0xFFFF
    return b"%d" % field


@debug_column(b"comp-mode", size=4, verbose=True)
def compression_mode(index, rev, entry, hexfn):
    return b"%d" % entry[constants.ENTRY_DATA_COMPRESSION_MODE]


@debug_column(b"data-offset", size=20, verbose=True)
def data_offset(index, rev, entry, hexfn):
    field = entry[constants.ENTRY_DATA_OFFSET]
    field >>= 16
    return b"%d" % field


@debug_column(b"chunk-size", size=10, verbose=True)
def data_chunk_size(index, rev, entry, hexfn):
    return b"%d" % entry[constants.ENTRY_DATA_COMPRESSED_LENGTH]


@debug_column(b"sd-comp-mode", size=7, verbose=True)
def sidedata_compression_mode(index, rev, entry, hexfn):
    compression = entry[constants.ENTRY_SIDEDATA_COMPRESSION_MODE]
    if compression == constants.COMP_MODE_PLAIN:
        return b"plain"
    elif compression == constants.COMP_MODE_DEFAULT:
        return b"default"
    elif compression == constants.COMP_MODE_INLINE:
        return b"inline"
    else:
        return b"%d" % compression


@debug_column(b"sidedata-offset", size=20, verbose=True)
def sidedata_offset(index, rev, entry, hexfn):
    return b"%d" % entry[constants.ENTRY_SIDEDATA_OFFSET]


@debug_column(b"sd-chunk-size", size=10, verbose=True)
def sidedata_chunk_size(index, rev, entry, hexfn):
    return b"%d" % entry[constants.ENTRY_SIDEDATA_COMPRESSED_LENGTH]


def debug_index(
    ui,
    repo,
    formatter,
    revlog,
    full_node,
):
    """display index data for a revlog"""
183 if full_node:
183 if full_node:
184 hexfn = nodemod.hex
184 hexfn = nodemod.hex
185 else:
185 else:
186 hexfn = nodemod.short
186 hexfn = nodemod.short
187
187
188 idlen = 12
188 idlen = 12
189 for i in revlog:
189 for i in revlog:
190 idlen = len(hexfn(revlog.node(i)))
190 idlen = len(hexfn(revlog.node(i)))
191 break
191 break
192
192
193 fm = formatter
193 fm = formatter
194
194
195 header_pieces = []
195 header_pieces = []
196 for column in INDEX_ENTRY_DEBUG_COLUMN:
196 for column in INDEX_ENTRY_DEBUG_COLUMN:
197 if column.verbose_only and not ui.verbose:
197 if column.verbose_only and not ui.verbose:
198 continue
198 continue
199 size = column.get_size(idlen)
199 size = column.get_size(idlen)
200 name = column.name
200 name = column.name
201 header_pieces.append(name.rjust(size))
201 header_pieces.append(name.rjust(size))
202
202
203 fm.plain(b' '.join(header_pieces) + b'\n')
203 fm.plain(b' '.join(header_pieces) + b'\n')
204
204
205 index = revlog.index
205 index = revlog.index
206
206
207 for rev in revlog:
207 for rev in revlog:
208 fm.startitem()
208 fm.startitem()
209 entry = index[rev]
209 entry = index[rev]
210 first = True
210 first = True
211 for column in INDEX_ENTRY_DEBUG_COLUMN:
211 for column in INDEX_ENTRY_DEBUG_COLUMN:
212 if column.verbose_only and not ui.verbose:
212 if column.verbose_only and not ui.verbose:
213 continue
213 continue
214 if not first:
214 if not first:
215 fm.plain(b' ')
215 fm.plain(b' ')
216 first = False
216 first = False
217
217
218 size = column.get_size(idlen)
218 size = column.get_size(idlen)
219 value = column.value_func(index, rev, entry, hexfn)
219 value = column.value_func(index, rev, entry, hexfn)
220 display = b"%%%ds" % size
220 display = b"%%%ds" % size
221 fm.write(column.name, display, value)
221 fm.write(column.name, display, value)
222 fm.plain(b'\n')
222 fm.plain(b'\n')
223
223
224 fm.end()
224 fm.end()
225
225
226
226
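# Illustrative sketch (not part of this changeset): the header and rows built
# above are plain right-justified fixed-width columns, e.g. with a
# hypothetical width of 6:
name, size, value = b'rev', 6, 42
header_cell = name.rjust(size)        # b'   rev'
row_cell = (b'%%%dd' % size) % value  # b'    42'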
227 def dump(ui, revlog):
227 def dump(ui, revlog):
228 """perform the work for `hg debugrevlog --dump"""
228 """perform the work for `hg debugrevlog --dump"""
229 # XXX seems redundant with debug_index?
229 # XXX seems redundant with debug_index?
230 r = revlog
230 r = revlog
231 numrevs = len(r)
231 numrevs = len(r)
232 ui.write(
232 ui.write(
233 (
233 (
234 b"# rev p1rev p2rev start end deltastart base p1 p2"
234 b"# rev p1rev p2rev start end deltastart base p1 p2"
235 b" rawsize totalsize compression heads chainlen\n"
235 b" rawsize totalsize compression heads chainlen\n"
236 )
236 )
237 )
237 )
238 ts = 0
238 ts = 0
239 heads = set()
239 heads = set()
240
240
241 for rev in range(numrevs):
241 for rev in range(numrevs):
242 dbase = r.deltaparent(rev)
242 dbase = r.deltaparent(rev)
243 if dbase == -1:
243 if dbase == -1:
244 dbase = rev
244 dbase = rev
245 cbase = r.chainbase(rev)
245 cbase = r.chainbase(rev)
246 clen = r.chainlen(rev)
246 clen = r.chainlen(rev)
247 p1, p2 = r.parentrevs(rev)
247 p1, p2 = r.parentrevs(rev)
248 rs = r.rawsize(rev)
248 rs = r.rawsize(rev)
249 ts = ts + rs
249 ts = ts + rs
250 heads -= set(r.parentrevs(rev))
250 heads -= set(r.parentrevs(rev))
251 heads.add(rev)
251 heads.add(rev)
252 try:
252 try:
253 compression = ts / r.end(rev)
253 compression = ts / r.end(rev)
254 except ZeroDivisionError:
254 except ZeroDivisionError:
255 compression = 0
255 compression = 0
256 ui.write(
256 ui.write(
257 b"%5d %5d %5d %5d %5d %10d %4d %4d %4d %7d %9d "
257 b"%5d %5d %5d %5d %5d %10d %4d %4d %4d %7d %9d "
258 b"%11d %5d %8d\n"
258 b"%11d %5d %8d\n"
259 % (
259 % (
260 rev,
260 rev,
261 p1,
261 p1,
262 p2,
262 p2,
263 r.start(rev),
263 r.start(rev),
264 r.end(rev),
264 r.end(rev),
265 r.start(dbase),
265 r.start(dbase),
266 r.start(cbase),
266 r.start(cbase),
267 r.start(p1),
267 r.start(p1),
268 r.start(p2),
268 r.start(p2),
269 rs,
269 rs,
270 ts,
270 ts,
271 compression,
271 compression,
272 len(heads),
272 len(heads),
273 clen,
273 clen,
274 )
274 )
275 )
275 )
276
276
277
277
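# Illustrative sketch (not part of this changeset): the running `heads` set
# above keeps only revisions that no later revision claims as a parent, so a
# linear history 0 <- 1 <- 2 ends with a single head:
heads = set()
for rev, parents in [(0, (-1, -1)), (1, (0, -1)), (2, (1, -1))]:
    heads -= set(parents)
    heads.add(rev)
assert heads == {2}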
278 def debug_revlog(ui, revlog):
278 def debug_revlog(ui, revlog):
279 """code for `hg debugrevlog`"""
279 """code for `hg debugrevlog`"""
280 r = revlog
280 r = revlog
281 format = r._format_version
281 format = r._format_version
282 v = r._format_flags
282 v = r._format_flags
283 flags = []
283 flags = []
284 gdelta = False
284 gdelta = False
285 if v & constants.FLAG_INLINE_DATA:
285 if v & constants.FLAG_INLINE_DATA:
286 flags.append(b'inline')
286 flags.append(b'inline')
287 if v & constants.FLAG_GENERALDELTA:
287 if v & constants.FLAG_GENERALDELTA:
288 gdelta = True
288 gdelta = True
289 flags.append(b'generaldelta')
289 flags.append(b'generaldelta')
290 if not flags:
290 if not flags:
291 flags = [b'(none)']
291 flags = [b'(none)']
292
292
293 ### the total size of stored content if uncompressed.
293 ### the total size of stored content if uncompressed.
294 full_text_total_size = 0
294 full_text_total_size = 0
295 ### tracks merge vs single parent
295 ### tracks merge vs single parent
296 nummerges = 0
296 nummerges = 0
297
297
298 ### tracks the ways the "delta" is built
298 ### tracks the ways the "delta" is built
299 # nodelta
299 # nodelta
300 numempty = 0
300 numempty = 0
301 numemptytext = 0
301 numemptytext = 0
302 numemptydelta = 0
302 numemptydelta = 0
303 # full file content
303 # full file content
304 numfull = 0
304 numfull = 0
305 # intermediate snapshot against a prior snapshot
305 # intermediate snapshot against a prior snapshot
306 numsemi = 0
306 numsemi = 0
307 # snapshot count per depth
307 # snapshot count per depth
308 numsnapdepth = collections.defaultdict(lambda: 0)
308 numsnapdepth = collections.defaultdict(lambda: 0)
309 # number of snapshots with a non-ancestor delta
309 # number of snapshots with a non-ancestor delta
310 numsnapdepth_nad = collections.defaultdict(lambda: 0)
310 numsnapdepth_nad = collections.defaultdict(lambda: 0)
311 # delta against previous revision
311 # delta against previous revision
312 numprev = 0
312 numprev = 0
313 # delta against prev, where prev is a non-ancestor
313 # delta against prev, where prev is a non-ancestor
314 numprev_nad = 0
314 numprev_nad = 0
315 # delta against first or second parent (not prev)
315 # delta against first or second parent (not prev)
316 nump1 = 0
316 nump1 = 0
317 nump2 = 0
317 nump2 = 0
318 # delta against neither prev nor parents
318 # delta against neither prev nor parents
319 numother = 0
319 numother = 0
320 # delta against other that is a non-ancestor
320 # delta against other that is a non-ancestor
321 numother_nad = 0
321 numother_nad = 0
322 # delta against prev that are also first or second parent
322 # delta against prev that are also first or second parent
323 # (details of `numprev`)
323 # (details of `numprev`)
324 nump1prev = 0
324 nump1prev = 0
325 nump2prev = 0
325 nump2prev = 0
326
326
327 # data about the delta chain of each rev
327 # data about the delta chain of each rev
328 chainlengths = []
328 chainlengths = []
329 chainbases = []
329 chainbases = []
330 chainspans = []
330 chainspans = []
331
331
332 # data about each revision
332 # data about each revision
333 datasize = [None, 0, 0]
333 datasize = [None, 0, 0]
334 fullsize = [None, 0, 0]
334 fullsize = [None, 0, 0]
335 semisize = [None, 0, 0]
335 semisize = [None, 0, 0]
336 # snapshot count per depth
336 # snapshot count per depth
337 snapsizedepth = collections.defaultdict(lambda: [None, 0, 0])
337 snapsizedepth = collections.defaultdict(lambda: [None, 0, 0])
338 deltasize = [None, 0, 0]
338 deltasize = [None, 0, 0]
339 chunktypecounts = {}
339 chunktypecounts = {}
340 chunktypesizes = {}
340 chunktypesizes = {}
341
341
342 def addsize(size, l):
342 def addsize(size, l):
343 if l[0] is None or size < l[0]:
343 if l[0] is None or size < l[0]:
344 l[0] = size
344 l[0] = size
345 if size > l[1]:
345 if size > l[1]:
346 l[1] = size
346 l[1] = size
347 l[2] += size
347 l[2] += size
348
348
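# e.g. starting from l = [None, 0, 0], calling addsize(5, l), addsize(2, l),
# addsize(9, l) leaves l == [2, 9, 16] (min, max, running total).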
349 with r.reading():
349 with r.reading():
350 numrevs = len(r)
350 numrevs = len(r)
351 for rev in range(numrevs):
351 for rev in range(numrevs):
352 p1, p2 = r.parentrevs(rev)
352 p1, p2 = r.parentrevs(rev)
353 delta = r.deltaparent(rev)
353 delta = r.deltaparent(rev)
354 if format > 0:
354 if format > 0:
355 s = r.rawsize(rev)
355 s = r.rawsize(rev)
356 full_text_total_size += s
356 full_text_total_size += s
357 addsize(s, datasize)
357 addsize(s, datasize)
358 if p2 != nodemod.nullrev:
358 if p2 != nodemod.nullrev:
359 nummerges += 1
359 nummerges += 1
360 size = r.length(rev)
360 size = r.length(rev)
361 if delta == nodemod.nullrev:
361 if delta == nodemod.nullrev:
362 chainlengths.append(0)
362 chainlengths.append(0)
363 chainbases.append(r.start(rev))
363 chainbases.append(r.start(rev))
364 chainspans.append(size)
364 chainspans.append(size)
365 if size == 0:
365 if size == 0:
366 numempty += 1
366 numempty += 1
367 numemptytext += 1
367 numemptytext += 1
368 else:
368 else:
369 numfull += 1
369 numfull += 1
370 numsnapdepth[0] += 1
370 numsnapdepth[0] += 1
371 addsize(size, fullsize)
371 addsize(size, fullsize)
372 addsize(size, snapsizedepth[0])
372 addsize(size, snapsizedepth[0])
373 else:
373 else:
374 nad = (
374 nad = (
375 delta != p1
375 delta != p1
376 and delta != p2
376 and delta != p2
377 and not r.isancestorrev(delta, rev)
377 and not r.isancestorrev(delta, rev)
378 )
378 )
379 chainlengths.append(chainlengths[delta] + 1)
379 chainlengths.append(chainlengths[delta] + 1)
380 baseaddr = chainbases[delta]
380 baseaddr = chainbases[delta]
381 revaddr = r.start(rev)
381 revaddr = r.start(rev)
382 chainbases.append(baseaddr)
382 chainbases.append(baseaddr)
383 chainspans.append((revaddr - baseaddr) + size)
383 chainspans.append((revaddr - baseaddr) + size)
384 if size == 0:
384 if size == 0:
385 numempty += 1
385 numempty += 1
386 numemptydelta += 1
386 numemptydelta += 1
387 elif r.issnapshot(rev):
387 elif r.issnapshot(rev):
388 addsize(size, semisize)
388 addsize(size, semisize)
389 numsemi += 1
389 numsemi += 1
390 depth = r.snapshotdepth(rev)
390 depth = r.snapshotdepth(rev)
391 numsnapdepth[depth] += 1
391 numsnapdepth[depth] += 1
392 if nad:
392 if nad:
393 numsnapdepth_nad[depth] += 1
393 numsnapdepth_nad[depth] += 1
394 addsize(size, snapsizedepth[depth])
394 addsize(size, snapsizedepth[depth])
395 else:
395 else:
396 addsize(size, deltasize)
396 addsize(size, deltasize)
397 if delta == rev - 1:
397 if delta == rev - 1:
398 numprev += 1
398 numprev += 1
399 if delta == p1:
399 if delta == p1:
400 nump1prev += 1
400 nump1prev += 1
401 elif delta == p2:
401 elif delta == p2:
402 nump2prev += 1
402 nump2prev += 1
403 elif nad:
403 elif nad:
404 numprev_nad += 1
404 numprev_nad += 1
405 elif delta == p1:
405 elif delta == p1:
406 nump1 += 1
406 nump1 += 1
407 elif delta == p2:
407 elif delta == p2:
408 nump2 += 1
408 nump2 += 1
409 elif delta != nodemod.nullrev:
409 elif delta != nodemod.nullrev:
410 numother += 1
410 numother += 1
411 numother_nad += 1
411 numother_nad += 1
412
412
413 # Obtain data on the raw chunks in the revlog.
413 # Obtain data on the raw chunks in the revlog.
414 if hasattr(r, '_getsegmentforrevs'):
414 if hasattr(r, '_inner'):
415 segment = r._getsegmentforrevs(rev, rev)[1]
415 segment = r._inner.get_segment_for_revs(rev, rev)[1]
416 else:
416 else:
417 segment = r._revlog._getsegmentforrevs(rev, rev)[1]
417 segment = r._revlog._getsegmentforrevs(rev, rev)[1]
418 if segment:
418 if segment:
419 chunktype = bytes(segment[0:1])
419 chunktype = bytes(segment[0:1])
420 else:
420 else:
421 chunktype = b'empty'
421 chunktype = b'empty'
422
422
423 if chunktype not in chunktypecounts:
423 if chunktype not in chunktypecounts:
424 chunktypecounts[chunktype] = 0
424 chunktypecounts[chunktype] = 0
425 chunktypesizes[chunktype] = 0
425 chunktypesizes[chunktype] = 0
426
426
427 chunktypecounts[chunktype] += 1
427 chunktypecounts[chunktype] += 1
428 chunktypesizes[chunktype] += size
428 chunktypesizes[chunktype] += size
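# The chunk's first byte identifies its on-disk compression: typically b'x'
# for zlib and b'u' for an explicit "stored uncompressed" marker, with
# b'empty' used here as a synthetic key for zero-length chunks.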
429
429
430 # Adjust size min value for empty cases
430 # Adjust size min value for empty cases
431 for size in (datasize, fullsize, semisize, deltasize):
431 for size in (datasize, fullsize, semisize, deltasize):
432 if size[0] is None:
432 if size[0] is None:
433 size[0] = 0
433 size[0] = 0
434
434
435 numdeltas = numrevs - numfull - numempty - numsemi
435 numdeltas = numrevs - numfull - numempty - numsemi
436 numoprev = numprev - nump1prev - nump2prev - numprev_nad
436 numoprev = numprev - nump1prev - nump2prev - numprev_nad
437 num_other_ancestors = numother - numother_nad
437 num_other_ancestors = numother - numother_nad
438 totalrawsize = datasize[2]
438 totalrawsize = datasize[2]
439 datasize[2] /= numrevs
439 datasize[2] /= numrevs
440 fulltotal = fullsize[2]
440 fulltotal = fullsize[2]
441 if numfull == 0:
441 if numfull == 0:
442 fullsize[2] = 0
442 fullsize[2] = 0
443 else:
443 else:
444 fullsize[2] /= numfull
444 fullsize[2] /= numfull
445 semitotal = semisize[2]
445 semitotal = semisize[2]
446 snaptotal = {}
446 snaptotal = {}
447 if numsemi > 0:
447 if numsemi > 0:
448 semisize[2] /= numsemi
448 semisize[2] /= numsemi
449 for depth in snapsizedepth:
449 for depth in snapsizedepth:
450 snaptotal[depth] = snapsizedepth[depth][2]
450 snaptotal[depth] = snapsizedepth[depth][2]
451 snapsizedepth[depth][2] /= numsnapdepth[depth]
451 snapsizedepth[depth][2] /= numsnapdepth[depth]
452
452
453 deltatotal = deltasize[2]
453 deltatotal = deltasize[2]
454 if numdeltas > 0:
454 if numdeltas > 0:
455 deltasize[2] /= numdeltas
455 deltasize[2] /= numdeltas
456 totalsize = fulltotal + semitotal + deltatotal
456 totalsize = fulltotal + semitotal + deltatotal
457 avgchainlen = sum(chainlengths) / numrevs
457 avgchainlen = sum(chainlengths) / numrevs
458 maxchainlen = max(chainlengths)
458 maxchainlen = max(chainlengths)
459 maxchainspan = max(chainspans)
459 maxchainspan = max(chainspans)
460 compratio = 1
460 compratio = 1
461 if totalsize:
461 if totalsize:
462 compratio = totalrawsize / totalsize
462 compratio = totalrawsize / totalsize
463
463
464 basedfmtstr = b'%%%dd\n'
464 basedfmtstr = b'%%%dd\n'
465 basepcfmtstr = b'%%%dd %s(%%5.2f%%%%)\n'
465 basepcfmtstr = b'%%%dd %s(%%5.2f%%%%)\n'
466
466
467 def dfmtstr(max):
467 def dfmtstr(max):
468 return basedfmtstr % len(str(max))
468 return basedfmtstr % len(str(max))
469
469
470 def pcfmtstr(max, padding=0):
470 def pcfmtstr(max, padding=0):
471 return basepcfmtstr % (len(str(max)), b' ' * padding)
471 return basepcfmtstr % (len(str(max)), b' ' * padding)
472
472
473 def pcfmt(value, total):
473 def pcfmt(value, total):
474 if total:
474 if total:
475 return (value, 100 * float(value) / total)
475 return (value, 100 * float(value) / total)
476 else:
476 else:
477 return value, 100.0
477 return value, 100.0
478
478
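# e.g. pcfmt(25, 100) == (25, 25.0), which pcfmtstr() renders as a
# right-aligned count followed by "(25.00%)".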
479 ui.writenoi18n(b'format : %d\n' % format)
479 ui.writenoi18n(b'format : %d\n' % format)
480 ui.writenoi18n(b'flags : %s\n' % b', '.join(flags))
480 ui.writenoi18n(b'flags : %s\n' % b', '.join(flags))
481
481
482 ui.write(b'\n')
482 ui.write(b'\n')
483 fmt = pcfmtstr(totalsize)
483 fmt = pcfmtstr(totalsize)
484 fmt2 = dfmtstr(totalsize)
484 fmt2 = dfmtstr(totalsize)
485 ui.writenoi18n(b'revisions : ' + fmt2 % numrevs)
485 ui.writenoi18n(b'revisions : ' + fmt2 % numrevs)
486 ui.writenoi18n(b' merges : ' + fmt % pcfmt(nummerges, numrevs))
486 ui.writenoi18n(b' merges : ' + fmt % pcfmt(nummerges, numrevs))
487 ui.writenoi18n(
487 ui.writenoi18n(
488 b' normal : ' + fmt % pcfmt(numrevs - nummerges, numrevs)
488 b' normal : ' + fmt % pcfmt(numrevs - nummerges, numrevs)
489 )
489 )
490 ui.writenoi18n(b'revisions : ' + fmt2 % numrevs)
490 ui.writenoi18n(b'revisions : ' + fmt2 % numrevs)
491 ui.writenoi18n(b' empty : ' + fmt % pcfmt(numempty, numrevs))
491 ui.writenoi18n(b' empty : ' + fmt % pcfmt(numempty, numrevs))
492 ui.writenoi18n(
492 ui.writenoi18n(
493 b' text : '
493 b' text : '
494 + fmt % pcfmt(numemptytext, numemptytext + numemptydelta)
494 + fmt % pcfmt(numemptytext, numemptytext + numemptydelta)
495 )
495 )
496 ui.writenoi18n(
496 ui.writenoi18n(
497 b' delta : '
497 b' delta : '
498 + fmt % pcfmt(numemptydelta, numemptytext + numemptydelta)
498 + fmt % pcfmt(numemptydelta, numemptytext + numemptydelta)
499 )
499 )
500 ui.writenoi18n(
500 ui.writenoi18n(
501 b' snapshot : ' + fmt % pcfmt(numfull + numsemi, numrevs)
501 b' snapshot : ' + fmt % pcfmt(numfull + numsemi, numrevs)
502 )
502 )
503 for depth in sorted(numsnapdepth):
503 for depth in sorted(numsnapdepth):
504 base = b' lvl-%-3d : ' % depth
504 base = b' lvl-%-3d : ' % depth
505 count = fmt % pcfmt(numsnapdepth[depth], numrevs)
505 count = fmt % pcfmt(numsnapdepth[depth], numrevs)
506 pieces = [base, count]
506 pieces = [base, count]
507 if numsnapdepth_nad[depth]:
507 if numsnapdepth_nad[depth]:
508 pieces[-1] = count = count[:-1] # drop the final '\n'
508 pieces[-1] = count = count[:-1] # drop the final '\n'
509 more = b' non-ancestor-bases: '
509 more = b' non-ancestor-bases: '
510 anc_count = fmt
510 anc_count = fmt
511 anc_count %= pcfmt(numsnapdepth_nad[depth], numsnapdepth[depth])
511 anc_count %= pcfmt(numsnapdepth_nad[depth], numsnapdepth[depth])
512 pieces.append(more)
512 pieces.append(more)
513 pieces.append(anc_count)
513 pieces.append(anc_count)
514 ui.write(b''.join(pieces))
514 ui.write(b''.join(pieces))
515 ui.writenoi18n(b' deltas : ' + fmt % pcfmt(numdeltas, numrevs))
515 ui.writenoi18n(b' deltas : ' + fmt % pcfmt(numdeltas, numrevs))
516 ui.writenoi18n(b'revision size : ' + fmt2 % totalsize)
516 ui.writenoi18n(b'revision size : ' + fmt2 % totalsize)
517 ui.writenoi18n(
517 ui.writenoi18n(
518 b' snapshot : ' + fmt % pcfmt(fulltotal + semitotal, totalsize)
518 b' snapshot : ' + fmt % pcfmt(fulltotal + semitotal, totalsize)
519 )
519 )
520 for depth in sorted(numsnapdepth):
520 for depth in sorted(numsnapdepth):
521 ui.write(
521 ui.write(
522 (b' lvl-%-3d : ' % depth)
522 (b' lvl-%-3d : ' % depth)
523 + fmt % pcfmt(snaptotal[depth], totalsize)
523 + fmt % pcfmt(snaptotal[depth], totalsize)
524 )
524 )
525 ui.writenoi18n(b' deltas : ' + fmt % pcfmt(deltatotal, totalsize))
525 ui.writenoi18n(b' deltas : ' + fmt % pcfmt(deltatotal, totalsize))
526
526
527 letters = string.ascii_letters.encode('ascii')
527 letters = string.ascii_letters.encode('ascii')
528
528
529 def fmtchunktype(chunktype):
529 def fmtchunktype(chunktype):
530 if chunktype == b'empty':
530 if chunktype == b'empty':
531 return b' %s : ' % chunktype
531 return b' %s : ' % chunktype
532 elif chunktype in letters:
532 elif chunktype in letters:
533 return b' 0x%s (%s) : ' % (nodemod.hex(chunktype), chunktype)
533 return b' 0x%s (%s) : ' % (nodemod.hex(chunktype), chunktype)
534 else:
534 else:
535 return b' 0x%s : ' % nodemod.hex(chunktype)
535 return b' 0x%s : ' % nodemod.hex(chunktype)
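# e.g. fmtchunktype(b'u') yields the label b'0x75 (u)' (hex plus the readable
# byte), while fmtchunktype(b'\x00') yields just b'0x00'.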
536
536
537 ui.write(b'\n')
537 ui.write(b'\n')
538 ui.writenoi18n(b'chunks : ' + fmt2 % numrevs)
538 ui.writenoi18n(b'chunks : ' + fmt2 % numrevs)
539 for chunktype in sorted(chunktypecounts):
539 for chunktype in sorted(chunktypecounts):
540 ui.write(fmtchunktype(chunktype))
540 ui.write(fmtchunktype(chunktype))
541 ui.write(fmt % pcfmt(chunktypecounts[chunktype], numrevs))
541 ui.write(fmt % pcfmt(chunktypecounts[chunktype], numrevs))
542 ui.writenoi18n(b'chunks size : ' + fmt2 % totalsize)
542 ui.writenoi18n(b'chunks size : ' + fmt2 % totalsize)
543 for chunktype in sorted(chunktypecounts):
543 for chunktype in sorted(chunktypecounts):
544 ui.write(fmtchunktype(chunktype))
544 ui.write(fmtchunktype(chunktype))
545 ui.write(fmt % pcfmt(chunktypesizes[chunktype], totalsize))
545 ui.write(fmt % pcfmt(chunktypesizes[chunktype], totalsize))
546
546
547 ui.write(b'\n')
547 ui.write(b'\n')
548 b_total = b"%d" % full_text_total_size
548 b_total = b"%d" % full_text_total_size
549 p_total = []
549 p_total = []
550 while len(b_total) > 3:
550 while len(b_total) > 3:
551 p_total.append(b_total[-3:])
551 p_total.append(b_total[-3:])
552 b_total = b_total[:-3]
552 b_total = b_total[:-3]
553 p_total.append(b_total)
553 p_total.append(b_total)
554 p_total.reverse()
554 p_total.reverse()
555 b_total = b' '.join(p_total)
555 b_total = b' '.join(p_total)
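# e.g. full_text_total_size == 12345678 produces b_total == b'12 345 678'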
556
556
557 ui.write(b'\n')
557 ui.write(b'\n')
558 ui.writenoi18n(b'total-stored-content: %s bytes\n' % b_total)
558 ui.writenoi18n(b'total-stored-content: %s bytes\n' % b_total)
559 ui.write(b'\n')
559 ui.write(b'\n')
560 fmt = dfmtstr(max(avgchainlen, maxchainlen, maxchainspan, compratio))
560 fmt = dfmtstr(max(avgchainlen, maxchainlen, maxchainspan, compratio))
561 ui.writenoi18n(b'avg chain length : ' + fmt % avgchainlen)
561 ui.writenoi18n(b'avg chain length : ' + fmt % avgchainlen)
562 ui.writenoi18n(b'max chain length : ' + fmt % maxchainlen)
562 ui.writenoi18n(b'max chain length : ' + fmt % maxchainlen)
563 ui.writenoi18n(b'max chain reach : ' + fmt % maxchainspan)
563 ui.writenoi18n(b'max chain reach : ' + fmt % maxchainspan)
564 ui.writenoi18n(b'compression ratio : ' + fmt % compratio)
564 ui.writenoi18n(b'compression ratio : ' + fmt % compratio)
565
565
566 if format > 0:
566 if format > 0:
567 ui.write(b'\n')
567 ui.write(b'\n')
568 ui.writenoi18n(
568 ui.writenoi18n(
569 b'uncompressed data size (min/max/avg) : %d / %d / %d\n'
569 b'uncompressed data size (min/max/avg) : %d / %d / %d\n'
570 % tuple(datasize)
570 % tuple(datasize)
571 )
571 )
572 ui.writenoi18n(
572 ui.writenoi18n(
573 b'full revision size (min/max/avg) : %d / %d / %d\n'
573 b'full revision size (min/max/avg) : %d / %d / %d\n'
574 % tuple(fullsize)
574 % tuple(fullsize)
575 )
575 )
576 ui.writenoi18n(
576 ui.writenoi18n(
577 b'inter-snapshot size (min/max/avg) : %d / %d / %d\n'
577 b'inter-snapshot size (min/max/avg) : %d / %d / %d\n'
578 % tuple(semisize)
578 % tuple(semisize)
579 )
579 )
580 for depth in sorted(snapsizedepth):
580 for depth in sorted(snapsizedepth):
581 if depth == 0:
581 if depth == 0:
582 continue
582 continue
583 ui.writenoi18n(
583 ui.writenoi18n(
584 b' level-%-3d (min/max/avg) : %d / %d / %d\n'
584 b' level-%-3d (min/max/avg) : %d / %d / %d\n'
585 % ((depth,) + tuple(snapsizedepth[depth]))
585 % ((depth,) + tuple(snapsizedepth[depth]))
586 )
586 )
587 ui.writenoi18n(
587 ui.writenoi18n(
588 b'delta size (min/max/avg) : %d / %d / %d\n'
588 b'delta size (min/max/avg) : %d / %d / %d\n'
589 % tuple(deltasize)
589 % tuple(deltasize)
590 )
590 )
591
591
592 if numdeltas > 0:
592 if numdeltas > 0:
593 ui.write(b'\n')
593 ui.write(b'\n')
594 fmt = pcfmtstr(numdeltas)
594 fmt = pcfmtstr(numdeltas)
595 fmt2 = pcfmtstr(numdeltas, 4)
595 fmt2 = pcfmtstr(numdeltas, 4)
596 ui.writenoi18n(
596 ui.writenoi18n(
597 b'deltas against prev : ' + fmt % pcfmt(numprev, numdeltas)
597 b'deltas against prev : ' + fmt % pcfmt(numprev, numdeltas)
598 )
598 )
599 if numprev > 0:
599 if numprev > 0:
600 ui.writenoi18n(
600 ui.writenoi18n(
601 b' where prev = p1 : ' + fmt2 % pcfmt(nump1prev, numprev)
601 b' where prev = p1 : ' + fmt2 % pcfmt(nump1prev, numprev)
602 )
602 )
603 ui.writenoi18n(
603 ui.writenoi18n(
604 b' where prev = p2 : ' + fmt2 % pcfmt(nump2prev, numprev)
604 b' where prev = p2 : ' + fmt2 % pcfmt(nump2prev, numprev)
605 )
605 )
606 ui.writenoi18n(
606 ui.writenoi18n(
607 b' other-ancestor : ' + fmt2 % pcfmt(numoprev, numprev)
607 b' other-ancestor : ' + fmt2 % pcfmt(numoprev, numprev)
608 )
608 )
609 ui.writenoi18n(
609 ui.writenoi18n(
610 b' unrelated : ' + fmt2 % pcfmt(numprev_nad, numprev)
610 b' unrelated : ' + fmt2 % pcfmt(numprev_nad, numprev)
611 )
611 )
612 if gdelta:
612 if gdelta:
613 ui.writenoi18n(
613 ui.writenoi18n(
614 b'deltas against p1 : ' + fmt % pcfmt(nump1, numdeltas)
614 b'deltas against p1 : ' + fmt % pcfmt(nump1, numdeltas)
615 )
615 )
616 ui.writenoi18n(
616 ui.writenoi18n(
617 b'deltas against p2 : ' + fmt % pcfmt(nump2, numdeltas)
617 b'deltas against p2 : ' + fmt % pcfmt(nump2, numdeltas)
618 )
618 )
619 ui.writenoi18n(
619 ui.writenoi18n(
620 b'deltas against ancs : '
620 b'deltas against ancs : '
621 + fmt % pcfmt(num_other_ancestors, numdeltas)
621 + fmt % pcfmt(num_other_ancestors, numdeltas)
622 )
622 )
623 ui.writenoi18n(
623 ui.writenoi18n(
624 b'deltas against other : '
624 b'deltas against other : '
625 + fmt % pcfmt(numother_nad, numdeltas)
625 + fmt % pcfmt(numother_nad, numdeltas)
626 )
626 )
627
627
628
628
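# Illustrative sketch (not part of this changeset): a compact restatement of
# the per-revision classification performed above, for any object exposing
# the revlog methods used in this function.
def classify_delta_storage(rl, rev):
    if rl.length(rev) == 0:
        return b'empty'
    if rl.deltaparent(rev) == nodemod.nullrev:
        return b'full-snapshot'
    if rl.issnapshot(rev):
        return b'snapshot-depth-%d' % rl.snapshotdepth(rev)
    if rl.deltaparent(rev) == rev - 1:
        return b'delta-vs-prev'
    return b'delta-vs-other'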
629 def debug_delta_find(ui, revlog, rev, base_rev=nodemod.nullrev):
629 def debug_delta_find(ui, revlog, rev, base_rev=nodemod.nullrev):
630 """display the search process for a delta"""
630 """display the search process for a delta"""
631 deltacomputer = deltautil.deltacomputer(
631 deltacomputer = deltautil.deltacomputer(
632 revlog,
632 revlog,
633 write_debug=ui.write,
633 write_debug=ui.write,
634 debug_search=not ui.quiet,
634 debug_search=not ui.quiet,
635 )
635 )
636
636
637 node = revlog.node(rev)
637 node = revlog.node(rev)
638 p1r, p2r = revlog.parentrevs(rev)
638 p1r, p2r = revlog.parentrevs(rev)
639 p1 = revlog.node(p1r)
639 p1 = revlog.node(p1r)
640 p2 = revlog.node(p2r)
640 p2 = revlog.node(p2r)
641 full_text = revlog.revision(rev)
641 full_text = revlog.revision(rev)
642 btext = [full_text]
642 btext = [full_text]
643 textlen = len(btext[0])
643 textlen = len(btext[0])
644 cachedelta = None
644 cachedelta = None
645 flags = revlog.flags(rev)
645 flags = revlog.flags(rev)
646
646
647 if base_rev != nodemod.nullrev:
647 if base_rev != nodemod.nullrev:
648 base_text = revlog.revision(base_rev)
648 base_text = revlog.revision(base_rev)
649 delta = mdiff.textdiff(base_text, full_text)
649 delta = mdiff.textdiff(base_text, full_text)
650
650
651 cachedelta = (base_rev, delta, constants.DELTA_BASE_REUSE_TRY)
651 cachedelta = (base_rev, delta, constants.DELTA_BASE_REUSE_TRY)
652 btext = [None]
652 btext = [None]
653
653
654 revinfo = revlogutils.revisioninfo(
654 revinfo = revlogutils.revisioninfo(
655 node,
655 node,
656 p1,
656 p1,
657 p2,
657 p2,
658 btext,
658 btext,
659 textlen,
659 textlen,
660 cachedelta,
660 cachedelta,
661 flags,
661 flags,
662 )
662 )
663
663
664 fh = revlog._datafp()
664 fh = revlog._datafp()
665 deltacomputer.finddeltainfo(revinfo, fh, target_rev=rev)
665 deltacomputer.finddeltainfo(revinfo, fh, target_rev=rev)
666
666
667
667
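# Illustrative usage (hypothetical driver, not part of this changeset):
# replay the delta search for the changelog tip of an already-open `repo`.
cl = repo.changelog
debug_delta_find(repo.ui, cl, len(cl) - 1)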
668 def debug_revlog_stats(
668 def debug_revlog_stats(
669 repo, fm, changelog: bool, manifest: bool, filelogs: bool
669 repo, fm, changelog: bool, manifest: bool, filelogs: bool
670 ):
670 ):
671 """Format revlog statistics for debugging purposes
671 """Format revlog statistics for debugging purposes
672
672
673 fm: the output formatter.
673 fm: the output formatter.
674 """
674 """
675 fm.plain(b'rev-count data-size inl type target \n')
675 fm.plain(b'rev-count data-size inl type target \n')
676
676
677 revlog_entries = [e for e in repo.store.walk() if e.is_revlog]
677 revlog_entries = [e for e in repo.store.walk() if e.is_revlog]
678 revlog_entries.sort(key=lambda e: (e.revlog_type, e.target_id))
678 revlog_entries.sort(key=lambda e: (e.revlog_type, e.target_id))
679
679
680 for entry in revlog_entries:
680 for entry in revlog_entries:
681 if not changelog and entry.is_changelog:
681 if not changelog and entry.is_changelog:
682 continue
682 continue
683 elif not manifest and entry.is_manifestlog:
683 elif not manifest and entry.is_manifestlog:
684 continue
684 continue
685 elif not filelogs and entry.is_filelog:
685 elif not filelogs and entry.is_filelog:
686 continue
686 continue
687 rlog = entry.get_revlog_instance(repo).get_revlog()
687 rlog = entry.get_revlog_instance(repo).get_revlog()
688 fm.startitem()
688 fm.startitem()
689 nb_rev = len(rlog)
689 nb_rev = len(rlog)
690 inline = rlog._inline
690 inline = rlog._inline
691 data_size = rlog._get_data_offset(nb_rev - 1)
691 data_size = rlog._get_data_offset(nb_rev - 1)
692
692
693 target = rlog.target
693 target = rlog.target
694 revlog_type = b'unknown'
694 revlog_type = b'unknown'
695 revlog_target = b''
695 revlog_target = b''
696 if target[0] == constants.KIND_CHANGELOG:
696 if target[0] == constants.KIND_CHANGELOG:
697 revlog_type = b'changelog'
697 revlog_type = b'changelog'
698 elif target[0] == constants.KIND_MANIFESTLOG:
698 elif target[0] == constants.KIND_MANIFESTLOG:
699 revlog_type = b'manifest'
699 revlog_type = b'manifest'
700 revlog_target = target[1]
700 revlog_target = target[1]
701 elif target[0] == constants.KIND_FILELOG:
701 elif target[0] == constants.KIND_FILELOG:
702 revlog_type = b'file'
702 revlog_type = b'file'
703 revlog_target = target[1]
703 revlog_target = target[1]
704
704
705 fm.write(b'revlog.rev-count', b'%9d', nb_rev)
705 fm.write(b'revlog.rev-count', b'%9d', nb_rev)
706 fm.write(b'revlog.data-size', b'%12d', data_size)
706 fm.write(b'revlog.data-size', b'%12d', data_size)
707
707
708 fm.write(b'revlog.inline', b' %-3s', b'yes' if inline else b'no')
708 fm.write(b'revlog.inline', b' %-3s', b'yes' if inline else b'no')
709 fm.write(b'revlog.type', b' %-9s', revlog_type)
709 fm.write(b'revlog.type', b' %-9s', revlog_type)
710 fm.write(b'revlog.target', b' %s', revlog_target)
710 fm.write(b'revlog.target', b' %s', revlog_target)
711
711
712 fm.plain(b'\n')
712 fm.plain(b'\n')
713
713
714
714
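# Illustrative usage (hypothetical, not part of this changeset), assuming
# ui.formatter() accepts an empty options dict here:
fm = ui.formatter(b'debug-revlog-stats', {})
debug_revlog_stats(repo, fm, changelog=True, manifest=True, filelogs=True)
fm.end()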
715 class DeltaChainAuditor:
715 class DeltaChainAuditor:
716 def __init__(self, revlog):
716 def __init__(self, revlog):
717 self._revlog = revlog
717 self._revlog = revlog
718 self._index = self._revlog.index
718 self._index = self._revlog.index
719 self._generaldelta = revlog.delta_config.general_delta
719 self._generaldelta = revlog.delta_config.general_delta
720 self._chain_size_cache = {}
720 self._chain_size_cache = {}
721 # safety guard to avoid crashing on corrupted revlogs
721 # safety guard to avoid crashing on corrupted revlogs
722 self._total_revs = len(self._index)
722 self._total_revs = len(self._index)
723
723
724 def revinfo(self, rev, size_info=True, dist_info=True, sparse_info=True):
724 def revinfo(self, rev, size_info=True, dist_info=True, sparse_info=True):
725 e = self._index[rev]
725 e = self._index[rev]
726 compsize = e[constants.ENTRY_DATA_COMPRESSED_LENGTH]
726 compsize = e[constants.ENTRY_DATA_COMPRESSED_LENGTH]
727 uncompsize = e[constants.ENTRY_DATA_UNCOMPRESSED_LENGTH]
727 uncompsize = e[constants.ENTRY_DATA_UNCOMPRESSED_LENGTH]
728
728
729 base = e[constants.ENTRY_DELTA_BASE]
729 base = e[constants.ENTRY_DELTA_BASE]
730 p1 = e[constants.ENTRY_PARENT_1]
730 p1 = e[constants.ENTRY_PARENT_1]
731 p2 = e[constants.ENTRY_PARENT_2]
731 p2 = e[constants.ENTRY_PARENT_2]
732
732
733 # If a parent of a revision has an empty delta, we never try to
733 # If a parent of a revision has an empty delta, we never try to
734 # delta against that parent, but directly against the delta base of
734 # delta against that parent, but directly against the delta base of
735 # that parent (recursively). This avoids adding a useless entry in the
735 # that parent (recursively). This avoids adding a useless entry in the
736 # chain.
736 # chain.
737 #
737 #
738 # However, we need to detect that case so it gets a dedicated delta-type
738 # However, we need to detect that case so it gets a dedicated delta-type
739 # ("skip1"/"skip2") instead of simply "other".
739 # ("skip1"/"skip2") instead of simply "other".
740 p1_base = p1
740 p1_base = p1
741 if p1 != nodemod.nullrev and p1 < self._total_revs:
741 if p1 != nodemod.nullrev and p1 < self._total_revs:
742 e1 = self._index[p1]
742 e1 = self._index[p1]
743 while e1[constants.ENTRY_DATA_COMPRESSED_LENGTH] == 0:
743 while e1[constants.ENTRY_DATA_COMPRESSED_LENGTH] == 0:
744 new_base = e1[constants.ENTRY_DELTA_BASE]
744 new_base = e1[constants.ENTRY_DELTA_BASE]
745 if (
745 if (
746 new_base == p1_base
746 new_base == p1_base
747 or new_base == nodemod.nullrev
747 or new_base == nodemod.nullrev
748 or new_base >= self._total_revs
748 or new_base >= self._total_revs
749 ):
749 ):
750 break
750 break
751 p1_base = new_base
751 p1_base = new_base
752 e1 = self._index[p1_base]
752 e1 = self._index[p1_base]
753 p2_base = p2
753 p2_base = p2
754 if p2 != nodemod.nullrev and p2 < self._total_revs:
754 if p2 != nodemod.nullrev and p2 < self._total_revs:
755 e2 = self._index[p2]
755 e2 = self._index[p2]
756 while e2[constants.ENTRY_DATA_COMPRESSED_LENGTH] == 0:
756 while e2[constants.ENTRY_DATA_COMPRESSED_LENGTH] == 0:
757 new_base = e2[constants.ENTRY_DELTA_BASE]
757 new_base = e2[constants.ENTRY_DELTA_BASE]
758 if (
758 if (
759 new_base == p2_base
759 new_base == p2_base
760 or new_base == nodemod.nullrev
760 or new_base == nodemod.nullrev
761 or new_base >= self._total_revs
761 or new_base >= self._total_revs
762 ):
762 ):
763 break
763 break
764 p2_base = new_base
764 p2_base = new_base
765 e2 = self._index[p2_base]
765 e2 = self._index[p2_base]
766
766
767 if self._generaldelta:
767 if self._generaldelta:
768 if base == p1:
768 if base == p1:
769 deltatype = b'p1'
769 deltatype = b'p1'
770 elif base == p2:
770 elif base == p2:
771 deltatype = b'p2'
771 deltatype = b'p2'
772 elif base == rev:
772 elif base == rev:
773 deltatype = b'base'
773 deltatype = b'base'
774 elif base == p1_base:
774 elif base == p1_base:
775 deltatype = b'skip1'
775 deltatype = b'skip1'
776 elif base == p2_base:
776 elif base == p2_base:
777 deltatype = b'skip2'
777 deltatype = b'skip2'
778 elif self._revlog.issnapshot(rev):
778 elif self._revlog.issnapshot(rev):
779 deltatype = b'snap'
779 deltatype = b'snap'
780 elif base == rev - 1:
780 elif base == rev - 1:
781 deltatype = b'prev'
781 deltatype = b'prev'
782 else:
782 else:
783 deltatype = b'other'
783 deltatype = b'other'
784 else:
784 else:
785 if base == rev:
785 if base == rev:
786 deltatype = b'base'
786 deltatype = b'base'
787 else:
787 else:
788 deltatype = b'prev'
788 deltatype = b'prev'
789
789
790 chain = self._revlog._deltachain(rev)[0]
790 chain = self._revlog._deltachain(rev)[0]
791
791
792 data = {
792 data = {
793 'p1': p1,
793 'p1': p1,
794 'p2': p2,
794 'p2': p2,
795 'compressed_size': compsize,
795 'compressed_size': compsize,
796 'uncompressed_size': uncompsize,
796 'uncompressed_size': uncompsize,
797 'deltatype': deltatype,
797 'deltatype': deltatype,
798 'chain': chain,
798 'chain': chain,
799 }
799 }
800
800
801 if size_info or dist_info or sparse_info:
801 if size_info or dist_info or sparse_info:
802 chain_size = 0
802 chain_size = 0
803 for iter_rev in reversed(chain):
803 for iter_rev in reversed(chain):
804 cached = self._chain_size_cache.get(iter_rev)
804 cached = self._chain_size_cache.get(iter_rev)
805 if cached is not None:
805 if cached is not None:
806 chain_size += cached
806 chain_size += cached
807 break
807 break
808 e = self._index[iter_rev]
808 e = self._index[iter_rev]
809 chain_size += e[constants.ENTRY_DATA_COMPRESSED_LENGTH]
809 chain_size += e[constants.ENTRY_DATA_COMPRESSED_LENGTH]
810 self._chain_size_cache[rev] = chain_size
810 self._chain_size_cache[rev] = chain_size
811 data['chain_size'] = chain_size
811 data['chain_size'] = chain_size
812
812
813 return data
813 return data
814
814
815
815
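# Illustrative usage (not part of this changeset): audit a single revision.
auditor = DeltaChainAuditor(revlog)
info = auditor.revinfo(rev, size_info=True, dist_info=False, sparse_info=False)
# info['deltatype'] is one of b'p1', b'p2', b'base', b'skip1', b'skip2',
# b'snap', b'prev' or b'other'; info['chain'] lists the delta chain's revs.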
816 def debug_delta_chain(
816 def debug_delta_chain(
817 revlog,
817 revlog,
818 revs=None,
818 revs=None,
819 size_info=True,
819 size_info=True,
820 dist_info=True,
820 dist_info=True,
821 sparse_info=True,
821 sparse_info=True,
822 ):
822 ):
823 auditor = DeltaChainAuditor(revlog)
823 auditor = DeltaChainAuditor(revlog)
824 r = revlog
824 r = revlog
825 start = r.start
825 start = r.start
826 length = r.length
826 length = r.length
827 withsparseread = revlog.data_config.with_sparse_read
827 withsparseread = revlog.data_config.with_sparse_read
828
828
829 header = (
829 header = (
830 b' rev'
830 b' rev'
831 b' p1'
831 b' p1'
832 b' p2'
832 b' p2'
833 b' chain#'
833 b' chain#'
834 b' chainlen'
834 b' chainlen'
835 b' prev'
835 b' prev'
836 b' delta'
836 b' delta'
837 )
837 )
838 if size_info:
838 if size_info:
839 header += b' size' b' rawsize' b' chainsize' b' ratio'
839 header += b' size' b' rawsize' b' chainsize' b' ratio'
840 if dist_info:
840 if dist_info:
841 header += b' lindist' b' extradist' b' extraratio'
841 header += b' lindist' b' extradist' b' extraratio'
842 if withsparseread and sparse_info:
842 if withsparseread and sparse_info:
843 header += b' readsize' b' largestblk' b' rddensity' b' srchunks'
843 header += b' readsize' b' largestblk' b' rddensity' b' srchunks'
844 header += b'\n'
844 header += b'\n'
845 yield header
845 yield header
846
846
847 if revs is None:
847 if revs is None:
848 all_revs = iter(r)
848 all_revs = iter(r)
849 else:
849 else:
850 revlog_size = len(r)
850 revlog_size = len(r)
851 all_revs = sorted(rev for rev in revs if rev < revlog_size)
851 all_revs = sorted(rev for rev in revs if rev < revlog_size)
852
852
853 chainbases = {}
853 chainbases = {}
854 for rev in all_revs:
854 for rev in all_revs:
855 info = auditor.revinfo(
855 info = auditor.revinfo(
856 rev,
856 rev,
857 size_info=size_info,
857 size_info=size_info,
858 dist_info=dist_info,
858 dist_info=dist_info,
859 sparse_info=sparse_info,
859 sparse_info=sparse_info,
860 )
860 )
861 comp = info['compressed_size']
861 comp = info['compressed_size']
862 uncomp = info['uncompressed_size']
862 uncomp = info['uncompressed_size']
863 chain = info['chain']
863 chain = info['chain']
864 chainbase = chain[0]
864 chainbase = chain[0]
865 chainid = chainbases.setdefault(chainbase, len(chainbases) + 1)
865 chainid = chainbases.setdefault(chainbase, len(chainbases) + 1)
866 if dist_info:
866 if dist_info:
867 basestart = start(chainbase)
867 basestart = start(chainbase)
868 revstart = start(rev)
868 revstart = start(rev)
869 lineardist = revstart + comp - basestart
869 lineardist = revstart + comp - basestart
870 extradist = lineardist - info['chain_size']
870 extradist = lineardist - info['chain_size']
871 try:
871 try:
872 prevrev = chain[-2]
872 prevrev = chain[-2]
873 except IndexError:
873 except IndexError:
874 prevrev = -1
874 prevrev = -1
875
875
876 if size_info:
876 if size_info:
877 chainsize = info['chain_size']
877 chainsize = info['chain_size']
878 if uncomp != 0:
878 if uncomp != 0:
879 chainratio = float(chainsize) / float(uncomp)
879 chainratio = float(chainsize) / float(uncomp)
880 else:
880 else:
881 chainratio = chainsize
881 chainratio = chainsize
882
882
883 if dist_info:
883 if dist_info:
884 if chainsize != 0:
884 if chainsize != 0:
885 extraratio = float(extradist) / float(chainsize)
885 extraratio = float(extradist) / float(chainsize)
886 else:
886 else:
887 extraratio = extradist
887 extraratio = extradist
888
888
889 # label, display-format, data-key, value
889 # label, display-format, data-key, value
890 entry = [
890 entry = [
891 (b'rev', b'%7d', 'rev', rev),
891 (b'rev', b'%7d', 'rev', rev),
892 (b'p1', b'%7d', 'p1', info['p1']),
892 (b'p1', b'%7d', 'p1', info['p1']),
893 (b'p2', b'%7d', 'p2', info['p2']),
893 (b'p2', b'%7d', 'p2', info['p2']),
894 (b'chainid', b'%7d', 'chainid', chainid),
894 (b'chainid', b'%7d', 'chainid', chainid),
895 (b'chainlen', b'%8d', 'chainlen', len(chain)),
895 (b'chainlen', b'%8d', 'chainlen', len(chain)),
896 (b'prevrev', b'%8d', 'prevrev', prevrev),
896 (b'prevrev', b'%8d', 'prevrev', prevrev),
897 (b'deltatype', b'%7s', 'deltatype', info['deltatype']),
897 (b'deltatype', b'%7s', 'deltatype', info['deltatype']),
898 ]
898 ]
899 if size_info:
899 if size_info:
900 entry.extend(
900 entry.extend(
901 [
901 [
902 (b'compsize', b'%10d', 'compsize', comp),
902 (b'compsize', b'%10d', 'compsize', comp),
903 (b'uncompsize', b'%10d', 'uncompsize', uncomp),
903 (b'uncompsize', b'%10d', 'uncompsize', uncomp),
904 (b'chainsize', b'%10d', 'chainsize', chainsize),
904 (b'chainsize', b'%10d', 'chainsize', chainsize),
905 (b'chainratio', b'%9.5f', 'chainratio', chainratio),
905 (b'chainratio', b'%9.5f', 'chainratio', chainratio),
906 ]
906 ]
907 )
907 )
908 if dist_info:
908 if dist_info:
909 entry.extend(
909 entry.extend(
910 [
910 [
911 (b'lindist', b'%9d', 'lindist', lineardist),
911 (b'lindist', b'%9d', 'lindist', lineardist),
912 (b'extradist', b'%9d', 'extradist', extradist),
912 (b'extradist', b'%9d', 'extradist', extradist),
913 (b'extraratio', b'%10.5f', 'extraratio', extraratio),
913 (b'extraratio', b'%10.5f', 'extraratio', extraratio),
914 ]
914 ]
915 )
915 )
916 if withsparseread and sparse_info:
916 if withsparseread and sparse_info:
917 chainsize = info['chain_size']
917 chainsize = info['chain_size']
918 readsize = 0
918 readsize = 0
919 largestblock = 0
919 largestblock = 0
920 srchunks = 0
920 srchunks = 0
921
921
922 for revschunk in deltautil.slicechunk(r, chain):
922 for revschunk in deltautil.slicechunk(r, chain):
923 srchunks += 1
923 srchunks += 1
924 blkend = start(revschunk[-1]) + length(revschunk[-1])
924 blkend = start(revschunk[-1]) + length(revschunk[-1])
925 blksize = blkend - start(revschunk[0])
925 blksize = blkend - start(revschunk[0])
926
926
927 readsize += blksize
927 readsize += blksize
928 if largestblock < blksize:
928 if largestblock < blksize:
929 largestblock = blksize
929 largestblock = blksize
930
930
931 if readsize:
931 if readsize:
932 readdensity = float(chainsize) / float(readsize)
932 readdensity = float(chainsize) / float(readsize)
933 else:
933 else:
934 readdensity = 1
934 readdensity = 1
935 entry.extend(
935 entry.extend(
936 [
936 [
937 (b'readsize', b'%10d', 'readsize', readsize),
937 (b'readsize', b'%10d', 'readsize', readsize),
938 (b'largestblock', b'%10d', 'largestblock', largestblock),
938 (b'largestblock', b'%10d', 'largestblock', largestblock),
939 (b'readdensity', b'%9.5f', 'readdensity', readdensity),
939 (b'readdensity', b'%9.5f', 'readdensity', readdensity),
940 (b'srchunks', b'%8d', 'srchunks', srchunks),
940 (b'srchunks', b'%8d', 'srchunks', srchunks),
941 ]
941 ]
942 )
942 )
943 yield entry
943 yield entry
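# Illustrative consumer (not part of this changeset): the generator first
# yields a preformatted bytes header, then one list of
# (label, display-format, data-key, value) tuples per revision.
rows = debug_delta_chain(revlog)
output = [next(rows)]  # the header line
for entry in rows:
    output.append(b''.join(fmt % value for _lbl, fmt, _key, value in entry) + b'\n')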
@@ -1,875 +1,876 b''
1 # censor code related to censoring revision
1 # censor code related to censoring revision
2 # coding: utf8
2 # coding: utf8
3 #
3 #
4 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
4 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
5 # Copyright 2015 Google, Inc <martinvonz@google.com>
5 # Copyright 2015 Google, Inc <martinvonz@google.com>
6 #
6 #
7 # This software may be used and distributed according to the terms of the
7 # This software may be used and distributed according to the terms of the
8 # GNU General Public License version 2 or any later version.
8 # GNU General Public License version 2 or any later version.
9
9
10 import binascii
10 import binascii
11 import contextlib
11 import contextlib
12 import os
12 import os
13 import struct
13 import struct
14
14
15 from ..node import (
15 from ..node import (
16 nullrev,
16 nullrev,
17 )
17 )
18 from .constants import (
18 from .constants import (
19 COMP_MODE_PLAIN,
19 COMP_MODE_PLAIN,
20 ENTRY_DATA_COMPRESSED_LENGTH,
20 ENTRY_DATA_COMPRESSED_LENGTH,
21 ENTRY_DATA_COMPRESSION_MODE,
21 ENTRY_DATA_COMPRESSION_MODE,
22 ENTRY_DATA_OFFSET,
22 ENTRY_DATA_OFFSET,
23 ENTRY_DATA_UNCOMPRESSED_LENGTH,
23 ENTRY_DATA_UNCOMPRESSED_LENGTH,
24 ENTRY_DELTA_BASE,
24 ENTRY_DELTA_BASE,
25 ENTRY_LINK_REV,
25 ENTRY_LINK_REV,
26 ENTRY_NODE_ID,
26 ENTRY_NODE_ID,
27 ENTRY_PARENT_1,
27 ENTRY_PARENT_1,
28 ENTRY_PARENT_2,
28 ENTRY_PARENT_2,
29 ENTRY_SIDEDATA_COMPRESSED_LENGTH,
29 ENTRY_SIDEDATA_COMPRESSED_LENGTH,
30 ENTRY_SIDEDATA_COMPRESSION_MODE,
30 ENTRY_SIDEDATA_COMPRESSION_MODE,
31 ENTRY_SIDEDATA_OFFSET,
31 ENTRY_SIDEDATA_OFFSET,
32 REVIDX_ISCENSORED,
32 REVIDX_ISCENSORED,
33 REVLOGV0,
33 REVLOGV0,
34 REVLOGV1,
34 REVLOGV1,
35 )
35 )
36 from ..i18n import _
36 from ..i18n import _
37
37
38 from .. import (
38 from .. import (
39 error,
39 error,
40 mdiff,
40 mdiff,
41 pycompat,
41 pycompat,
42 revlogutils,
42 revlogutils,
43 util,
43 util,
44 )
44 )
45 from ..utils import (
45 from ..utils import (
46 storageutil,
46 storageutil,
47 )
47 )
48 from . import (
48 from . import (
49 constants,
49 constants,
50 deltas,
50 deltas,
51 )
51 )
52
52
53
53
54 def v1_censor(rl, tr, censornode, tombstone=b''):
54 def v1_censor(rl, tr, censornode, tombstone=b''):
55 """censors a revision in a "version 1" revlog"""
55 """censors a revision in a "version 1" revlog"""
56 assert rl._format_version == constants.REVLOGV1, rl._format_version
56 assert rl._format_version == constants.REVLOGV1, rl._format_version
57
57
58 # avoid cycle
58 # avoid cycle
59 from .. import revlog
59 from .. import revlog
60
60
61 censorrev = rl.rev(censornode)
61 censorrev = rl.rev(censornode)
62 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
62 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
63
63
64 # Rewriting the revlog in place is hard. Our strategy for censoring is
64 # Rewriting the revlog in place is hard. Our strategy for censoring is
65 # to create a new revlog, copy all revisions to it, then replace the
65 # to create a new revlog, copy all revisions to it, then replace the
66 # revlogs on transaction close.
66 # revlogs on transaction close.
67 #
67 #
68 # This is a bit dangerous. We could easily have a mismatch of state.
68 # This is a bit dangerous. We could easily have a mismatch of state.
69 newrl = revlog.revlog(
69 newrl = revlog.revlog(
70 rl.opener,
70 rl.opener,
71 target=rl.target,
71 target=rl.target,
72 radix=rl.radix,
72 radix=rl.radix,
73 postfix=b'tmpcensored',
73 postfix=b'tmpcensored',
74 censorable=True,
74 censorable=True,
75 )
75 )
76 newrl._format_version = rl._format_version
76 newrl._format_version = rl._format_version
77 newrl._format_flags = rl._format_flags
77 newrl._format_flags = rl._format_flags
78 newrl.delta_config.general_delta = rl.delta_config.general_delta
78 newrl.delta_config.general_delta = rl.delta_config.general_delta
79 newrl._parse_index = rl._parse_index
79 newrl._parse_index = rl._parse_index
80
80
81 for rev in rl.revs():
81 for rev in rl.revs():
82 node = rl.node(rev)
82 node = rl.node(rev)
83 p1, p2 = rl.parents(node)
83 p1, p2 = rl.parents(node)
84
84
85 if rev == censorrev:
85 if rev == censorrev:
86 newrl.addrawrevision(
86 newrl.addrawrevision(
87 tombstone,
87 tombstone,
88 tr,
88 tr,
89 rl.linkrev(censorrev),
89 rl.linkrev(censorrev),
90 p1,
90 p1,
91 p2,
91 p2,
92 censornode,
92 censornode,
93 constants.REVIDX_ISCENSORED,
93 constants.REVIDX_ISCENSORED,
94 )
94 )
95
95
96 if newrl.deltaparent(rev) != nullrev:
96 if newrl.deltaparent(rev) != nullrev:
97 m = _(b'censored revision stored as delta; cannot censor')
97 m = _(b'censored revision stored as delta; cannot censor')
98 h = _(
98 h = _(
99 b'censoring of revlogs is not fully implemented;'
99 b'censoring of revlogs is not fully implemented;'
100 b' please report this bug'
100 b' please report this bug'
101 )
101 )
102 raise error.Abort(m, hint=h)
102 raise error.Abort(m, hint=h)
103 continue
103 continue
104
104
105 if rl.iscensored(rev):
105 if rl.iscensored(rev):
106 if rl.deltaparent(rev) != nullrev:
106 if rl.deltaparent(rev) != nullrev:
107 m = _(
107 m = _(
108 b'cannot censor due to censored '
108 b'cannot censor due to censored '
109 b'revision having delta stored'
109 b'revision having delta stored'
110 )
110 )
111 raise error.Abort(m)
111 raise error.Abort(m)
112 rawtext = rl._chunk(rev)
112 rawtext = rl._chunk(rev)
113 else:
113 else:
114 rawtext = rl.rawdata(rev)
114 rawtext = rl.rawdata(rev)
115
115
116 newrl.addrawrevision(
116 newrl.addrawrevision(
117 rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
117 rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
118 )
118 )
119
119
120 tr.addbackup(rl._indexfile, location=b'store')
120 tr.addbackup(rl._indexfile, location=b'store')
121 if not rl._inline:
121 if not rl._inline:
122 tr.addbackup(rl._datafile, location=b'store')
122 tr.addbackup(rl._datafile, location=b'store')
123
123
124 rl.opener.rename(newrl._indexfile, rl._indexfile)
124 rl.opener.rename(newrl._indexfile, rl._indexfile)
125 if not rl._inline:
125 if not rl._inline:
126 rl.opener.rename(newrl._datafile, rl._datafile)
126 rl.opener.rename(newrl._datafile, rl._datafile)
127
127
128 rl.clearcaches()
128 rl.clearcaches()
129 rl._loadindex()
129 chunk_cache = rl._loadindex()
130 rl._load_inner(chunk_cache)
130
131
131
132
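# Illustrative call site (hypothetical, not part of this changeset): censoring
# runs inside a lock and transaction so the revlog swap can be rolled back;
# `rl` is assumed to be an already-loaded v1 revlog.
with repo.lock(), repo.transaction(b'censor') as tr:
    v1_censor(rl, tr, censornode, tombstone=b'redacted')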
132 def v2_censor(revlog, tr, censornode, tombstone=b''):
133 def v2_censor(revlog, tr, censornode, tombstone=b''):
133 """censors a revision in a "version 2" revlog"""
134 """censors a revision in a "version 2" revlog"""
134 assert revlog._format_version != REVLOGV0, revlog._format_version
135 assert revlog._format_version != REVLOGV0, revlog._format_version
135 assert revlog._format_version != REVLOGV1, revlog._format_version
136 assert revlog._format_version != REVLOGV1, revlog._format_version
136
137
137 censor_revs = {revlog.rev(censornode)}
138 censor_revs = {revlog.rev(censornode)}
138 _rewrite_v2(revlog, tr, censor_revs, tombstone)
139 _rewrite_v2(revlog, tr, censor_revs, tombstone)
139
140
140
141
141 def _rewrite_v2(revlog, tr, censor_revs, tombstone=b''):
142 def _rewrite_v2(revlog, tr, censor_revs, tombstone=b''):
142 """rewrite a revlog to censor some of its content
143 """rewrite a revlog to censor some of its content
143
144
144 General principle
145 General principle
145
146
146 We create new revlog files (index/data/sidedata) to copy the content of
147 We create new revlog files (index/data/sidedata) to copy the content of
147 the existing data without the censored data.
148 the existing data without the censored data.
148
149
149 We need to recompute new delta for any revision that used the censored
150 We need to recompute new delta for any revision that used the censored
150 revision as delta base. As the cumulative size of the new delta may be
151 revision as delta base. As the cumulative size of the new delta may be
151 large, we store them in a temporary file until they are stored in their
152 large, we store them in a temporary file until they are stored in their
152 final destination.
153 final destination.
153
154
154 All data before the censored data can be blindly copied. The rest needs
155 All data before the censored data can be blindly copied. The rest needs
155 to be copied as we go and the associated index entry needs adjustment.
156 to be copied as we go and the associated index entry needs adjustment.
156 """
157 """
157 assert revlog._format_version != REVLOGV0, revlog._format_version
158 assert revlog._format_version != REVLOGV0, revlog._format_version
158 assert revlog._format_version != REVLOGV1, revlog._format_version
159 assert revlog._format_version != REVLOGV1, revlog._format_version
159
160
160 old_index = revlog.index
161 old_index = revlog.index
161 docket = revlog._docket
162 docket = revlog._docket
162
163
163 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
164 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
164
165
165 first_excl_rev = min(censor_revs)
166 first_excl_rev = min(censor_revs)
166
167
167 first_excl_entry = revlog.index[first_excl_rev]
168 first_excl_entry = revlog.index[first_excl_rev]
168 index_cutoff = revlog.index.entry_size * first_excl_rev
169 index_cutoff = revlog.index.entry_size * first_excl_rev
169 data_cutoff = first_excl_entry[ENTRY_DATA_OFFSET] >> 16
170 data_cutoff = first_excl_entry[ENTRY_DATA_OFFSET] >> 16
170 sidedata_cutoff = revlog.sidedata_cut_off(first_excl_rev)
171 sidedata_cutoff = revlog.sidedata_cut_off(first_excl_rev)
171
172
172 with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage:
173 with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage:
173 # rev → (new_base, data_start, data_end, compression_mode)
174 # rev → (new_base, data_start, data_end, compression_mode)
174 rewritten_entries = _precompute_rewritten_delta(
175 rewritten_entries = _precompute_rewritten_delta(
175 revlog,
176 revlog,
176 old_index,
177 old_index,
177 censor_revs,
178 censor_revs,
178 tmp_storage,
179 tmp_storage,
179 )
180 )
180
181
181 all_files = _setup_new_files(
182 all_files = _setup_new_files(
182 revlog,
183 revlog,
183 index_cutoff,
184 index_cutoff,
184 data_cutoff,
185 data_cutoff,
185 sidedata_cutoff,
186 sidedata_cutoff,
186 )
187 )
187
188
188 # we don't need to open the old index file since its content already
189 # we don't need to open the old index file since its content already
189 # exists in a usable form in `old_index`.
190 # exists in a usable form in `old_index`.
190 with all_files() as open_files:
191 with all_files() as open_files:
191 (
192 (
192 old_data_file,
193 old_data_file,
193 old_sidedata_file,
194 old_sidedata_file,
194 new_index_file,
195 new_index_file,
195 new_data_file,
196 new_data_file,
196 new_sidedata_file,
197 new_sidedata_file,
197 ) = open_files
198 ) = open_files
198
199
199 # writing the censored revision
200 # writing the censored revision
200
201
201 # Writing all subsequent revisions
202 # Writing all subsequent revisions
202 for rev in range(first_excl_rev, len(old_index)):
203 for rev in range(first_excl_rev, len(old_index)):
203 if rev in censor_revs:
204 if rev in censor_revs:
204 _rewrite_censor(
205 _rewrite_censor(
205 revlog,
206 revlog,
206 old_index,
207 old_index,
207 open_files,
208 open_files,
208 rev,
209 rev,
209 tombstone,
210 tombstone,
210 )
211 )
211 else:
212 else:
212 _rewrite_simple(
213 _rewrite_simple(
213 revlog,
214 revlog,
214 old_index,
215 old_index,
215 open_files,
216 open_files,
216 rev,
217 rev,
217 rewritten_entries,
218 rewritten_entries,
218 tmp_storage,
219 tmp_storage,
219 )
220 )
220 docket.write(transaction=None, stripping=True)
221 docket.write(transaction=None, stripping=True)
221
222
222
223
223 def _precompute_rewritten_delta(
224 def _precompute_rewritten_delta(
224 revlog,
225 revlog,
225 old_index,
226 old_index,
226 excluded_revs,
227 excluded_revs,
227 tmp_storage,
228 tmp_storage,
228 ):
229 ):
229 """Compute new delta for revisions whose delta is based on revision that
230 """Compute new delta for revisions whose delta is based on revision that
230 will not survive as is.
231 will not survive as is.
231
232
232 Return a mapping: {rev → (new_base, data_start, data_end, compression_mode)}
233 Return a mapping: {rev → (new_base, data_start, data_end, compression_mode)}
233 """
234 """
234 dc = deltas.deltacomputer(revlog)
235 dc = deltas.deltacomputer(revlog)
235 rewritten_entries = {}
236 rewritten_entries = {}
236 first_excl_rev = min(excluded_revs)
237 first_excl_rev = min(excluded_revs)
237 with revlog.reading():
238 with revlog.reading():
238 for rev in range(first_excl_rev, len(old_index)):
239 for rev in range(first_excl_rev, len(old_index)):
239 if rev in excluded_revs:
240 if rev in excluded_revs:
240 # this revision will be preserved as is, so we don't need to
241 # this revision will be preserved as is, so we don't need to
241 # consider recomputing a delta.
242 # consider recomputing a delta.
242 continue
243 continue
243 entry = old_index[rev]
244 entry = old_index[rev]
244 if entry[ENTRY_DELTA_BASE] not in excluded_revs:
245 if entry[ENTRY_DELTA_BASE] not in excluded_revs:
245 continue
246 continue
246 # This is a revision that uses the censored revision as the base
247 # This is a revision that uses the censored revision as the base
247 # for its delta. We need to compute a new delta for it.
248 # for its delta. We need to compute a new delta for it.
248 if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
249 if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
249 # this revision is empty, we can delta against nullrev
250 # this revision is empty, we can delta against nullrev
250 rewritten_entries[rev] = (nullrev, 0, 0, COMP_MODE_PLAIN)
251 rewritten_entries[rev] = (nullrev, 0, 0, COMP_MODE_PLAIN)
251 else:
252 else:
252
253
253 text = revlog.rawdata(rev)
254 text = revlog.rawdata(rev)
254 info = revlogutils.revisioninfo(
255 info = revlogutils.revisioninfo(
255 node=entry[ENTRY_NODE_ID],
256 node=entry[ENTRY_NODE_ID],
256 p1=revlog.node(entry[ENTRY_PARENT_1]),
257 p1=revlog.node(entry[ENTRY_PARENT_1]),
257 p2=revlog.node(entry[ENTRY_PARENT_2]),
258 p2=revlog.node(entry[ENTRY_PARENT_2]),
258 btext=[text],
259 btext=[text],
259 textlen=len(text),
260 textlen=len(text),
260 cachedelta=None,
261 cachedelta=None,
261 flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
262 flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
262 )
263 )
263 d = dc.finddeltainfo(
264 d = dc.finddeltainfo(
264 info, excluded_bases=excluded_revs, target_rev=rev
265 info, excluded_bases=excluded_revs, target_rev=rev
265 )
266 )
266 default_comp = revlog._docket.default_compression_header
267 default_comp = revlog._docket.default_compression_header
267 comp_mode, d = deltas.delta_compression(default_comp, d)
268 comp_mode, d = deltas.delta_compression(default_comp, d)
268 # using `tell` is a bit lazy, but we are not here for speed
269 # using `tell` is a bit lazy, but we are not here for speed
269 start = tmp_storage.tell()
270 start = tmp_storage.tell()
270 tmp_storage.write(d.data[1])
271 tmp_storage.write(d.data[1])
271 end = tmp_storage.tell()
272 end = tmp_storage.tell()
272 rewritten_entries[rev] = (d.base, start, end, comp_mode)
273 rewritten_entries[rev] = (d.base, start, end, comp_mode)
273 return rewritten_entries
274 return rewritten_entries
274
275
275
276
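# Sketch (not part of this changeset): a delta recorded in the mapping above
# is later read back from the temporary file like so.
new_base, start, end, comp_mode = rewritten_entries[rev]
tmp_storage.seek(start)
new_delta = tmp_storage.read(end - start)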
def _setup_new_files(
    revlog,
    index_cutoff,
    data_cutoff,
    sidedata_cutoff,
):
    """
    return a context manager to open all the relevant files:
    - old_data_file,
    - old_sidedata_file,
    - new_index_file,
    - new_data_file,
    - new_sidedata_file,

    The old_index_file is not here because it is accessed through the
    `old_index` object of the caller function.
    """
    docket = revlog._docket
    old_index_filepath = revlog.opener.join(docket.index_filepath())
    old_data_filepath = revlog.opener.join(docket.data_filepath())
    old_sidedata_filepath = revlog.opener.join(docket.sidedata_filepath())

    new_index_filepath = revlog.opener.join(docket.new_index_file())
    new_data_filepath = revlog.opener.join(docket.new_data_file())
    new_sidedata_filepath = revlog.opener.join(docket.new_sidedata_file())

    util.copyfile(old_index_filepath, new_index_filepath, nb_bytes=index_cutoff)
    util.copyfile(old_data_filepath, new_data_filepath, nb_bytes=data_cutoff)
    util.copyfile(
        old_sidedata_filepath,
        new_sidedata_filepath,
        nb_bytes=sidedata_cutoff,
    )
    revlog.opener.register_file(docket.index_filepath())
    revlog.opener.register_file(docket.data_filepath())
    revlog.opener.register_file(docket.sidedata_filepath())

    docket.index_end = index_cutoff
    docket.data_end = data_cutoff
    docket.sidedata_end = sidedata_cutoff

    # reload the revlog internal information
    revlog.clearcaches()
    revlog._loadindex(docket=docket)

    @contextlib.contextmanager
    def all_files_opener():
        # hide opening in a helper function to please check-code, black
        # and various python versions at the same time
        with open(old_data_filepath, 'rb') as old_data_file:
            with open(old_sidedata_filepath, 'rb') as old_sidedata_file:
                with open(new_index_filepath, 'r+b') as new_index_file:
                    with open(new_data_filepath, 'r+b') as new_data_file:
                        with open(
                            new_sidedata_filepath, 'r+b'
                        ) as new_sidedata_file:
                            new_index_file.seek(0, os.SEEK_END)
                            assert new_index_file.tell() == index_cutoff
                            new_data_file.seek(0, os.SEEK_END)
                            assert new_data_file.tell() == data_cutoff
                            new_sidedata_file.seek(0, os.SEEK_END)
                            assert new_sidedata_file.tell() == sidedata_cutoff
                            yield (
                                old_data_file,
                                old_sidedata_file,
                                new_index_file,
                                new_data_file,
                                new_sidedata_file,
                            )

    return all_files_opener


def _rewrite_simple(
    revlog,
    old_index,
    all_files,
    rev,
    rewritten_entries,
    tmp_storage,
):
    """append a normal revision to the index after the rewritten one(s)"""
    (
        old_data_file,
        old_sidedata_file,
        new_index_file,
        new_data_file,
        new_sidedata_file,
    ) = all_files
    entry = old_index[rev]
    flags = entry[ENTRY_DATA_OFFSET] & 0xFFFF
    old_data_offset = entry[ENTRY_DATA_OFFSET] >> 16
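    # note: the first index field packs the data offset and the revision
    # flags together: the low 16 bits hold the flags and the remaining high
    # bits hold the byte offset, hence the mask and shift above.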

    if rev not in rewritten_entries:
        old_data_file.seek(old_data_offset)
        new_data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
        new_data = old_data_file.read(new_data_size)
        data_delta_base = entry[ENTRY_DELTA_BASE]
        d_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]
    else:
        (
            data_delta_base,
            start,
            end,
            d_comp_mode,
        ) = rewritten_entries[rev]
        new_data_size = end - start
        tmp_storage.seek(start)
        new_data = tmp_storage.read(new_data_size)

    # It might be faster to group continuous read/write operations;
    # however, this is censor, an operation that is not focused on
    # stellar performance. So I have not written this optimisation yet.
    new_data_offset = new_data_file.tell()
    new_data_file.write(new_data)

    sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
    new_sidedata_offset = new_sidedata_file.tell()
    if 0 < sidedata_size:
        old_sidedata_offset = entry[ENTRY_SIDEDATA_OFFSET]
        old_sidedata_file.seek(old_sidedata_offset)
        new_sidedata = old_sidedata_file.read(sidedata_size)
        new_sidedata_file.write(new_sidedata)

    data_uncompressed_length = entry[ENTRY_DATA_UNCOMPRESSED_LENGTH]
    sd_com_mode = entry[ENTRY_SIDEDATA_COMPRESSION_MODE]
    assert data_delta_base <= rev, (data_delta_base, rev)

    new_entry = revlogutils.entry(
        flags=flags,
        data_offset=new_data_offset,
        data_compressed_length=new_data_size,
        data_uncompressed_length=data_uncompressed_length,
        data_delta_base=data_delta_base,
        link_rev=entry[ENTRY_LINK_REV],
        parent_rev_1=entry[ENTRY_PARENT_1],
        parent_rev_2=entry[ENTRY_PARENT_2],
        node_id=entry[ENTRY_NODE_ID],
        sidedata_offset=new_sidedata_offset,
        sidedata_compressed_length=sidedata_size,
        data_compression_mode=d_comp_mode,
        sidedata_compression_mode=sd_com_mode,
    )
    revlog.index.append(new_entry)
    entry_bin = revlog.index.entry_binary(rev)
    new_index_file.write(entry_bin)

    revlog._docket.index_end = new_index_file.tell()
    revlog._docket.data_end = new_data_file.tell()
    revlog._docket.sidedata_end = new_sidedata_file.tell()


def _rewrite_censor(
    revlog,
    old_index,
    all_files,
    rev,
    tombstone,
):
    """rewrite and append a censored revision"""
    (
        old_data_file,
        old_sidedata_file,
        new_index_file,
        new_data_file,
        new_sidedata_file,
    ) = all_files
    entry = old_index[rev]

    # XXX consider trying the default compression too
    new_data_size = len(tombstone)
    new_data_offset = new_data_file.tell()
    new_data_file.write(tombstone)

    # we are not adding any sidedata as they might leak info about the
    # censored version

    link_rev = entry[ENTRY_LINK_REV]

    p1 = entry[ENTRY_PARENT_1]
    p2 = entry[ENTRY_PARENT_2]

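    # using the revision itself as its delta base marks the entry as a full
    # snapshot, so the tombstone is never expressed as a delta against the
    # censored data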
    new_entry = revlogutils.entry(
        flags=constants.REVIDX_ISCENSORED,
        data_offset=new_data_offset,
        data_compressed_length=new_data_size,
        data_uncompressed_length=new_data_size,
        data_delta_base=rev,
        link_rev=link_rev,
        parent_rev_1=p1,
        parent_rev_2=p2,
        node_id=entry[ENTRY_NODE_ID],
        sidedata_offset=0,
        sidedata_compressed_length=0,
        data_compression_mode=COMP_MODE_PLAIN,
        sidedata_compression_mode=COMP_MODE_PLAIN,
    )
    revlog.index.append(new_entry)
    entry_bin = revlog.index.entry_binary(rev)
    new_index_file.write(entry_bin)
    revlog._docket.index_end = new_index_file.tell()
    revlog._docket.data_end = new_data_file.tell()


def _get_filename_from_filelog_index(path):
    # Drop the extension and the `data/` prefix
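    # e.g. b'data/foo/bar.py.i' becomes b'foo/bar.py'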
    path_part = path.rsplit(b'.', 1)[0].split(b'/', 1)
    if len(path_part) < 2:
        msg = _(b"cannot recognize filelog from filename: '%s'")
        msg %= path
        raise error.Abort(msg)

    return path_part[1]


def _filelog_from_filename(repo, path):
    """Returns the filelog for the given `path`. Stolen from `engine.py`"""

    from .. import filelog  # avoid cycle

    fl = filelog.filelog(repo.svfs, path)
    return fl


def _write_swapped_parents(repo, rl, rev, offset, fp):
    """Swaps p1 and p2 and overwrites the revlog entry for `rev` in `fp`"""
    from ..pure import parsers  # avoid cycle

    if repo._currentlock(repo._lockref) is None:
        # Let's be paranoid about it
        msg = "repo needs to be locked to rewrite parents"
        raise error.ProgrammingError(msg)

    index_format = parsers.IndexObject.index_format
    entry = rl.index[rev]
    new_entry = list(entry)
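    # in the v1 index tuple, fields 5 and 6 hold the parent revisions:
    # (offset/flags, comp. length, raw length, delta base, linkrev, p1, p2,
    # node)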
    new_entry[5], new_entry[6] = entry[6], entry[5]
    packed = index_format.pack(*new_entry[:8])
    fp.seek(offset)
    fp.write(packed)


def _reorder_filelog_parents(repo, fl, to_fix):
    """
    Swaps p1 and p2 for all `to_fix` revisions of filelog `fl` and writes the
    new version to disk, overwriting the old one with a rename.
    """
    from ..pure import parsers  # avoid cycle

    ui = repo.ui
    assert len(to_fix) > 0
    rl = fl._revlog
    if rl._format_version != constants.REVLOGV1:
        msg = "expected version 1 revlog, got version '%d'" % rl._format_version
        raise error.ProgrammingError(msg)

    index_file = rl._indexfile
    new_file_path = index_file + b'.tmp-parents-fix'
    repaired_msg = _(b"repaired revision %d of 'filelog %s'\n")

    with ui.uninterruptible():
        try:
            util.copyfile(
                rl.opener.join(index_file),
                rl.opener.join(new_file_path),
                checkambig=rl.data_config.check_ambig,
            )

            with rl.opener(new_file_path, mode=b"r+") as fp:
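                # an inline revlog interleaves index entries with revision
                # data, so each entry's offset has to be computed from the
                # index; a non-inline index has fixed-size entries at
                # rev * entry_size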
                if rl._inline:
                    index = parsers.InlinedIndexObject(fp.read())
                    for rev in fl.revs():
                        if rev in to_fix:
                            offset = index._calculate_index(rev)
                            _write_swapped_parents(repo, rl, rev, offset, fp)
                            ui.write(repaired_msg % (rev, index_file))
                else:
                    index_format = parsers.IndexObject.index_format
                    for rev in to_fix:
                        offset = rev * index_format.size
                        _write_swapped_parents(repo, rl, rev, offset, fp)
                        ui.write(repaired_msg % (rev, index_file))

            rl.opener.rename(new_file_path, index_file)
            rl.clearcaches()
            rl._loadindex()
        finally:
            util.tryunlink(new_file_path)


def _is_revision_affected(fl, filerev, metadata_cache=None):
    full_text = lambda: fl._revlog.rawdata(filerev)
    parent_revs = lambda: fl._revlog.parentrevs(filerev)
    return _is_revision_affected_inner(
        full_text, parent_revs, filerev, metadata_cache
    )


def _is_revision_affected_inner(
    full_text,
    parents_revs,
    filerev,
    metadata_cache=None,
):
    """Mercurial currently (5.9rc0) gives `p1 == nullrev and p2 != nullrev` a
    special meaning compared to the reverse order in the context of
    filelog-based copytracing. issue6528 exists because new code assumed that
    parent ordering didn't matter, so this detects if the revision contains
    metadata (since it's only used for filelog-based copytracing) and its
    parents are in the "wrong" order."""
    try:
        raw_text = full_text()
    except error.CensoredNodeError:
        # We don't care about censored nodes as they never carry metadata
        return False

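    # a filelog revision carries copy metadata iff its raw text starts with
    # the two-byte marker b'\x01\n'; the metadata block is terminated by the
    # same marker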
    # raw text can be a `memoryview`, which doesn't implement `startswith`
    has_meta = bytes(raw_text[:2]) == b'\x01\n'
    if metadata_cache is not None:
        metadata_cache[filerev] = has_meta
    if has_meta:
        (p1, p2) = parents_revs()
        if p1 != nullrev and p2 == nullrev:
            return True
    return False


def _is_revision_affected_fast(repo, fl, filerev, metadata_cache):
    rl = fl._revlog
    is_censored = lambda: rl.iscensored(filerev)
    delta_base = lambda: rl.deltaparent(filerev)
    delta = lambda: rl._chunk(filerev)
    full_text = lambda: rl.rawdata(filerev)
    parent_revs = lambda: rl.parentrevs(filerev)
    return _is_revision_affected_fast_inner(
        is_censored,
        delta_base,
        delta,
        full_text,
        parent_revs,
        filerev,
        metadata_cache,
    )


def _is_revision_affected_fast_inner(
    is_censored,
    delta_base,
    delta,
    full_text,
    parent_revs,
    filerev,
    metadata_cache,
):
    """Optimization fast-path for `_is_revision_affected`.

    `metadata_cache` is a dict of `{rev: has_metadata}` which allows any
    revision to check if its base has metadata, looking at the current delta
    instead of computing the full text.

    This optimization only works if the revisions are looked at in order."""

    if is_censored():
        # Censored revisions don't contain metadata, so they cannot be affected
        metadata_cache[filerev] = False
        return False

    p1, p2 = parent_revs()
    if p1 == nullrev or p2 != nullrev:
        return False

    delta_parent = delta_base()
    parent_has_metadata = metadata_cache.get(delta_parent)
    if parent_has_metadata is None:
        return _is_revision_affected_inner(
            full_text,
            parent_revs,
            filerev,
            metadata_cache,
        )

    chunk = delta()
    if not len(chunk):
        # No diff for this revision
        return parent_has_metadata

    header_length = 12
    if len(chunk) < header_length:
        raise error.Abort(_(b"patch cannot be decoded"))

    start, _end, _length = struct.unpack(b">lll", chunk[:header_length])

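    # a mercurial delta is a sequence of patch hunks, each introduced by a
    # 12-byte header of three big-endian int32s: start, end, and the length
    # of the replacement data; only the start of the first hunk matters here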
    if start < 2:  # len(b'\x01\n') == 2
        # This delta does *something* to the metadata marker (if any).
        # Check it the slow way
        is_affected = _is_revision_affected_inner(
            full_text,
            parent_revs,
            filerev,
            metadata_cache,
        )
        return is_affected

    # The diff did not remove or add the metadata header, it's then in the same
    # situation as its parent
    metadata_cache[filerev] = parent_has_metadata
    return parent_has_metadata


def _from_report(ui, repo, context, from_report, dry_run):
    """
    Fix the revisions given in the `from_report` file, but still check that
    the revisions are indeed affected, to prevent an unfortunate cyclic
    situation where we'd swap well-ordered parents again.

    See the doc for `debug_fix_issue6528` for the format documentation.
    """
    ui.write(_(b"loading report file '%s'\n") % from_report)

    with context(), open(from_report, mode='rb') as f:
        for line in f.read().split(b'\n'):
            if not line:
                continue
            filenodes, filename = line.split(b' ', 1)
            fl = _filelog_from_filename(repo, filename)
            to_fix = set(
                fl.rev(binascii.unhexlify(n)) for n in filenodes.split(b',')
            )
            excluded = set()

            for filerev in to_fix:
                if _is_revision_affected(fl, filerev):
                    msg = b"found affected revision %d for filelog '%s'\n"
                    ui.warn(msg % (filerev, filename))
                else:
                    msg = _(b"revision %s of file '%s' is not affected\n")
                    msg %= (binascii.hexlify(fl.node(filerev)), filename)
                    ui.warn(msg)
                    excluded.add(filerev)

            to_fix = to_fix - excluded
            if not to_fix:
                msg = _(b"no affected revisions were found for '%s'\n")
                ui.write(msg % filename)
                continue
            if not dry_run:
                _reorder_filelog_parents(repo, fl, sorted(to_fix))


def filter_delta_issue6528(revlog, deltas_iter):
    """filter incoming deltas to repair issue 6528 on the fly"""
    metadata_cache = {}

    deltacomputer = deltas.deltacomputer(revlog)

    for rev, d in enumerate(deltas_iter, len(revlog)):
        (
            node,
            p1_node,
            p2_node,
            linknode,
            deltabase,
            delta,
            flags,
            sidedata,
        ) = d

        if not revlog.index.has_node(deltabase):
            raise error.LookupError(
                deltabase, revlog.radix, _(b'unknown parent')
            )
        base_rev = revlog.rev(deltabase)
        if not revlog.index.has_node(p1_node):
            raise error.LookupError(p1_node, revlog.radix, _(b'unknown parent'))
        p1_rev = revlog.rev(p1_node)
        if not revlog.index.has_node(p2_node):
            raise error.LookupError(p2_node, revlog.radix, _(b'unknown parent'))
        p2_rev = revlog.rev(p2_node)

        is_censored = lambda: bool(flags & REVIDX_ISCENSORED)
        delta_base = lambda: base_rev
        parent_revs = lambda: (p1_rev, p2_rev)

        def full_text():
            # note: being able to reuse the full text computation in the
            # underlying addrevision would be useful, however this is a bit
            # too intrusive for the "quick" issue6528 fix we are writing
            # before the 5.8 release
            textlen = mdiff.patchedsize(revlog.size(base_rev), delta)

            revinfo = revlogutils.revisioninfo(
                node,
                p1_node,
                p2_node,
                [None],
                textlen,
                (base_rev, delta),
                flags,
            )
            return deltacomputer.buildtext(revinfo)

        is_affected = _is_revision_affected_fast_inner(
            is_censored,
            delta_base,
            lambda: delta,
            full_text,
            parent_revs,
            rev,
            metadata_cache,
        )
        if is_affected:
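            # the parents are in the "wrong" order; swap them back so the
            # revision is stored with its copy metadata interpreted correctly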
            d = (
                node,
                p2_node,
                p1_node,
                linknode,
                deltabase,
                delta,
                flags,
                sidedata,
            )
        yield d


def repair_issue6528(
    ui, repo, dry_run=False, to_report=None, from_report=None, paranoid=False
):
    @contextlib.contextmanager
    def context():
        if dry_run or to_report:  # No need for locking
            yield
        else:
            with repo.wlock(), repo.lock():
                yield

    if from_report:
        return _from_report(ui, repo, context, from_report, dry_run)

    report_entries = []

    with context():
        files = list(
            entry
            for entry in repo.store.data_entries()
            if entry.is_revlog and entry.is_filelog
        )

        progress = ui.makeprogress(
            _(b"looking for affected revisions"),
            unit=_(b"filelogs"),
            total=len(files),
        )
        found_nothing = True

        for entry in files:
            progress.increment()
            filename = entry.target_id
            fl = _filelog_from_filename(repo, entry.target_id)

            # Set of filerevs (or hex filenodes if `to_report`) that need
            # fixing
            to_fix = set()
            metadata_cache = {}
            for filerev in fl.revs():
                affected = _is_revision_affected_fast(
                    repo, fl, filerev, metadata_cache
                )
                if paranoid:
                    slow = _is_revision_affected(fl, filerev)
                    if slow != affected:
                        msg = _(b"paranoid check failed for '%s' at node %s")
                        node = binascii.hexlify(fl.node(filerev))
                        raise error.Abort(msg % (filename, node))
                if affected:
                    msg = b"found affected revision %d for file '%s'\n"
                    ui.warn(msg % (filerev, filename))
                    found_nothing = False
                    if not dry_run:
                        if to_report:
                            to_fix.add(binascii.hexlify(fl.node(filerev)))
                        else:
                            to_fix.add(filerev)

            if to_fix:
                to_fix = sorted(to_fix)
                if to_report:
                    report_entries.append((filename, to_fix))
                else:
                    _reorder_filelog_parents(repo, fl, to_fix)

    if found_nothing:
        ui.write(_(b"no affected revisions were found\n"))

    if to_report and report_entries:
        with open(to_report, mode="wb") as f:
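            # one line per filelog: comma-separated hex filenodes, a space,
            # then the store path (the format `_from_report` parses back)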
            for path, to_fix in report_entries:
                f.write(b"%s %s\n" % (b",".join(to_fix), path))

    progress.complete()