revlog: replace revlog._io.size with a new revlog.index.entry_size...
marmoute
r47736:3c920870 default
@@ -1,3925 +1,3928 b''
# perf.py - performance test routines
'''helper extension to measure performance

Configurations
==============

``perf``
--------

``all-timing``
  When set, additional statistics will be reported for each benchmark: best,
  worst, median, and average. If not set, only the best timing is reported
  (default: off).

``presleep``
  number of seconds to wait before any group of runs (default: 1)

``pre-run``
  number of runs to perform before starting measurement.

``profile-benchmark``
  Enable profiling for the benchmarked section.
  (Only the first iteration is profiled.)

``run-limits``
  Control the number of runs each benchmark will perform. The option value
  should be a list of `<time>-<numberofrun>` pairs. After each run the
  conditions are considered in order with the following logic:

  If the benchmark has been running for <time> seconds and we have performed
  <numberofrun> iterations, stop the benchmark.

  The default value is: `3.0-100, 10.0-3`

``stub``
  When set, benchmarks will only be run once; useful for testing
  (default: off)
'''

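# Example (illustrative) configuration for the options documented above;
# the option names are real, the values below are made up:
#
#   [perf]
#   all-timing = yes
#   presleep = 0
#   pre-run = 3
#   run-limits = 5.0-50, 15.0-5
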
40 # "historical portability" policy of perf.py:
40 # "historical portability" policy of perf.py:
41 #
41 #
42 # We have to do:
42 # We have to do:
43 # - make perf.py "loadable" with as wide Mercurial version as possible
43 # - make perf.py "loadable" with as wide Mercurial version as possible
44 # This doesn't mean that perf commands work correctly with that Mercurial.
44 # This doesn't mean that perf commands work correctly with that Mercurial.
45 # BTW, perf.py itself has been available since 1.1 (or eb240755386d).
45 # BTW, perf.py itself has been available since 1.1 (or eb240755386d).
46 # - make historical perf command work correctly with as wide Mercurial
46 # - make historical perf command work correctly with as wide Mercurial
47 # version as possible
47 # version as possible
48 #
48 #
49 # We have to do, if possible with reasonable cost:
49 # We have to do, if possible with reasonable cost:
50 # - make recent perf command for historical feature work correctly
50 # - make recent perf command for historical feature work correctly
51 # with early Mercurial
51 # with early Mercurial
52 #
52 #
53 # We don't have to do:
53 # We don't have to do:
54 # - make perf command for recent feature work correctly with early
54 # - make perf command for recent feature work correctly with early
55 # Mercurial
55 # Mercurial
56
56
from __future__ import absolute_import
import contextlib
import functools
import gc
import os
import random
import shutil
import struct
import sys
import tempfile
import threading
import time
from mercurial import (
    changegroup,
    cmdutil,
    commands,
    copies,
    error,
    extensions,
    hg,
    mdiff,
    merge,
    revlog,
    util,
)

# for "historical portability":
# try to import modules separately (in dict order), and ignore
# failure, because these aren't available with early Mercurial
try:
    from mercurial import branchmap  # since 2.5 (or bcee63733aad)
except ImportError:
    pass
try:
    from mercurial import obsolete  # since 2.3 (or ad0d6c2b3279)
except ImportError:
    pass
try:
    from mercurial import registrar  # since 3.7 (or 37d50250b696)

    dir(registrar)  # forcibly load it
except ImportError:
    registrar = None
try:
    from mercurial import repoview  # since 2.5 (or 3a6ddacb7198)
except ImportError:
    pass
try:
    from mercurial.utils import repoviewutil  # since 5.0
except ImportError:
    repoviewutil = None
try:
    from mercurial import scmutil  # since 1.9 (or 8b252e826c68)
except ImportError:
    pass
try:
    from mercurial import setdiscovery  # since 1.9 (or cb98fed52495)
except ImportError:
    pass

try:
    from mercurial import profiling
except ImportError:
    profiling = None


def identity(a):
    return a


try:
    from mercurial import pycompat

    getargspec = pycompat.getargspec  # added to module after 4.5
    _byteskwargs = pycompat.byteskwargs  # since 4.1 (or fbc3f73dc802)
    _sysstr = pycompat.sysstr  # since 4.0 (or 2219f4f82ede)
    _bytestr = pycompat.bytestr  # since 4.2 (or b70407bd84d5)
    _xrange = pycompat.xrange  # since 4.8 (or 7eba8f83129b)
    fsencode = pycompat.fsencode  # since 3.9 (or f4a5e0e86a7e)
    if pycompat.ispy3:
        _maxint = sys.maxsize  # per py3 docs for replacing maxint
    else:
        _maxint = sys.maxint
except (NameError, ImportError, AttributeError):
    import inspect

    getargspec = inspect.getargspec
    _byteskwargs = identity
    _bytestr = str
    fsencode = identity  # no py3 support
    _maxint = sys.maxint  # no py3 support
    _sysstr = lambda x: x  # no py3 support
    _xrange = xrange

try:
    # 4.7+
    queue = pycompat.queue.Queue
except (NameError, AttributeError, ImportError):
    # <4.7.
    try:
        queue = pycompat.queue
    except (NameError, AttributeError, ImportError):
        import Queue as queue

try:
    from mercurial import logcmdutil

    makelogtemplater = logcmdutil.maketemplater
except (AttributeError, ImportError):
    try:
        makelogtemplater = cmdutil.makelogtemplater
    except (AttributeError, ImportError):
        makelogtemplater = None

# for "historical portability":
# define util.safehasattr forcibly, because util.safehasattr has been
# available since 1.9.3 (or 94b200a11cf7)
_undefined = object()


def safehasattr(thing, attr):
    return getattr(thing, _sysstr(attr), _undefined) is not _undefined


setattr(util, 'safehasattr', safehasattr)

# for "historical portability":
# define util.timer forcibly, because util.timer has been available
# since ae5d60bb70c9
if safehasattr(time, 'perf_counter'):
    util.timer = time.perf_counter
elif os.name == b'nt':
    util.timer = time.clock
else:
    util.timer = time.time

# for "historical portability":
# use locally defined empty option list, if formatteropts isn't
# available, because commands.formatteropts has been available since
# 3.2 (or 7a7eed5176a4), even though formatting itself has been
# available since 2.2 (or ae5f92e154d3)
formatteropts = getattr(
    cmdutil, "formatteropts", getattr(commands, "formatteropts", [])
)

# for "historical portability":
# use locally defined option list, if debugrevlogopts isn't available,
# because commands.debugrevlogopts has been available since 3.7 (or
# 5606f7d0d063), even though cmdutil.openrevlog() has been available
# since 1.9 (or a79fea6b3e77).
revlogopts = getattr(
    cmdutil,
    "debugrevlogopts",
    getattr(
        commands,
        "debugrevlogopts",
        [
            (b'c', b'changelog', False, b'open changelog'),
            (b'm', b'manifest', False, b'open manifest'),
            (b'', b'dir', False, b'open directory manifest'),
        ],
    ),
)

cmdtable = {}

# for "historical portability":
# define parsealiases locally, because cmdutil.parsealiases has been
# available since 1.5 (or 6252852b4332)
def parsealiases(cmd):
    return cmd.split(b"|")


if safehasattr(registrar, 'command'):
    command = registrar.command(cmdtable)
elif safehasattr(cmdutil, 'command'):
    command = cmdutil.command(cmdtable)
    if 'norepo' not in getargspec(command).args:
        # for "historical portability":
        # wrap original cmdutil.command, because "norepo" option has
        # been available since 3.1 (or 75a96326cecb)
        _command = command

        def command(name, options=(), synopsis=None, norepo=False):
            if norepo:
                commands.norepo += b' %s' % b' '.join(parsealiases(name))
            return _command(name, list(options), synopsis)


else:
    # for "historical portability":
    # define "@command" annotation locally, because cmdutil.command
    # has been available since 1.9 (or 2daa5179e73f)
    def command(name, options=(), synopsis=None, norepo=False):
        def decorator(func):
            if synopsis:
                cmdtable[name] = func, list(options), synopsis
            else:
                cmdtable[name] = func, list(options)
            if norepo:
                commands.norepo += b' %s' % b' '.join(parsealiases(name))
            return func

        return decorator

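# Example (illustrative): a perf command registered through whichever
# `command` implementation was selected above; `perfexample` is a
# hypothetical name, the real commands further below follow this shape:
#
#   @command(b'perf::example|perfexample', formatteropts)
#   def perfexample(ui, repo, **opts):
#       ...
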
try:
    import mercurial.registrar
    import mercurial.configitems

    configtable = {}
    configitem = mercurial.registrar.configitem(configtable)
    configitem(
        b'perf',
        b'presleep',
        default=mercurial.configitems.dynamicdefault,
        experimental=True,
    )
    configitem(
        b'perf',
        b'stub',
        default=mercurial.configitems.dynamicdefault,
        experimental=True,
    )
    configitem(
        b'perf',
        b'parentscount',
        default=mercurial.configitems.dynamicdefault,
        experimental=True,
    )
    configitem(
        b'perf',
        b'all-timing',
        default=mercurial.configitems.dynamicdefault,
        experimental=True,
    )
    configitem(
        b'perf',
        b'pre-run',
        default=mercurial.configitems.dynamicdefault,
    )
    configitem(
        b'perf',
        b'profile-benchmark',
        default=mercurial.configitems.dynamicdefault,
    )
    configitem(
        b'perf',
        b'run-limits',
        default=mercurial.configitems.dynamicdefault,
        experimental=True,
    )
except (ImportError, AttributeError):
    pass
except TypeError:
    # compatibility fix for a11fd395e83f
    # hg version: 5.2
    configitem(
        b'perf',
        b'presleep',
        default=mercurial.configitems.dynamicdefault,
    )
    configitem(
        b'perf',
        b'stub',
        default=mercurial.configitems.dynamicdefault,
    )
    configitem(
        b'perf',
        b'parentscount',
        default=mercurial.configitems.dynamicdefault,
    )
    configitem(
        b'perf',
        b'all-timing',
        default=mercurial.configitems.dynamicdefault,
    )
    configitem(
        b'perf',
        b'pre-run',
        default=mercurial.configitems.dynamicdefault,
    )
    configitem(
        b'perf',
        b'profile-benchmark',
        default=mercurial.configitems.dynamicdefault,
    )
    configitem(
        b'perf',
        b'run-limits',
        default=mercurial.configitems.dynamicdefault,
    )


def getlen(ui):
    if ui.configbool(b"perf", b"stub", False):
        return lambda x: 1
    return len


class noop(object):
    """dummy context manager"""

    def __enter__(self):
        pass

    def __exit__(self, *args):
        pass


NOOPCTX = noop()

def gettimer(ui, opts=None):
    """return a timer function and formatter: (timer, formatter)

    This function exists to gather formatter creation in a single
    place instead of duplicating it in all performance commands."""

    # enforce an idle period before execution to counteract power management
    # experimental config: perf.presleep
    time.sleep(getint(ui, b"perf", b"presleep", 1))

    if opts is None:
        opts = {}
    # redirect all to stderr unless buffer api is in use
    if not ui._buffers:
        ui = ui.copy()
        uifout = safeattrsetter(ui, b'fout', ignoremissing=True)
        if uifout:
            # for "historical portability":
            # ui.fout/ferr have been available since 1.9 (or 4e1ccd4c2b6d)
            uifout.set(ui.ferr)

    # get a formatter
    uiformatter = getattr(ui, 'formatter', None)
    if uiformatter:
        fm = uiformatter(b'perf', opts)
    else:
        # for "historical portability":
        # define formatter locally, because ui.formatter has been
        # available since 2.2 (or ae5f92e154d3)
        from mercurial import node

        class defaultformatter(object):
            """Minimized composition of baseformatter and plainformatter"""

            def __init__(self, ui, topic, opts):
                self._ui = ui
                if ui.debugflag:
                    self.hexfunc = node.hex
                else:
                    self.hexfunc = node.short

            def __nonzero__(self):
                return False

            __bool__ = __nonzero__

            def startitem(self):
                pass

            def data(self, **data):
                pass

            def write(self, fields, deftext, *fielddata, **opts):
                self._ui.write(deftext % fielddata, **opts)

            def condwrite(self, cond, fields, deftext, *fielddata, **opts):
                if cond:
                    self._ui.write(deftext % fielddata, **opts)

            def plain(self, text, **opts):
                self._ui.write(text, **opts)

            def end(self):
                pass

        fm = defaultformatter(ui, b'perf', opts)

    # stub function, runs code only once instead of in a loop
    # experimental config: perf.stub
    if ui.configbool(b"perf", b"stub", False):
        return functools.partial(stub_timer, fm), fm

    # experimental config: perf.all-timing
    displayall = ui.configbool(b"perf", b"all-timing", False)

    # experimental config: perf.run-limits
    limitspec = ui.configlist(b"perf", b"run-limits", [])
    limits = []
    for item in limitspec:
        parts = item.split(b'-', 1)
        if len(parts) < 2:
            ui.warn((b'malformed run limit entry, missing "-": %s\n' % item))
            continue
        try:
            time_limit = float(_sysstr(parts[0]))
        except ValueError as e:
            ui.warn(
                (
                    b'malformed run limit entry, %s: %s\n'
                    % (_bytestr(e), item)
                )
            )
            continue
        try:
            run_limit = int(_sysstr(parts[1]))
        except ValueError as e:
            ui.warn(
                (
                    b'malformed run limit entry, %s: %s\n'
                    % (_bytestr(e), item)
                )
            )
            continue
        limits.append((time_limit, run_limit))
    if not limits:
        limits = DEFAULTLIMITS

    profiler = None
    if profiling is not None:
        if ui.configbool(b"perf", b"profile-benchmark", False):
            profiler = profiling.profile(ui)

    prerun = getint(ui, b"perf", b"pre-run", 0)
    t = functools.partial(
        _timer,
        fm,
        displayall=displayall,
        limits=limits,
        prerun=prerun,
        profiler=profiler,
    )
    return t, fm

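# Example (illustrative): the calling pattern shared by the perf commands
# below; gettimer() returns the timing driver plus the formatter that
# must be closed once the benchmark is done:
#
#   timer, fm = gettimer(ui, opts)
#   timer(lambda: len(repo.tags()))
#   fm.end()
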
def stub_timer(fm, func, setup=None, title=None):
    if setup is not None:
        setup()
    func()


@contextlib.contextmanager
def timeone():
    r = []
    ostart = os.times()
    cstart = util.timer()
    yield r
    cstop = util.timer()
    ostop = os.times()
    a, b = ostart, ostop
    r.append((cstop - cstart, b[0] - a[0], b[1] - a[1]))

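# Example (illustrative) use of timeone(): the yielded list receives a
# single (wall, user, sys) tuple once the block exits:
#
#   with timeone() as res:
#       func()
#   wall, user, sys_times = res[0]
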
# list of stop conditions (elapsed time, minimal run count)
DEFAULTLIMITS = (
    (3.0, 100),
    (10.0, 3),
)


def _timer(
    fm,
    func,
    setup=None,
    title=None,
    displayall=False,
    limits=DEFAULTLIMITS,
    prerun=0,
    profiler=None,
):
    gc.collect()
    results = []
    begin = util.timer()
    count = 0
    if profiler is None:
        profiler = NOOPCTX
    for i in range(prerun):
        if setup is not None:
            setup()
        func()
    keepgoing = True
    while keepgoing:
        if setup is not None:
            setup()
        with profiler:
            with timeone() as item:
                r = func()
        profiler = NOOPCTX
        count += 1
        results.append(item[0])
        cstop = util.timer()
        # Look for a stop condition.
        elapsed = cstop - begin
        for t, mincount in limits:
            if elapsed >= t and count >= mincount:
                keepgoing = False
                break

    formatone(fm, results, title=title, result=r, displayall=displayall)


def formatone(fm, timings, title=None, result=None, displayall=False):

    count = len(timings)

    fm.startitem()

    if title:
        fm.write(b'title', b'! %s\n', title)
    if result:
        fm.write(b'result', b'! result: %s\n', result)

    def display(role, entry):
        prefix = b''
        if role != b'best':
            prefix = b'%s.' % role
        fm.plain(b'!')
        fm.write(prefix + b'wall', b' wall %f', entry[0])
        fm.write(prefix + b'comb', b' comb %f', entry[1] + entry[2])
        fm.write(prefix + b'user', b' user %f', entry[1])
        fm.write(prefix + b'sys', b' sys %f', entry[2])
        fm.write(prefix + b'count', b' (%s of %%d)' % role, count)
        fm.plain(b'\n')

    timings.sort()
    min_val = timings[0]
    display(b'best', min_val)
    if displayall:
        max_val = timings[-1]
        display(b'max', max_val)
        avg = tuple([sum(x) / count for x in zip(*timings)])
        display(b'avg', avg)
        median = timings[len(timings) // 2]
        display(b'median', median)

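# Illustrative output of formatone() above (the numbers are made up);
# each fm.write() call in display() contributes one field:
#
#   ! wall 0.000089 comb 0.000000 user 0.000000 sys 0.000000 (best of 100)
#
# With perf.all-timing set, extra lines follow for max, avg and median,
# with each field prefixed by its role (e.g. "max.wall").
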
# utilities for historical portability


def getint(ui, section, name, default):
    # for "historical portability":
    # ui.configint has been available since 1.9 (or fa2b596db182)
    v = ui.config(section, name, None)
    if v is None:
        return default
    try:
        return int(v)
    except ValueError:
        raise error.ConfigError(
            b"%s.%s is not an integer ('%s')" % (section, name, v)
        )


def safeattrsetter(obj, name, ignoremissing=False):
    """Ensure that 'obj' has the 'name' attribute before subsequent setattr

    This function aborts if 'obj' doesn't have the 'name' attribute at
    runtime. This avoids silently overlooking the future removal of an
    attribute that a performance measurement relies on.

    This function returns an object that can (1) assign a new value to
    the attribute and (2) restore the attribute's original value.

    If 'ignoremissing' is true, a missing 'name' attribute doesn't cause
    an abort, and this function returns None. This is useful for examining
    an attribute that isn't guaranteed to exist in all Mercurial versions.
    """
    if not util.safehasattr(obj, name):
        if ignoremissing:
            return None
        raise error.Abort(
            (
                b"missing attribute %s of %s might break assumption"
                b" of performance measurement"
            )
            % (name, obj)
        )

    origvalue = getattr(obj, _sysstr(name))

    class attrutil(object):
        def set(self, newvalue):
            setattr(obj, _sysstr(name), newvalue)

        def restore(self):
            setattr(obj, _sysstr(name), origvalue)

    return attrutil()

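# Example (illustrative), mirroring the stream redirection done in
# gettimer() above:
#
#   uifout = safeattrsetter(ui, b'fout', ignoremissing=True)
#   if uifout:
#       uifout.set(ui.ferr)  # assign a new value
#       ...
#       uifout.restore()     # put the original value back
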
# utilities to examine internal API changes


def getbranchmapsubsettable():
    # for "historical portability":
    # subsettable is defined in:
    # - branchmap since 2.9 (or 175c6fd8cacc)
    # - repoview since 2.5 (or 59a9f18d4587)
    # - repoviewutil since 5.0
    for mod in (branchmap, repoview, repoviewutil):
        subsettable = getattr(mod, 'subsettable', None)
        if subsettable:
            return subsettable

    # bisecting in bcee63733aad::59a9f18d4587 can reach here (both
    # branchmap and repoview modules exist, but the subsettable attribute
    # doesn't)
    raise error.Abort(
        b"perfbranchmap not available with this Mercurial",
        hint=b"use 2.5 or later",
    )


def getsvfs(repo):
    """Return appropriate object to access files under .hg/store"""
    # for "historical portability":
    # repo.svfs has been available since 2.3 (or 7034365089bf)
    svfs = getattr(repo, 'svfs', None)
    if svfs:
        return svfs
    else:
        return getattr(repo, 'sopener')


def getvfs(repo):
    """Return appropriate object to access files under .hg"""
    # for "historical portability":
    # repo.vfs has been available since 2.3 (or 7034365089bf)
    vfs = getattr(repo, 'vfs', None)
    if vfs:
        return vfs
    else:
        return getattr(repo, 'opener')


def repocleartagscachefunc(repo):
    """Return the function to clear tags cache according to repo internal API"""
    if util.safehasattr(repo, b'_tagscache'):  # since 2.0 (or 9dca7653b525)
        # in this case, setattr(repo, '_tagscache', None) or so isn't
        # the correct way to clear tags cache, because existing code paths
        # expect _tagscache to be a structured object.
        def clearcache():
            # _tagscache has been filteredpropertycache since 2.5 (or
            # 98c867ac1330), and delattr() can't work in such a case
            if '_tagscache' in vars(repo):
                del repo.__dict__['_tagscache']

        return clearcache

    repotags = safeattrsetter(repo, b'_tags', ignoremissing=True)
    if repotags:  # since 1.4 (or 5614a628d173)
        return lambda: repotags.set(None)

    repotagscache = safeattrsetter(repo, b'tagscache', ignoremissing=True)
    if repotagscache:  # since 0.6 (or d7df759d0e97)
        return lambda: repotagscache.set(None)

    # Mercurial earlier than 0.6 (or d7df759d0e97) logically reaches
    # this point, but it isn't so problematic, because:
    # - repo.tags of such Mercurial isn't "callable", and repo.tags()
    #   in perftags() causes failure soon
    # - perf.py itself has been available since 1.1 (or eb240755386d)
    raise error.Abort(b"tags API of this hg command is unknown")


# utilities to clear cache


def clearfilecache(obj, attrname):
    unfiltered = getattr(obj, 'unfiltered', None)
    if unfiltered is not None:
        obj = obj.unfiltered()
    if attrname in vars(obj):
        delattr(obj, attrname)
    obj._filecache.pop(attrname, None)


def clearchangelog(repo):
    if repo is not repo.unfiltered():
        object.__setattr__(repo, '_clcachekey', None)
        object.__setattr__(repo, '_clcache', None)
    clearfilecache(repo.unfiltered(), 'changelog')

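# Example (illustrative): the perf commands below call these helpers from
# their setup callback so that every timed run starts with cold caches:
#
#   def s():
#       clearchangelog(repo)
#
#   timer(d, setup=s)
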
# perf commands


@command(b'perf::walk|perfwalk', formatteropts)
def perfwalk(ui, repo, *pats, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    m = scmutil.match(repo[None], pats, {})
    timer(
        lambda: len(
            list(
                repo.dirstate.walk(m, subrepos=[], unknown=True, ignored=False)
            )
        )
    )
    fm.end()


@command(b'perf::annotate|perfannotate', formatteropts)
def perfannotate(ui, repo, f, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    fc = repo[b'.'][f]
    timer(lambda: len(fc.annotate(True)))
    fm.end()


@command(
    b'perf::status|perfstatus',
    [
        (b'u', b'unknown', False, b'ask status to look for unknown files'),
        (b'', b'dirstate', False, b'benchmark the internal dirstate call'),
    ]
    + formatteropts,
)
def perfstatus(ui, repo, **opts):
    """benchmark the performance of a single status call

    The repository data are preserved between calls.

    By default, only the status of tracked files is requested. If
    `--unknown` is passed, the status of "unknown" files is also requested.
    """
    opts = _byteskwargs(opts)
    # m = match.always(repo.root, repo.getcwd())
    # timer(lambda: sum(map(len, repo.dirstate.status(m, [], False, False,
    # False))))
    timer, fm = gettimer(ui, opts)
    if opts[b'dirstate']:
        dirstate = repo.dirstate
        m = scmutil.matchall(repo)
        unknown = opts[b'unknown']

        def status_dirstate():
            s = dirstate.status(
                m, subrepos=[], ignored=False, clean=False, unknown=unknown
            )
            sum(map(bool, s))

        timer(status_dirstate)
    else:
        timer(lambda: sum(map(len, repo.status(unknown=opts[b'unknown']))))
    fm.end()

809 @command(b'perf::addremove|perfaddremove', formatteropts)
809 @command(b'perf::addremove|perfaddremove', formatteropts)
810 def perfaddremove(ui, repo, **opts):
810 def perfaddremove(ui, repo, **opts):
811 opts = _byteskwargs(opts)
811 opts = _byteskwargs(opts)
812 timer, fm = gettimer(ui, opts)
812 timer, fm = gettimer(ui, opts)
813 try:
813 try:
814 oldquiet = repo.ui.quiet
814 oldquiet = repo.ui.quiet
815 repo.ui.quiet = True
815 repo.ui.quiet = True
816 matcher = scmutil.match(repo[None])
816 matcher = scmutil.match(repo[None])
817 opts[b'dry_run'] = True
817 opts[b'dry_run'] = True
818 if 'uipathfn' in getargspec(scmutil.addremove).args:
818 if 'uipathfn' in getargspec(scmutil.addremove).args:
819 uipathfn = scmutil.getuipathfn(repo)
819 uipathfn = scmutil.getuipathfn(repo)
820 timer(lambda: scmutil.addremove(repo, matcher, b"", uipathfn, opts))
820 timer(lambda: scmutil.addremove(repo, matcher, b"", uipathfn, opts))
821 else:
821 else:
822 timer(lambda: scmutil.addremove(repo, matcher, b"", opts))
822 timer(lambda: scmutil.addremove(repo, matcher, b"", opts))
823 finally:
823 finally:
824 repo.ui.quiet = oldquiet
824 repo.ui.quiet = oldquiet
825 fm.end()
825 fm.end()
826
826
827
827
828 def clearcaches(cl):
828 def clearcaches(cl):
829 # behave somewhat consistently across internal API changes
829 # behave somewhat consistently across internal API changes
830 if util.safehasattr(cl, b'clearcaches'):
830 if util.safehasattr(cl, b'clearcaches'):
831 cl.clearcaches()
831 cl.clearcaches()
832 elif util.safehasattr(cl, b'_nodecache'):
832 elif util.safehasattr(cl, b'_nodecache'):
833 # <= hg-5.2
833 # <= hg-5.2
834 from mercurial.node import nullid, nullrev
834 from mercurial.node import nullid, nullrev
835
835
836 cl._nodecache = {nullid: nullrev}
836 cl._nodecache = {nullid: nullrev}
837 cl._nodepos = None
837 cl._nodepos = None
838
838
839
839
840 @command(b'perf::heads|perfheads', formatteropts)
840 @command(b'perf::heads|perfheads', formatteropts)
841 def perfheads(ui, repo, **opts):
841 def perfheads(ui, repo, **opts):
842 """benchmark the computation of a changelog heads"""
842 """benchmark the computation of a changelog heads"""
843 opts = _byteskwargs(opts)
843 opts = _byteskwargs(opts)
844 timer, fm = gettimer(ui, opts)
844 timer, fm = gettimer(ui, opts)
845 cl = repo.changelog
845 cl = repo.changelog
846
846
847 def s():
847 def s():
848 clearcaches(cl)
848 clearcaches(cl)
849
849
850 def d():
850 def d():
851 len(cl.headrevs())
851 len(cl.headrevs())
852
852
853 timer(d, setup=s)
853 timer(d, setup=s)
854 fm.end()
854 fm.end()
855
855
856
856
857 @command(
857 @command(
858 b'perf::tags|perftags',
858 b'perf::tags|perftags',
859 formatteropts
859 formatteropts
860 + [
860 + [
861 (b'', b'clear-revlogs', False, b'refresh changelog and manifest'),
861 (b'', b'clear-revlogs', False, b'refresh changelog and manifest'),
862 ],
862 ],
863 )
863 )
864 def perftags(ui, repo, **opts):
864 def perftags(ui, repo, **opts):
865 opts = _byteskwargs(opts)
865 opts = _byteskwargs(opts)
866 timer, fm = gettimer(ui, opts)
866 timer, fm = gettimer(ui, opts)
867 repocleartagscache = repocleartagscachefunc(repo)
867 repocleartagscache = repocleartagscachefunc(repo)
868 clearrevlogs = opts[b'clear_revlogs']
868 clearrevlogs = opts[b'clear_revlogs']
869
869
870 def s():
870 def s():
871 if clearrevlogs:
871 if clearrevlogs:
872 clearchangelog(repo)
872 clearchangelog(repo)
873 clearfilecache(repo.unfiltered(), 'manifest')
873 clearfilecache(repo.unfiltered(), 'manifest')
874 repocleartagscache()
874 repocleartagscache()
875
875
876 def t():
876 def t():
877 return len(repo.tags())
877 return len(repo.tags())
878
878
879 timer(t, setup=s)
879 timer(t, setup=s)
880 fm.end()
880 fm.end()
881
881
882
882
883 @command(b'perf::ancestors|perfancestors', formatteropts)
883 @command(b'perf::ancestors|perfancestors', formatteropts)
884 def perfancestors(ui, repo, **opts):
884 def perfancestors(ui, repo, **opts):
885 opts = _byteskwargs(opts)
885 opts = _byteskwargs(opts)
886 timer, fm = gettimer(ui, opts)
886 timer, fm = gettimer(ui, opts)
887 heads = repo.changelog.headrevs()
887 heads = repo.changelog.headrevs()
888
888
889 def d():
889 def d():
890 for a in repo.changelog.ancestors(heads):
890 for a in repo.changelog.ancestors(heads):
891 pass
891 pass
892
892
893 timer(d)
893 timer(d)
894 fm.end()
894 fm.end()
895
895
896
896
897 @command(b'perf::ancestorset|perfancestorset', formatteropts)
897 @command(b'perf::ancestorset|perfancestorset', formatteropts)
898 def perfancestorset(ui, repo, revset, **opts):
898 def perfancestorset(ui, repo, revset, **opts):
899 opts = _byteskwargs(opts)
899 opts = _byteskwargs(opts)
900 timer, fm = gettimer(ui, opts)
900 timer, fm = gettimer(ui, opts)
901 revs = repo.revs(revset)
901 revs = repo.revs(revset)
902 heads = repo.changelog.headrevs()
902 heads = repo.changelog.headrevs()
903
903
904 def d():
904 def d():
905 s = repo.changelog.ancestors(heads)
905 s = repo.changelog.ancestors(heads)
906 for rev in revs:
906 for rev in revs:
907 rev in s
907 rev in s
908
908
909 timer(d)
909 timer(d)
910 fm.end()
910 fm.end()
911
911
912
912
913 @command(b'perf::discovery|perfdiscovery', formatteropts, b'PATH')
913 @command(b'perf::discovery|perfdiscovery', formatteropts, b'PATH')
914 def perfdiscovery(ui, repo, path, **opts):
914 def perfdiscovery(ui, repo, path, **opts):
915 """benchmark discovery between local repo and the peer at given path"""
915 """benchmark discovery between local repo and the peer at given path"""
916 repos = [repo, None]
916 repos = [repo, None]
917 timer, fm = gettimer(ui, opts)
917 timer, fm = gettimer(ui, opts)
918
918
919 try:
919 try:
920 from mercurial.utils.urlutil import get_unique_pull_path
920 from mercurial.utils.urlutil import get_unique_pull_path
921
921
922 path = get_unique_pull_path(b'perfdiscovery', repo, ui, path)[0]
922 path = get_unique_pull_path(b'perfdiscovery', repo, ui, path)[0]
923 except ImportError:
923 except ImportError:
924 path = ui.expandpath(path)
924 path = ui.expandpath(path)
925
925
926 def s():
926 def s():
927 repos[1] = hg.peer(ui, opts, path)
927 repos[1] = hg.peer(ui, opts, path)
928
928
929 def d():
929 def d():
930 setdiscovery.findcommonheads(ui, *repos)
930 setdiscovery.findcommonheads(ui, *repos)
931
931
932 timer(d, setup=s)
932 timer(d, setup=s)
933 fm.end()
933 fm.end()
934
934
935
935
936 @command(
936 @command(
937 b'perf::bookmarks|perfbookmarks',
937 b'perf::bookmarks|perfbookmarks',
938 formatteropts
938 formatteropts
939 + [
939 + [
940 (b'', b'clear-revlogs', False, b'refresh changelog and manifest'),
940 (b'', b'clear-revlogs', False, b'refresh changelog and manifest'),
941 ],
941 ],
942 )
942 )
943 def perfbookmarks(ui, repo, **opts):
943 def perfbookmarks(ui, repo, **opts):
944 """benchmark parsing bookmarks from disk to memory"""
944 """benchmark parsing bookmarks from disk to memory"""
945 opts = _byteskwargs(opts)
945 opts = _byteskwargs(opts)
946 timer, fm = gettimer(ui, opts)
946 timer, fm = gettimer(ui, opts)
947
947
948 clearrevlogs = opts[b'clear_revlogs']
948 clearrevlogs = opts[b'clear_revlogs']
949
949
950 def s():
950 def s():
951 if clearrevlogs:
951 if clearrevlogs:
952 clearchangelog(repo)
952 clearchangelog(repo)
953 clearfilecache(repo, b'_bookmarks')
953 clearfilecache(repo, b'_bookmarks')
954
954
955 def d():
955 def d():
956 repo._bookmarks
956 repo._bookmarks
957
957
958 timer(d, setup=s)
958 timer(d, setup=s)
959 fm.end()
959 fm.end()
960
960
961
961
962 @command(b'perf::bundleread|perfbundleread', formatteropts, b'BUNDLE')
962 @command(b'perf::bundleread|perfbundleread', formatteropts, b'BUNDLE')
963 def perfbundleread(ui, repo, bundlepath, **opts):
963 def perfbundleread(ui, repo, bundlepath, **opts):
964 """Benchmark reading of bundle files.
964 """Benchmark reading of bundle files.
965
965
966 This command is meant to isolate the I/O part of bundle reading as
966 This command is meant to isolate the I/O part of bundle reading as
967 much as possible.
967 much as possible.
968 """
968 """
969 from mercurial import (
969 from mercurial import (
970 bundle2,
970 bundle2,
971 exchange,
971 exchange,
972 streamclone,
972 streamclone,
973 )
973 )
974
974
975 opts = _byteskwargs(opts)
975 opts = _byteskwargs(opts)
976
976
977 def makebench(fn):
977 def makebench(fn):
978 def run():
978 def run():
979 with open(bundlepath, b'rb') as fh:
979 with open(bundlepath, b'rb') as fh:
980 bundle = exchange.readbundle(ui, fh, bundlepath)
980 bundle = exchange.readbundle(ui, fh, bundlepath)
981 fn(bundle)
981 fn(bundle)
982
982
983 return run
983 return run
984
984
985 def makereadnbytes(size):
985 def makereadnbytes(size):
986 def run():
986 def run():
987 with open(bundlepath, b'rb') as fh:
987 with open(bundlepath, b'rb') as fh:
988 bundle = exchange.readbundle(ui, fh, bundlepath)
988 bundle = exchange.readbundle(ui, fh, bundlepath)
989 while bundle.read(size):
989 while bundle.read(size):
990 pass
990 pass
991
991
992 return run
992 return run
993
993
994 def makestdioread(size):
994 def makestdioread(size):
995 def run():
995 def run():
996 with open(bundlepath, b'rb') as fh:
        with open(bundlepath, b'rb') as fh:
            while fh.read(size):
                pass

        return run

    # bundle1

    def deltaiter(bundle):
        for delta in bundle.deltaiter():
            pass

    def iterchunks(bundle):
        for chunk in bundle.getchunks():
            pass

    # bundle2

    def forwardchunks(bundle):
        for chunk in bundle._forwardchunks():
            pass

    def iterparts(bundle):
        for part in bundle.iterparts():
            pass

    def iterpartsseekable(bundle):
        for part in bundle.iterparts(seekable=True):
            pass

    def seek(bundle):
        for part in bundle.iterparts(seekable=True):
            part.seek(0, os.SEEK_END)

    def makepartreadnbytes(size):
        def run():
            with open(bundlepath, b'rb') as fh:
                bundle = exchange.readbundle(ui, fh, bundlepath)
                for part in bundle.iterparts():
                    while part.read(size):
                        pass

        return run

    benches = [
        (makestdioread(8192), b'read(8k)'),
        (makestdioread(16384), b'read(16k)'),
        (makestdioread(32768), b'read(32k)'),
        (makestdioread(131072), b'read(128k)'),
    ]

    with open(bundlepath, b'rb') as fh:
        bundle = exchange.readbundle(ui, fh, bundlepath)

        if isinstance(bundle, changegroup.cg1unpacker):
            benches.extend(
                [
                    (makebench(deltaiter), b'cg1 deltaiter()'),
                    (makebench(iterchunks), b'cg1 getchunks()'),
                    (makereadnbytes(8192), b'cg1 read(8k)'),
                    (makereadnbytes(16384), b'cg1 read(16k)'),
                    (makereadnbytes(32768), b'cg1 read(32k)'),
                    (makereadnbytes(131072), b'cg1 read(128k)'),
                ]
            )
        elif isinstance(bundle, bundle2.unbundle20):
            benches.extend(
                [
                    (makebench(forwardchunks), b'bundle2 forwardchunks()'),
                    (makebench(iterparts), b'bundle2 iterparts()'),
                    (
                        makebench(iterpartsseekable),
                        b'bundle2 iterparts() seekable',
                    ),
                    (makebench(seek), b'bundle2 part seek()'),
                    (makepartreadnbytes(8192), b'bundle2 part read(8k)'),
                    (makepartreadnbytes(16384), b'bundle2 part read(16k)'),
                    (makepartreadnbytes(32768), b'bundle2 part read(32k)'),
                    (makepartreadnbytes(131072), b'bundle2 part read(128k)'),
                ]
            )
        elif isinstance(bundle, streamclone.streamcloneapplier):
            raise error.Abort(b'stream clone bundles not supported')
        else:
            raise error.Abort(b'unhandled bundle type: %s' % type(bundle))

    for fn, title in benches:
        timer, fm = gettimer(ui, opts)
        timer(fn, title=title)
        fm.end()


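# A hedged usage sketch for the command below; the revset is an arbitrary
# example, not a recommended value:
#
#   $ hg perf::changegroupchangelog --rev '0:1000'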
@command(
    b'perf::changegroupchangelog|perfchangegroupchangelog',
    formatteropts
    + [
        (b'', b'cgversion', b'02', b'changegroup version'),
        (b'r', b'rev', b'', b'revisions to add to changegroup'),
    ],
)
def perfchangegroupchangelog(ui, repo, cgversion=b'02', rev=None, **opts):
    """Benchmark producing a changelog group for a changegroup.

    This measures the time spent processing the changelog during a
    bundle operation. This occurs during `hg bundle` and on a server
    processing a `getbundle` wire protocol request (handles clones
    and pull requests).

    By default, all revisions are added to the changegroup.
    """
    opts = _byteskwargs(opts)
    cl = repo.changelog
    nodes = [cl.lookup(r) for r in repo.revs(rev or b'all()')]
    bundler = changegroup.getbundler(cgversion, repo)

    def d():
        state, chunks = bundler._generatechangelog(cl, nodes)
        for chunk in chunks:
            pass

    timer, fm = gettimer(ui, opts)

    # Terminal printing can interfere with timing. So disable it.
    with ui.configoverride({(b'progress', b'disable'): True}):
        timer(d)

    fm.end()


@command(b'perf::dirs|perfdirs', formatteropts)
def perfdirs(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    dirstate = repo.dirstate
    b'a' in dirstate

    def d():
        dirstate.hasdir(b'a')
        del dirstate._map._dirs

    timer(d)
    fm.end()


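# Usage sketches for the command below (the two flags are mutually
# exclusive, matching the check in the function body):
#
#   $ hg perf::dirstate              # cold "contains" load (default)
#   $ hg perf::dirstate --iteration  # full dirstate iteration
#   $ hg perf::dirstate --contains   # many `nf in dirstate` calls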
@command(
    b'perf::dirstate|perfdirstate',
    [
        (
            b'',
            b'iteration',
            None,
            b'benchmark a full iteration for the dirstate',
        ),
        (
            b'',
            b'contains',
            None,
            b'benchmark a large number of `nf in dirstate` calls',
        ),
    ]
    + formatteropts,
)
def perfdirstate(ui, repo, **opts):
    """benchmark the time of various dirstate operations

    By default benchmark the time necessary to load a dirstate from scratch.
    The dirstate is loaded to the point where a "contains" request can be
    answered.
    """
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    b"a" in repo.dirstate

    if opts[b'iteration'] and opts[b'contains']:
        msg = b'only specify one of --iteration or --contains'
        raise error.Abort(msg)

    if opts[b'iteration']:
        setup = None
        dirstate = repo.dirstate

        def d():
            for f in dirstate:
                pass

    elif opts[b'contains']:
        setup = None
        dirstate = repo.dirstate
        allfiles = list(dirstate)
        # also add file paths that will be "missing" from the dirstate
        allfiles.extend([f[::-1] for f in allfiles])

        def d():
            for f in allfiles:
                f in dirstate

    else:

        def setup():
            repo.dirstate.invalidate()

        def d():
            b"a" in repo.dirstate

    timer(d, setup=setup)
    fm.end()


@command(b'perf::dirstatedirs|perfdirstatedirs', formatteropts)
def perfdirstatedirs(ui, repo, **opts):
    """benchmark a 'dirstate.hasdir' call from an empty `dirs` cache"""
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    repo.dirstate.hasdir(b"a")

    def setup():
        del repo.dirstate._map._dirs

    def d():
        repo.dirstate.hasdir(b"a")

    timer(d, setup=setup)
    fm.end()


@command(b'perf::dirstatefoldmap|perfdirstatefoldmap', formatteropts)
def perfdirstatefoldmap(ui, repo, **opts):
    """benchmark a `dirstate._map.filefoldmap.get()` request

    The dirstate filefoldmap cache is dropped between every request.
    """
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    dirstate = repo.dirstate
    dirstate._map.filefoldmap.get(b'a')

    def setup():
        del dirstate._map.filefoldmap

    def d():
        dirstate._map.filefoldmap.get(b'a')

    timer(d, setup=setup)
    fm.end()


@command(b'perf::dirfoldmap|perfdirfoldmap', formatteropts)
def perfdirfoldmap(ui, repo, **opts):
    """benchmark a `dirstate._map.dirfoldmap.get()` request

    The dirstate dirfoldmap cache is dropped between every request.
    """
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    dirstate = repo.dirstate
    dirstate._map.dirfoldmap.get(b'a')

    def setup():
        del dirstate._map.dirfoldmap
        del dirstate._map._dirs

    def d():
        dirstate._map.dirfoldmap.get(b'a')

    timer(d, setup=setup)
    fm.end()


@command(b'perf::dirstatewrite|perfdirstatewrite', formatteropts)
def perfdirstatewrite(ui, repo, **opts):
    """benchmark the time it takes to write a dirstate to disk"""
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    ds = repo.dirstate
    b"a" in ds

    def setup():
        ds._dirty = True

    def d():
        ds.write(repo.currenttransaction())

    timer(d, setup=setup)
    fm.end()


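# The helper below is shared by the merge-related benchmarks; it turns the
# --rev/--from/--base options into (local, other, ancestor) contexts.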
def _getmergerevs(repo, opts):
    """parse command arguments to return the revs involved in a merge

    input: options dictionary with `rev`, `from` and `base`
    output: (localctx, otherctx, basectx)
    """
    if opts[b'from']:
        fromrev = scmutil.revsingle(repo, opts[b'from'])
        wctx = repo[fromrev]
    else:
        wctx = repo[None]
        # we don't want working dir files to be stat'd in the benchmark, so
        # prime that cache
        wctx.dirty()
    rctx = scmutil.revsingle(repo, opts[b'rev'], opts[b'rev'])
    if opts[b'base']:
        fromrev = scmutil.revsingle(repo, opts[b'base'])
        ancestor = repo[fromrev]
    else:
        ancestor = wctx.ancestor(rctx)
    return (wctx, rctx, ancestor)


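# Usage sketch for the command below (REV1/REV2/REV3 are placeholders):
#
#   $ hg perf::mergecalculate --from REV1 --rev REV2 --base REV3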
@command(
    b'perf::mergecalculate|perfmergecalculate',
    [
        (b'r', b'rev', b'.', b'rev to merge against'),
        (b'', b'from', b'', b'rev to merge from'),
        (b'', b'base', b'', b'the revision to use as base'),
    ]
    + formatteropts,
)
def perfmergecalculate(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)

    wctx, rctx, ancestor = _getmergerevs(repo, opts)

    def d():
        # acceptremote is True because we don't want prompts in the middle of
        # our benchmark
        merge.calculateupdates(
            repo,
            wctx,
            rctx,
            [ancestor],
            branchmerge=False,
            force=False,
            acceptremote=True,
            followcopies=True,
        )

    timer(d)
    fm.end()


@command(
    b'perf::mergecopies|perfmergecopies',
    [
        (b'r', b'rev', b'.', b'rev to merge against'),
        (b'', b'from', b'', b'rev to merge from'),
        (b'', b'base', b'', b'the revision to use as base'),
    ]
    + formatteropts,
)
def perfmergecopies(ui, repo, **opts):
    """measure runtime of `copies.mergecopies`"""
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    wctx, rctx, ancestor = _getmergerevs(repo, opts)

    def d():
        # acceptremote is True because we don't want prompts in the middle of
        # our benchmark
        copies.mergecopies(repo, wctx, rctx, ancestor)

    timer(d)
    fm.end()


@command(b'perf::pathcopies|perfpathcopies', [], b"REV REV")
def perfpathcopies(ui, repo, rev1, rev2, **opts):
    """benchmark the copy tracing logic"""
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    ctx1 = scmutil.revsingle(repo, rev1, rev1)
    ctx2 = scmutil.revsingle(repo, rev2, rev2)

    def d():
        copies.pathcopies(ctx1, ctx2)

    timer(d)
    fm.end()


@command(
    b'perf::phases|perfphases',
    [
        (b'', b'full', False, b'include file reading time too'),
    ],
    b"",
)
def perfphases(ui, repo, **opts):
    """benchmark phasesets computation"""
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    _phases = repo._phasecache
    full = opts.get(b'full')

    def d():
        phases = _phases
        if full:
            clearfilecache(repo, b'_phasecache')
            phases = repo._phasecache
        phases.invalidate()
        phases.loadphaserevs(repo)

    timer(d)
    fm.end()


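# Usage sketch for the command below; the URL is a placeholder, and with no
# DEST the configured default(-push) path is used:
#
#   $ hg perf::phasesremote https://example.com/repo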
@command(b'perf::phasesremote|perfphasesremote', [], b"[DEST]")
def perfphasesremote(ui, repo, dest=None, **opts):
    """benchmark the time needed to analyse phases of the remote server"""
    from mercurial.node import bin
    from mercurial import (
        exchange,
        hg,
        phases,
    )

    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)

    path = ui.getpath(dest, default=(b'default-push', b'default'))
    if not path:
        raise error.Abort(
            b'default repository not configured!',
            hint=b"see 'hg help config.paths'",
        )
    dest = path.pushloc or path.loc
    ui.statusnoi18n(b'analysing phase of %s\n' % util.hidepassword(dest))
    other = hg.peer(repo, opts, dest)

    # easier to perform discovery through the operation
    op = exchange.pushoperation(repo, other)
    exchange._pushdiscoverychangeset(op)

    remotesubset = op.fallbackheads

    with other.commandexecutor() as e:
        remotephases = e.callcommand(
            b'listkeys', {b'namespace': b'phases'}
        ).result()
    del other
    publishing = remotephases.get(b'publishing', False)
    if publishing:
        ui.statusnoi18n(b'publishing: yes\n')
    else:
        ui.statusnoi18n(b'publishing: no\n')

    has_node = getattr(repo.changelog.index, 'has_node', None)
    if has_node is None:
        has_node = repo.changelog.nodemap.__contains__
    nonpublishroots = 0
    for nhex, phase in remotephases.iteritems():
        if nhex == b'publishing':  # ignore data related to publish option
            continue
        node = bin(nhex)
        if has_node(node) and int(phase):
            nonpublishroots += 1
    ui.statusnoi18n(b'number of roots: %d\n' % len(remotephases))
    ui.statusnoi18n(b'number of known non public roots: %d\n' % nonpublishroots)

    def d():
        phases.remotephasessummary(repo, remotesubset, remotephases)

    timer(d)
    fm.end()


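# Usage sketches for the command below (the values are placeholders; the
# argument is a changeset rev by default, or a manifest rev/node with -m):
#
#   $ hg perf::manifest tip
#   $ hg perf::manifest --manifest-rev 0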
@command(
    b'perf::manifest|perfmanifest',
    [
        (b'm', b'manifest-rev', False, b'Look up a manifest node revision'),
        (b'', b'clear-disk', False, b'clear on-disk caches too'),
    ]
    + formatteropts,
    b'REV|NODE',
)
def perfmanifest(ui, repo, rev, manifest_rev=False, clear_disk=False, **opts):
    """benchmark the time to read a manifest from disk and return a usable
    dict-like object

    Manifest caches are cleared before retrieval."""
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    if not manifest_rev:
        ctx = scmutil.revsingle(repo, rev, rev)
        t = ctx.manifestnode()
    else:
        from mercurial.node import bin

        if len(rev) == 40:
            t = bin(rev)
        else:
            try:
                rev = int(rev)

                if util.safehasattr(repo.manifestlog, b'getstorage'):
                    t = repo.manifestlog.getstorage(b'').node(rev)
                else:
                    t = repo.manifestlog._revlog.lookup(rev)
            except ValueError:
                raise error.Abort(
                    b'manifest revision must be integer or full node'
                )

    def d():
        repo.manifestlog.clearcaches(clear_persisted_data=clear_disk)
        repo.manifestlog[t].read()

    timer(d)
    fm.end()


@command(b'perf::changeset|perfchangeset', formatteropts)
def perfchangeset(ui, repo, rev, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    n = scmutil.revsingle(repo, rev).node()

    def d():
        repo.changelog.read(n)
        # repo.changelog._cache = None

    timer(d)
    fm.end()


@command(b'perf::ignore|perfignore', formatteropts)
def perfignore(ui, repo, **opts):
    """benchmark operations related to computing ignore"""
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    dirstate = repo.dirstate

    def setupone():
        dirstate.invalidate()
        clearfilecache(dirstate, b'_ignore')

    def runone():
        dirstate._ignore

    timer(runone, setup=setupone, title=b"load")
    fm.end()


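# Usage sketch for the command below, using two of the revsets suggested in
# its docstring (--rev may be repeated):
#
#   $ hg perf::index --rev tip --rev '-10000:'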
@command(
    b'perf::index|perfindex',
    [
        (b'', b'rev', [], b'revision to be looked up (default tip)'),
        (b'', b'no-lookup', None, b'do not perform revision lookups after creation'),
    ]
    + formatteropts,
)
def perfindex(ui, repo, **opts):
    """benchmark index creation time followed by a lookup

    The default is to look `tip` up. Depending on the index implementation,
    the revision looked up can matter. For example, an implementation
    scanning the index will have a faster lookup time for `--rev tip` than for
    `--rev 0`. The number of revisions looked up and their order can also
    matter.

    Examples of useful sets to test:

    * tip
    * 0
    * -10:
    * :10
    * -10: + :10
    * :10: + -10:
    * -10000:
    * -10000: + 0

    It is not currently possible to check for lookup of a missing node. For
    deeper lookup benchmarking, check out the `perfnodemap` command."""
    import mercurial.revlog

    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    mercurial.revlog._prereadsize = 2 ** 24  # disable lazy parser in old hg
    if opts[b'no_lookup']:
        if opts['rev']:
            raise error.Abort('--no-lookup and --rev are mutually exclusive')
        nodes = []
    elif not opts[b'rev']:
        nodes = [repo[b"tip"].node()]
    else:
        revs = scmutil.revrange(repo, opts[b'rev'])
        cl = repo.changelog
        nodes = [cl.node(r) for r in revs]

    unfi = repo.unfiltered()
    # find the filecache func directly
    # This avoids polluting the benchmark with the filecache logic
    makecl = unfi.__class__.changelog.func

    def setup():
        # probably not necessary, but for good measure
        clearchangelog(unfi)

    def d():
        cl = makecl(unfi)
        for n in nodes:
            cl.rev(n)

    timer(d, setup=setup)
    fm.end()


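# Usage sketch for the command below; --rev is required, and caches are
# cleared between calls by default (see the clear-caches flag):
#
#   $ hg perf::nodemap --rev '-10:'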
@command(
    b'perf::nodemap|perfnodemap',
    [
        (b'', b'rev', [], b'revision to be looked up (default tip)'),
        (b'', b'clear-caches', True, b'clear revlog cache between calls'),
    ]
    + formatteropts,
)
def perfnodemap(ui, repo, **opts):
    """benchmark the time necessary to look up revisions from a cold nodemap

    Depending on the implementation, the number and order of revisions we
    look up can vary. Examples of useful sets to test:
    * tip
    * 0
    * -10:
    * :10
    * -10: + :10
    * :10: + -10:
    * -10000:
    * -10000: + 0

    The command currently focuses on valid binary lookups. Benchmarking
    hexlookup, prefix lookup and missing lookup would also be valuable.
    """
    import mercurial.revlog

    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    mercurial.revlog._prereadsize = 2 ** 24  # disable lazy parser in old hg

    unfi = repo.unfiltered()
    clearcaches = opts[b'clear_caches']
    # find the filecache func directly
    # This avoids polluting the benchmark with the filecache logic
    makecl = unfi.__class__.changelog.func
    if not opts[b'rev']:
        raise error.Abort(b'use --rev to specify revisions to look up')
    revs = scmutil.revrange(repo, opts[b'rev'])
    cl = repo.changelog
    nodes = [cl.node(r) for r in revs]

    # use a list to pass a reference to the nodemap from one closure to the
    # next
    nodeget = [None]

    def setnodeget():
        # probably not necessary, but for good measure
        clearchangelog(unfi)
        cl = makecl(unfi)
        if util.safehasattr(cl.index, 'get_rev'):
            nodeget[0] = cl.index.get_rev
        else:
            nodeget[0] = cl.nodemap.get

    def d():
        get = nodeget[0]
        for n in nodes:
            get(n)

    setup = None
    if clearcaches:

        def setup():
            setnodeget()

    else:
        setnodeget()
        d()  # prewarm the data structure
    timer(d, setup=setup)
    fm.end()


@command(b'perf::startup|perfstartup', formatteropts)
def perfstartup(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)

    def d():
        if os.name != 'nt':
            os.system(
                b"HGRCPATH= %s version -q > /dev/null" % fsencode(sys.argv[0])
            )
        else:
            os.environ['HGRCPATH'] = r' '
            os.system("%s version -q > NUL" % sys.argv[0])

    timer(d)
    fm.end()


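# Usage sketch for the command below; it aborts unless the repository has at
# least ``perf.parentscount`` commits (1000 by default):
#
#   $ hg perf::parents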
@command(b'perf::parents|perfparents', formatteropts)
def perfparents(ui, repo, **opts):
    """benchmark the time necessary to fetch one changeset's parents.

    The fetch is done using the `node identifier`, traversing all object layers
    from the repository object. The first N revisions will be used for this
    benchmark. N is controlled by the ``perf.parentscount`` config option
    (default: 1000).
    """
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    # control the number of commits perfparents iterates over
    # experimental config: perf.parentscount
    count = getint(ui, b"perf", b"parentscount", 1000)
    if len(repo.changelog) < count:
        raise error.Abort(b"repo needs %d commits for this test" % count)
    repo = repo.unfiltered()
    nl = [repo.changelog.node(i) for i in _xrange(count)]

    def d():
        for n in nl:
            repo.changelog.parents(n)

    timer(d)
    fm.end()


@command(b'perf::ctxfiles|perfctxfiles', formatteropts)
def perfctxfiles(ui, repo, x, **opts):
    opts = _byteskwargs(opts)
    x = int(x)
    timer, fm = gettimer(ui, opts)

    def d():
        len(repo[x].files())

    timer(d)
    fm.end()


@command(b'perf::rawfiles|perfrawfiles', formatteropts)
def perfrawfiles(ui, repo, x, **opts):
    opts = _byteskwargs(opts)
    x = int(x)
    timer, fm = gettimer(ui, opts)
    cl = repo.changelog

    def d():
        len(cl.read(x)[3])

    timer(d)
    fm.end()


@command(b'perf::lookup|perflookup', formatteropts)
def perflookup(ui, repo, rev, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    timer(lambda: len(repo.lookup(rev)))
    fm.end()


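# Usage sketch for the command below (a norepo command; the numbers are
# arbitrary examples):
#
#   $ hg perf::linelogedits -n 20000 --max-hunk-lines 5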
@command(
    b'perf::linelogedits|perflinelogedits',
    [
        (b'n', b'edits', 10000, b'number of edits'),
        (b'', b'max-hunk-lines', 10, b'max lines in a hunk'),
    ],
    norepo=True,
)
def perflinelogedits(ui, **opts):
    from mercurial import linelog

    opts = _byteskwargs(opts)

    edits = opts[b'edits']
    maxhunklines = opts[b'max_hunk_lines']

    maxb1 = 100000
    random.seed(0)
    randint = random.randint
    currentlines = 0
    arglist = []
    for rev in _xrange(edits):
        a1 = randint(0, currentlines)
        a2 = randint(a1, min(currentlines, a1 + maxhunklines))
        b1 = randint(0, maxb1)
        b2 = randint(b1, b1 + maxhunklines)
        currentlines += (b2 - b1) - (a2 - a1)
        arglist.append((rev, a1, a2, b1, b2))

    def d():
        ll = linelog.linelog()
        for args in arglist:
            ll.replacelines(*args)

    timer, fm = gettimer(ui, opts)
    timer(d)
    fm.end()


@command(b'perf::revrange|perfrevrange', formatteropts)
def perfrevrange(ui, repo, *specs, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    revrange = scmutil.revrange
    timer(lambda: len(revrange(repo, specs)))
    fm.end()


@command(b'perf::nodelookup|perfnodelookup', formatteropts)
def perfnodelookup(ui, repo, rev, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    import mercurial.revlog

    mercurial.revlog._prereadsize = 2 ** 24  # disable lazy parser in old hg
    n = scmutil.revsingle(repo, rev).node()
    cl = mercurial.revlog.revlog(getsvfs(repo), b"00changelog.i")

    def d():
        cl.rev(n)
        clearcaches(cl)

    timer(d)
    fm.end()


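# Usage sketch for the command below; per the option description, --rename
# asks log to follow renames while walking the history:
#
#   $ hg perf::log --rename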
@command(
    b'perf::log|perflog',
    [(b'', b'rename', False, b'ask log to follow renames')] + formatteropts,
)
def perflog(ui, repo, rev=None, **opts):
    opts = _byteskwargs(opts)
    if rev is None:
        rev = []
    timer, fm = gettimer(ui, opts)
    ui.pushbuffer()
    timer(
        lambda: commands.log(
            ui, repo, rev=rev, date=b'', user=b'', copies=opts.get(b'rename')
        )
    )
    ui.popbuffer()
    fm.end()


@command(b'perf::moonwalk|perfmoonwalk', formatteropts)
def perfmoonwalk(ui, repo, **opts):
    """benchmark walking the changelog backwards

    This also loads the changelog data for each revision in the changelog.
    """
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)

    def moonwalk():
        for i in repo.changelog.revs(start=(len(repo) - 1), stop=-1):
            ctx = repo[i]
            ctx.branch()  # read changelog data (in addition to the index)

    timer(moonwalk)
    fm.end()


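# Usage sketch for the command below; the template argument is optional and
# this one is an arbitrary example:
#
#   $ hg perf::templating -r '-100:' '{rev}:{node|short}\n'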
@command(
    b'perf::templating|perftemplating',
    [
        (b'r', b'rev', [], b'revisions to run the template on'),
    ]
    + formatteropts,
)
def perftemplating(ui, repo, testedtemplate=None, **opts):
    """test the rendering time of a given template"""
    if makelogtemplater is None:
        raise error.Abort(
            b"perftemplating not available with this Mercurial",
            hint=b"use 4.3 or later",
        )

    opts = _byteskwargs(opts)

    nullui = ui.copy()
    nullui.fout = open(os.devnull, 'wb')
    nullui.disablepager()
    revs = opts.get(b'rev')
    if not revs:
        revs = [b'all()']
    revs = list(scmutil.revrange(repo, revs))

    defaulttemplate = (
        b'{date|shortdate} [{rev}:{node|short}]'
        b' {author|person}: {desc|firstline}\n'
    )
    if testedtemplate is None:
        testedtemplate = defaulttemplate
    displayer = makelogtemplater(nullui, repo, testedtemplate)

    def format():
        for r in revs:
            ctx = repo[r]
            displayer.show(ctx)
            displayer.flush(ctx)

    timer, fm = gettimer(ui, opts)
    timer(format)
    fm.end()


def _displaystats(ui, opts, entries, data):
    # use a second formatter because the data are quite different, not sure
    # how it flies with the templater.
    fm = ui.formatter(b'perf-stats', opts)
    for key, title in entries:
        values = data[key]
        nbvalues = len(values)
        values.sort()
        stats = {
            'key': key,
            'title': title,
            'nbitems': len(values),
            'min': values[0][0],
            '10%': values[(nbvalues * 10) // 100][0],
            '25%': values[(nbvalues * 25) // 100][0],
            '50%': values[(nbvalues * 50) // 100][0],
            '75%': values[(nbvalues * 75) // 100][0],
            '80%': values[(nbvalues * 80) // 100][0],
            '85%': values[(nbvalues * 85) // 100][0],
            '90%': values[(nbvalues * 90) // 100][0],
            '95%': values[(nbvalues * 95) // 100][0],
            '99%': values[(nbvalues * 99) // 100][0],
            'max': values[-1][0],
        }
        fm.startitem()
        fm.data(**stats)
        # also print a human-readable rendition of the stats
        fm.plain('### %s (%d items)\n' % (title, len(values)))
        lines = [
            'min',
            '10%',
            '25%',
            '50%',
            '75%',
            '80%',
            '85%',
            '90%',
            '95%',
            '99%',
            'max',
        ]
        for l in lines:
            fm.plain('%s: %s\n' % (l, stats[l]))
    fm.end()


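# Usage sketch for the command below; --timing and --stats add costly extra
# measurements and a statistics summary:
#
#   $ hg perf::helper-mergecopies --revs 'merge()' --timing --stats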
@command(
    b'perf::helper-mergecopies|perfhelper-mergecopies',
    formatteropts
    + [
        (b'r', b'revs', [], b'restrict search to these revisions'),
        (b'', b'timing', False, b'provides extra data (costly)'),
        (b'', b'stats', False, b'provides statistic about the measured data'),
    ],
)
def perfhelpermergecopies(ui, repo, revs=[], **opts):
    """find statistics about potential parameters for `perfmergecopies`

    This command finds (base, p1, p2) triplets relevant for copytracing
    benchmarking in the context of a merge. It reports values for some of the
    parameters that impact merge copy tracing time during merge.

    If `--timing` is set, rename detection is run and the associated timing
    will be reported. The extra details come at the cost of slower command
    execution.

    Since rename detection is only run once, other factors might easily
    affect the precision of the timing. However it should give a good
    approximation of which revision triplets are very costly.
    """
    opts = _byteskwargs(opts)
    fm = ui.formatter(b'perf', opts)
    dotiming = opts[b'timing']
    dostats = opts[b'stats']

    output_template = [
        ("base", "%(base)12s"),
        ("p1", "%(p1.node)12s"),
        ("p2", "%(p2.node)12s"),
        ("p1.nb-revs", "%(p1.nbrevs)12d"),
        ("p1.nb-files", "%(p1.nbmissingfiles)12d"),
        ("p1.renames", "%(p1.renamedfiles)12d"),
        ("p1.time", "%(p1.time)12.3f"),
        ("p2.nb-revs", "%(p2.nbrevs)12d"),
        ("p2.nb-files", "%(p2.nbmissingfiles)12d"),
        ("p2.renames", "%(p2.renamedfiles)12d"),
        ("p2.time", "%(p2.time)12.3f"),
        ("renames", "%(nbrenamedfiles)12d"),
        ("total.time", "%(time)12.3f"),
    ]
    if not dotiming:
        output_template = [
            i
            for i in output_template
            if not ('time' in i[0] or 'renames' in i[0])
        ]
    header_names = [h for (h, v) in output_template]
    output = ' '.join([v for (h, v) in output_template]) + '\n'
    header = ' '.join(['%12s'] * len(header_names)) + '\n'
    fm.plain(header % tuple(header_names))

    if not revs:
        revs = ['all()']
    revs = scmutil.revrange(repo, revs)

    if dostats:
        alldata = {
            'nbrevs': [],
            'nbmissingfiles': [],
        }
        if dotiming:
            alldata['parentnbrenames'] = []
            alldata['totalnbrenames'] = []
            alldata['parenttime'] = []
            alldata['totaltime'] = []

    roi = repo.revs('merge() and %ld', revs)
    for r in roi:
        ctx = repo[r]
        p1 = ctx.p1()
        p2 = ctx.p2()
        bases = repo.changelog._commonancestorsheads(p1.rev(), p2.rev())
        for b in bases:
            b = repo[b]
            p1missing = copies._computeforwardmissing(b, p1)
            p2missing = copies._computeforwardmissing(b, p2)
            data = {
                b'base': b.hex(),
                b'p1.node': p1.hex(),
                b'p1.nbrevs': len(repo.revs('only(%d, %d)', p1.rev(), b.rev())),
                b'p1.nbmissingfiles': len(p1missing),
                b'p2.node': p2.hex(),
                b'p2.nbrevs': len(repo.revs('only(%d, %d)', p2.rev(), b.rev())),
                b'p2.nbmissingfiles': len(p2missing),
            }
            if dostats:
                if p1missing:
                    alldata['nbrevs'].append(
                        (data['p1.nbrevs'], b.hex(), p1.hex())
                    )
                    alldata['nbmissingfiles'].append(
                        (data['p1.nbmissingfiles'], b.hex(), p1.hex())
                    )
                if p2missing:
                    alldata['nbrevs'].append(
                        (data['p2.nbrevs'], b.hex(), p2.hex())
                    )
                    alldata['nbmissingfiles'].append(
                        (data['p2.nbmissingfiles'], b.hex(), p2.hex())
                    )
            if dotiming:
                begin = util.timer()
                mergedata = copies.mergecopies(repo, p1, p2, b)
                end = util.timer()
                # not very stable timing since we did only one run
                data['time'] = end - begin
                # mergedata contains five dicts: "copy", "movewithdir",
                # "diverge", "renamedelete" and "dirmove".
                # The first 4 are about renamed files, so let's count them.
                renames = len(mergedata[0])
                renames += len(mergedata[1])
                renames += len(mergedata[2])
                renames += len(mergedata[3])
                data['nbrenamedfiles'] = renames
                begin = util.timer()
                p1renames = copies.pathcopies(b, p1)
                end = util.timer()
                data['p1.time'] = end - begin
                begin = util.timer()
                p2renames = copies.pathcopies(b, p2)
                end = util.timer()
                data['p2.time'] = end - begin
                data['p1.renamedfiles'] = len(p1renames)
                data['p2.renamedfiles'] = len(p2renames)

                if dostats:
                    if p1missing:
                        alldata['parentnbrenames'].append(
                            (data['p1.renamedfiles'], b.hex(), p1.hex())
                        )
                        alldata['parenttime'].append(
                            (data['p1.time'], b.hex(), p1.hex())
                        )
                    if p2missing:
                        alldata['parentnbrenames'].append(
                            (data['p2.renamedfiles'], b.hex(), p2.hex())
                        )
                        alldata['parenttime'].append(
                            (data['p2.time'], b.hex(), p2.hex())
                        )
                    if p1missing or p2missing:
                        alldata['totalnbrenames'].append(
                            (
                                data['nbrenamedfiles'],
                                b.hex(),
                                p1.hex(),
                                p2.hex(),
                            )
                        )
                        alldata['totaltime'].append(
                            (data['time'], b.hex(), p1.hex(), p2.hex())
                        )
            fm.startitem()
            fm.data(**data)
            # make nodes pretty for the human output
            out = data.copy()
            out['base'] = fm.hexfunc(b.node())
            out['p1.node'] = fm.hexfunc(p1.node())
            out['p2.node'] = fm.hexfunc(p2.node())
            fm.plain(output % out)

    fm.end()
    if dostats:
        # use a second formatter because the data are quite different, not sure
        # how it flies with the templater.
        entries = [
            ('nbrevs', 'number of revisions covered'),
            ('nbmissingfiles', 'number of missing files at head'),
        ]
        if dotiming:
            entries.append(
                ('parentnbrenames', 'rename from one parent to base')
            )
            entries.append(('totalnbrenames', 'total number of renames'))
            entries.append(('parenttime', 'time for one parent'))
            entries.append(('totaltime', 'time for both parents'))
        _displaystats(ui, opts, entries, alldata)


@command(
    b'perf::helper-pathcopies|perfhelper-pathcopies',
    formatteropts
    + [
        (b'r', b'revs', [], b'restrict search to these revisions'),
        (b'', b'timing', False, b'provides extra data (costly)'),
        (b'', b'stats', False, b'provides statistic about the measured data'),
    ],
)
def perfhelperpathcopies(ui, repo, revs=[], **opts):
    """find statistics about potential parameters for the `perftracecopies`
    command

    This command finds source-destination pairs relevant for copytracing
    testing. It reports values for some of the parameters that impact copy
    tracing time.

    If `--timing` is set, rename detection is run and the associated timing
    will be reported. The extra details come at the cost of slower command
    execution.

    Since the rename detection is only run once, other factors might easily
    affect the precision of the timing. However it should give a good
    approximation of which revision pairs are very costly.
    """
    opts = _byteskwargs(opts)
    fm = ui.formatter(b'perf', opts)
    dotiming = opts[b'timing']
    dostats = opts[b'stats']

    if dotiming:
        header = '%12s %12s %12s %12s %12s %12s\n'
        output = (
            "%(source)12s %(destination)12s "
            "%(nbrevs)12d %(nbmissingfiles)12d "
            "%(nbrenamedfiles)12d %(time)18.5f\n"
        )
        header_names = (
            "source",
            "destination",
            "nb-revs",
            "nb-files",
            "nb-renames",
            "time",
        )
        fm.plain(header % header_names)
    else:
        header = '%12s %12s %12s %12s\n'
        output = (
            "%(source)12s %(destination)12s "
            "%(nbrevs)12d %(nbmissingfiles)12d\n"
        )
        fm.plain(header % ("source", "destination", "nb-revs", "nb-files"))

    if not revs:
        revs = ['all()']
    revs = scmutil.revrange(repo, revs)

    if dostats:
        alldata = {
            'nbrevs': [],
            'nbmissingfiles': [],
        }
        if dotiming:
            alldata['nbrenames'] = []
            alldata['time'] = []

    roi = repo.revs('merge() and %ld', revs)
    for r in roi:
        ctx = repo[r]
        p1 = ctx.p1().rev()
        p2 = ctx.p2().rev()
        bases = repo.changelog._commonancestorsheads(p1, p2)
        for p in (p1, p2):
            for b in bases:
                base = repo[b]
                parent = repo[p]
                missing = copies._computeforwardmissing(base, parent)
                if not missing:
                    continue
                data = {
                    b'source': base.hex(),
                    b'destination': parent.hex(),
                    b'nbrevs': len(repo.revs('only(%d, %d)', p, b)),
                    b'nbmissingfiles': len(missing),
                }
                if dostats:
                    alldata['nbrevs'].append(
                        (
                            data['nbrevs'],
                            base.hex(),
                            parent.hex(),
                        )
                    )
                    alldata['nbmissingfiles'].append(
                        (
                            data['nbmissingfiles'],
                            base.hex(),
                            parent.hex(),
                        )
                    )
                if dotiming:
                    begin = util.timer()
                    renames = copies.pathcopies(base, parent)
                    end = util.timer()
                    # the timing is not very stable since we only did one run
                    data['time'] = end - begin
                    data['nbrenamedfiles'] = len(renames)
                    if dostats:
                        alldata['time'].append(
                            (
                                data['time'],
                                base.hex(),
                                parent.hex(),
                            )
                        )
                        alldata['nbrenames'].append(
                            (
                                data['nbrenamedfiles'],
                                base.hex(),
                                parent.hex(),
                            )
                        )
                fm.startitem()
                fm.data(**data)
                out = data.copy()
                out['source'] = fm.hexfunc(base.node())
                out['destination'] = fm.hexfunc(parent.node())
                fm.plain(output % out)

    fm.end()
    if dostats:
        entries = [
            ('nbrevs', 'number of revisions covered'),
            ('nbmissingfiles', 'number of missing files at head'),
        ]
        if dotiming:
            entries.append(('nbrenames', 'renamed files'))
            entries.append(('time', 'time'))
        _displaystats(ui, opts, entries, alldata)


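# A minimal usage sketch for the helper above (hedged: the repository, the
# revision set, and the extension path are assumptions, not part of this
# change):
#
#   $ hg --config extensions.perf=contrib/perf.py \
#       perf::helper-pathcopies --timing --stats -r 'tip~100::tip'
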
@command(b'perf::cca|perfcca', formatteropts)
def perfcca(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    timer(lambda: scmutil.casecollisionauditor(ui, False, repo.dirstate))
    fm.end()


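# Hedged example: `perf::cca` times the construction of a case-collision
# auditor over the dirstate; it takes no arguments beyond the formatter
# options (the extension path below is an assumption):
#
#   $ hg --config extensions.perf=contrib/perf.py perf::cca
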
@command(b'perf::fncacheload|perffncacheload', formatteropts)
def perffncacheload(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    s = repo.store

    def d():
        s.fncache._load()

    timer(d)
    fm.end()


@command(b'perf::fncachewrite|perffncachewrite', formatteropts)
def perffncachewrite(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    s = repo.store
    lock = repo.lock()
    s.fncache._load()
    tr = repo.transaction(b'perffncachewrite')
    tr.addbackup(b'fncache')

    def d():
        s.fncache._dirty = True
        s.fncache.write(tr)

    timer(d)
    tr.close()
    lock.release()
    fm.end()


@command(b'perf::fncacheencode|perffncacheencode', formatteropts)
def perffncacheencode(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    s = repo.store
    s.fncache._load()

    def d():
        for p in s.fncache.entries:
            s.encode(p)

    timer(d)
    fm.end()


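# Hedged examples for the three fncache benchmarks above, covering load,
# write, and path-encoding respectively (extension path is an assumption):
#
#   $ hg --config extensions.perf=contrib/perf.py perf::fncacheload
#   $ hg --config extensions.perf=contrib/perf.py perf::fncachewrite
#   $ hg --config extensions.perf=contrib/perf.py perf::fncacheencode
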
def _bdiffworker(q, blocks, xdiff, ready, done):
    # Worker loop: consume text pairs from the queue until a None sentinel
    # is seen, then park on the `ready` condition until woken for the next
    # run (or until `done` tells us to exit).
    while not done.is_set():
        pair = q.get()
        while pair is not None:
            if xdiff:
                mdiff.bdiff.xdiffblocks(*pair)
            elif blocks:
                mdiff.bdiff.blocks(*pair)
            else:
                mdiff.textdiff(*pair)
            q.task_done()
            pair = q.get()
        q.task_done()  # for the None one
        with ready:
            ready.wait()


def _manifestrevision(repo, mnode):
    ml = repo.manifestlog

    if util.safehasattr(ml, b'getstorage'):
        store = ml.getstorage(b'')
    else:
        store = ml._revlog

    return store.revision(mnode)


@command(
    b'perf::bdiff|perfbdiff',
    revlogopts
    + formatteropts
    + [
        (
            b'',
            b'count',
            1,
            b'number of revisions to test (when using --startrev)',
        ),
        (b'', b'alldata', False, b'test bdiffs for all associated revisions'),
        (b'', b'threads', 0, b'number of threads to use (disable with 0)'),
        (b'', b'blocks', False, b'test computing diffs into blocks'),
        (b'', b'xdiff', False, b'use xdiff algorithm'),
    ],
    b'-c|-m|FILE REV',
)
def perfbdiff(ui, repo, file_, rev=None, count=None, threads=0, **opts):
    """benchmark a bdiff between revisions

    By default, benchmark a bdiff between its delta parent and itself.

    With ``--count``, benchmark bdiffs between delta parents and self for N
    revisions starting at the specified revision.

    With ``--alldata``, assume the requested revision is a changeset and
    measure bdiffs for all changes related to that changeset (manifest
    and filelogs).
    """
    opts = _byteskwargs(opts)

    if opts[b'xdiff'] and not opts[b'blocks']:
        raise error.CommandError(b'perfbdiff', b'--xdiff requires --blocks')

    if opts[b'alldata']:
        opts[b'changelog'] = True

    if opts.get(b'changelog') or opts.get(b'manifest'):
        file_, rev = None, file_
    elif rev is None:
        raise error.CommandError(b'perfbdiff', b'invalid arguments')

    blocks = opts[b'blocks']
    xdiff = opts[b'xdiff']
    textpairs = []

    r = cmdutil.openrevlog(repo, b'perfbdiff', file_, opts)

    startrev = r.rev(r.lookup(rev))
    for rev in range(startrev, min(startrev + count, len(r) - 1)):
        if opts[b'alldata']:
            # Load revisions associated with changeset.
            ctx = repo[rev]
            mtext = _manifestrevision(repo, ctx.manifestnode())
            for pctx in ctx.parents():
                pman = _manifestrevision(repo, pctx.manifestnode())
                textpairs.append((pman, mtext))

            # Load filelog revisions by iterating manifest delta.
            man = ctx.manifest()
            pman = ctx.p1().manifest()
            for filename, change in pman.diff(man).items():
                fctx = repo.file(filename)
                f1 = fctx.revision(change[0][0] or -1)
                f2 = fctx.revision(change[1][0] or -1)
                textpairs.append((f1, f2))
        else:
            dp = r.deltaparent(rev)
            textpairs.append((r.revision(dp), r.revision(rev)))

    withthreads = threads > 0
    if not withthreads:

        def d():
            for pair in textpairs:
                if xdiff:
                    mdiff.bdiff.xdiffblocks(*pair)
                elif blocks:
                    mdiff.bdiff.blocks(*pair)
                else:
                    mdiff.textdiff(*pair)

    else:
        q = queue()
        for i in _xrange(threads):
            q.put(None)
        ready = threading.Condition()
        done = threading.Event()
        for i in _xrange(threads):
            threading.Thread(
                target=_bdiffworker, args=(q, blocks, xdiff, ready, done)
            ).start()
        q.join()

        def d():
            for pair in textpairs:
                q.put(pair)
            for i in _xrange(threads):
                q.put(None)
            with ready:
                ready.notify_all()
            q.join()

    timer, fm = gettimer(ui, opts)
    timer(d)
    fm.end()

    if withthreads:
        done.set()
        for i in _xrange(threads):
            q.put(None)
        with ready:
            ready.notify_all()


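# Hedged example: benchmarking bdiff over 100 manifest revisions with four
# worker threads (the starting revision and extension path are assumptions).
# Note that ``--xdiff`` only makes sense together with ``--blocks``:
#
#   $ hg --config extensions.perf=contrib/perf.py \
#       perf::bdiff -m 1000 --count 100 --threads 4
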
@command(
    b'perf::unidiff|perfunidiff',
    revlogopts
    + formatteropts
    + [
        (
            b'',
            b'count',
            1,
            b'number of revisions to test (when using --startrev)',
        ),
        (b'', b'alldata', False, b'test unidiffs for all associated revisions'),
    ],
    b'-c|-m|FILE REV',
)
def perfunidiff(ui, repo, file_, rev=None, count=None, **opts):
    """benchmark a unified diff between revisions

    This doesn't include any copy tracing - it's just a unified diff
    of the texts.

    By default, benchmark a diff between its delta parent and itself.

    With ``--count``, benchmark diffs between delta parents and self for N
    revisions starting at the specified revision.

    With ``--alldata``, assume the requested revision is a changeset and
    measure diffs for all changes related to that changeset (manifest
    and filelogs).
    """
    opts = _byteskwargs(opts)
    if opts[b'alldata']:
        opts[b'changelog'] = True

    if opts.get(b'changelog') or opts.get(b'manifest'):
        file_, rev = None, file_
    elif rev is None:
        raise error.CommandError(b'perfunidiff', b'invalid arguments')

    textpairs = []

    r = cmdutil.openrevlog(repo, b'perfunidiff', file_, opts)

    startrev = r.rev(r.lookup(rev))
    for rev in range(startrev, min(startrev + count, len(r) - 1)):
        if opts[b'alldata']:
            # Load revisions associated with changeset.
            ctx = repo[rev]
            mtext = _manifestrevision(repo, ctx.manifestnode())
            for pctx in ctx.parents():
                pman = _manifestrevision(repo, pctx.manifestnode())
                textpairs.append((pman, mtext))

            # Load filelog revisions by iterating manifest delta.
            man = ctx.manifest()
            pman = ctx.p1().manifest()
            for filename, change in pman.diff(man).items():
                fctx = repo.file(filename)
                f1 = fctx.revision(change[0][0] or -1)
                f2 = fctx.revision(change[1][0] or -1)
                textpairs.append((f1, f2))
        else:
            dp = r.deltaparent(rev)
            textpairs.append((r.revision(dp), r.revision(rev)))

    def d():
        for left, right in textpairs:
            # The date strings don't matter, so we pass empty strings.
            headerlines, hunks = mdiff.unidiff(
                left, b'', right, b'', b'left', b'right', binary=False
            )
            # consume iterators in roughly the way patch.py does
            b'\n'.join(headerlines)
            b''.join(sum((list(hlines) for hrange, hlines in hunks), []))

    timer, fm = gettimer(ui, opts)
    timer(d)
    fm.end()


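# Hedged example, mirroring the bdiff benchmark above (the starting revision
# is an assumption):
#
#   $ hg --config extensions.perf=contrib/perf.py \
#       perf::unidiff -c 500 --count 20
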
@command(b'perf::diffwd|perfdiffwd', formatteropts)
def perfdiffwd(ui, repo, **opts):
    """Profile diff of working directory changes"""
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    options = {
        'w': 'ignore_all_space',
        'b': 'ignore_space_change',
        'B': 'ignore_blank_lines',
    }

    for diffopt in ('', 'w', 'b', 'B', 'wB'):
        opts = {options[c]: b'1' for c in diffopt}

        def d():
            ui.pushbuffer()
            commands.diff(ui, repo, **opts)
            ui.popbuffer()

        diffopt = diffopt.encode('ascii')
        title = b'diffopts: %s' % (diffopt and (b'-' + diffopt) or b'none')
        timer(d, title=title)
    fm.end()


@command(
    b'perf::revlogindex|perfrevlogindex',
    revlogopts + formatteropts,
    b'-c|-m|FILE',
)
def perfrevlogindex(ui, repo, file_=None, **opts):
    """Benchmark operations against a revlog index.

    This tests constructing a revlog instance, reading index data,
    parsing index data, and performing various operations related to
    index data.
    """

    opts = _byteskwargs(opts)

    rl = cmdutil.openrevlog(repo, b'perfrevlogindex', file_, opts)

    opener = getattr(rl, 'opener')  # trick linter
    indexfile = rl.indexfile
    data = opener.read(indexfile)

    header = struct.unpack(b'>I', data[0:4])[0]
    version = header & 0xFFFF
    if version == 1:
        revlogio = revlog.revlogio()
        inline = header & (1 << 16)
    else:
        raise error.Abort(b'unsupported revlog version: %d' % version)

    rllen = len(rl)

    node0 = rl.node(0)
    node25 = rl.node(rllen // 4)
    node50 = rl.node(rllen // 2)
    node75 = rl.node(rllen // 4 * 3)
    node100 = rl.node(rllen - 1)

    allrevs = range(rllen)
    allrevsrev = list(reversed(allrevs))
    allnodes = [rl.node(rev) for rev in range(rllen)]
    allnodesrev = list(reversed(allnodes))

    def constructor():
        revlog.revlog(opener, indexfile)

    def read():
        with opener(indexfile) as fh:
            fh.read()

    def parseindex():
        revlogio.parseindex(data, inline)

    def getentry(revornode):
        index = revlogio.parseindex(data, inline)[0]
        index[revornode]

    def getentries(revs, count=1):
        index = revlogio.parseindex(data, inline)[0]

        for i in range(count):
            for rev in revs:
                index[rev]

    def resolvenode(node):
        index = revlogio.parseindex(data, inline)[0]
        rev = getattr(index, 'rev', None)
        if rev is None:
            nodemap = getattr(
                revlogio.parseindex(data, inline)[0], 'nodemap', None
            )
            # This only works for the C code.
            if nodemap is None:
                return
            rev = nodemap.__getitem__

        try:
            rev(node)
        except error.RevlogError:
            pass

    def resolvenodes(nodes, count=1):
        index = revlogio.parseindex(data, inline)[0]
        rev = getattr(index, 'rev', None)
        if rev is None:
            nodemap = getattr(
                revlogio.parseindex(data, inline)[0], 'nodemap', None
            )
            # This only works for the C code.
            if nodemap is None:
                return
            rev = nodemap.__getitem__

        for i in range(count):
            for node in nodes:
                try:
                    rev(node)
                except error.RevlogError:
                    pass

    benches = [
        (constructor, b'revlog constructor'),
        (read, b'read'),
        (parseindex, b'create index object'),
        (lambda: getentry(0), b'retrieve index entry for rev 0'),
        (lambda: resolvenode(b'a' * 20), b'look up missing node'),
        (lambda: resolvenode(node0), b'look up node at rev 0'),
        (lambda: resolvenode(node25), b'look up node at 1/4 len'),
        (lambda: resolvenode(node50), b'look up node at 1/2 len'),
        (lambda: resolvenode(node75), b'look up node at 3/4 len'),
        (lambda: resolvenode(node100), b'look up node at tip'),
        # 2x variation is to measure caching impact.
        (lambda: resolvenodes(allnodes), b'look up all nodes (forward)'),
        (lambda: resolvenodes(allnodes, 2), b'look up all nodes 2x (forward)'),
        (lambda: resolvenodes(allnodesrev), b'look up all nodes (reverse)'),
        (
            lambda: resolvenodes(allnodesrev, 2),
            b'look up all nodes 2x (reverse)',
        ),
        (lambda: getentries(allrevs), b'retrieve all index entries (forward)'),
        (
            lambda: getentries(allrevs, 2),
            b'retrieve all index entries 2x (forward)',
        ),
        (
            lambda: getentries(allrevsrev),
            b'retrieve all index entries (reverse)',
        ),
        (
            lambda: getentries(allrevsrev, 2),
            b'retrieve all index entries 2x (reverse)',
        ),
    ]

    for fn, title in benches:
        timer, fm = gettimer(ui, opts)
        timer(fn, title=title)
        fm.end()


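# Hedged example: running the index micro-benchmarks above against the
# changelog (extension path is an assumption):
#
#   $ hg --config extensions.perf=contrib/perf.py perf::revlogindex -c
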
@command(
    b'perf::revlogrevisions|perfrevlogrevisions',
    revlogopts
    + formatteropts
    + [
        (b'd', b'dist', 100, b'distance between the revisions'),
        (b's', b'startrev', 0, b'revision to start reading at'),
        (b'', b'reverse', False, b'read in reverse'),
    ],
    b'-c|-m|FILE',
)
def perfrevlogrevisions(
    ui, repo, file_=None, startrev=0, reverse=False, **opts
):
    """Benchmark reading a series of revisions from a revlog.

    By default, we read every ``-d/--dist`` revision from 0 to tip of
    the specified revlog.

    The start revision can be defined via ``-s/--startrev``.
    """
    opts = _byteskwargs(opts)

    rl = cmdutil.openrevlog(repo, b'perfrevlogrevisions', file_, opts)
    rllen = getlen(ui)(rl)

    if startrev < 0:
        startrev = rllen + startrev

    def d():
        rl.clearcaches()

        beginrev = startrev
        endrev = rllen
        dist = opts[b'dist']

        if reverse:
            beginrev, endrev = endrev - 1, beginrev - 1
            dist = -1 * dist

        for x in _xrange(beginrev, endrev, dist):
            # Old revisions don't support passing int.
            n = rl.node(x)
            rl.revision(n)

    timer, fm = gettimer(ui, opts)
    timer(d)
    fm.end()


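# Hedged example: reading every 10th manifest revision, newest first
# (extension path is an assumption):
#
#   $ hg --config extensions.perf=contrib/perf.py \
#       perf::revlogrevisions -m --dist 10 --reverse
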
@command(
    b'perf::revlogwrite|perfrevlogwrite',
    revlogopts
    + formatteropts
    + [
        (b's', b'startrev', 1000, b'revision to start writing at'),
        (b'', b'stoprev', -1, b'last revision to write'),
        (b'', b'count', 3, b'number of passes to perform'),
        (b'', b'details', False, b'print timing for every revision tested'),
        (b'', b'source', b'full', b'the kind of data fed into the revlog'),
        (b'', b'lazydeltabase', True, b'try the provided delta first'),
        (b'', b'clear-caches', True, b'clear revlog cache between calls'),
    ],
    b'-c|-m|FILE',
)
def perfrevlogwrite(ui, repo, file_=None, startrev=1000, stoprev=-1, **opts):
    """Benchmark writing a series of revisions to a revlog.

    Possible source values are:
    * `full`: add from a full text (default).
    * `parent-1`: add from a delta to the first parent
    * `parent-2`: add from a delta to the second parent if it exists
                  (use a delta from the first parent otherwise)
    * `parent-smallest`: add from the smallest delta (either p1 or p2)
    * `storage`: add from the existing precomputed deltas

    Note: This performance command measures performance in a custom way. As a
    result some of the global configuration of the 'perf' command does not
    apply to it:

    * ``pre-run``: disabled

    * ``profile-benchmark``: disabled

    * ``run-limits``: disabled, use --count instead
    """
    opts = _byteskwargs(opts)

    rl = cmdutil.openrevlog(repo, b'perfrevlogwrite', file_, opts)
    rllen = getlen(ui)(rl)
    if startrev < 0:
        startrev = rllen + startrev
    if stoprev < 0:
        stoprev = rllen + stoprev

    lazydeltabase = opts['lazydeltabase']
    source = opts['source']
    clearcaches = opts['clear_caches']
    validsource = (
        b'full',
        b'parent-1',
        b'parent-2',
        b'parent-smallest',
        b'storage',
    )
    if source not in validsource:
        raise error.Abort('invalid source type: %s' % source)

    ### actually gather results
    count = opts['count']
    if count <= 0:
        raise error.Abort('invalid run count: %d' % count)
    allresults = []
    for c in range(count):
        timing = _timeonewrite(
            ui,
            rl,
            source,
            startrev,
            stoprev,
            c + 1,
            lazydeltabase=lazydeltabase,
            clearcaches=clearcaches,
        )
        allresults.append(timing)

    ### consolidate the results in a single list
    results = []
    for idx, (rev, t) in enumerate(allresults[0]):
        ts = [t]
        for other in allresults[1:]:
            orev, ot = other[idx]
            assert orev == rev
            ts.append(ot)
        results.append((rev, ts))
    resultcount = len(results)

    ### Compute and display relevant statistics

    # get a formatter
    fm = ui.formatter(b'perf', opts)
    displayall = ui.configbool(b"perf", b"all-timing", False)

    # print individual details if requested
    if opts['details']:
        for idx, item in enumerate(results, 1):
            rev, data = item
            title = 'revisions #%d of %d, rev %d' % (idx, resultcount, rev)
            formatone(fm, data, title=title, displayall=displayall)

    # sort results by median time
    results.sort(key=lambda x: sorted(x[1])[len(x[1]) // 2])
    # list of (name, index) to display
    relevants = [
        ("min", 0),
        ("10%", resultcount * 10 // 100),
        ("25%", resultcount * 25 // 100),
        ("50%", resultcount * 50 // 100),
        ("75%", resultcount * 75 // 100),
        ("90%", resultcount * 90 // 100),
        ("95%", resultcount * 95 // 100),
        ("99%", resultcount * 99 // 100),
        ("99.9%", resultcount * 999 // 1000),
        ("99.99%", resultcount * 9999 // 10000),
        ("99.999%", resultcount * 99999 // 100000),
        ("max", -1),
    ]
    if not ui.quiet:
        for name, idx in relevants:
            data = results[idx]
            title = '%s of %d, rev %d' % (name, resultcount, data[0])
            formatone(fm, data[1], title=title, displayall=displayall)

    # XXX summing that many floats will not be very precise, we ignore this
    # fact for now
    totaltime = []
    for item in allresults:
        totaltime.append(
            (
                sum(x[1][0] for x in item),
                sum(x[1][1] for x in item),
                sum(x[1][2] for x in item),
            )
        )
    formatone(
        fm,
        totaltime,
        title="total time (%d revs)" % resultcount,
        displayall=displayall,
    )
    fm.end()


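# Hedged example: re-adding the last 1000 changelog revisions from full
# texts, in five passes (the revision range and extension path are
# assumptions):
#
#   $ hg --config extensions.perf=contrib/perf.py \
#       perf::revlogwrite -c -s -1000 --count 5 --source full
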
class _faketr(object):
    # minimal transaction stub, just enough for addrawrevision() to call
    def add(s, x, y, z=None):
        return None


def _timeonewrite(
    ui,
    orig,
    source,
    startrev,
    stoprev,
    runidx=None,
    lazydeltabase=True,
    clearcaches=True,
):
    timings = []
    tr = _faketr()
    with _temprevlog(ui, orig, startrev) as dest:
        dest._lazydeltabase = lazydeltabase
        revs = list(orig.revs(startrev, stoprev))
        total = len(revs)
        topic = 'adding'
        if runidx is not None:
            topic += ' (run #%d)' % runidx
        # Support both old and new progress API
        if util.safehasattr(ui, 'makeprogress'):
            progress = ui.makeprogress(topic, unit='revs', total=total)

            def updateprogress(pos):
                progress.update(pos)

            def completeprogress():
                progress.complete()

        else:

            def updateprogress(pos):
                ui.progress(topic, pos, unit='revs', total=total)

            def completeprogress():
                ui.progress(topic, None, unit='revs', total=total)

        for idx, rev in enumerate(revs):
            updateprogress(idx)
            addargs, addkwargs = _getrevisionseed(orig, rev, tr, source)
            if clearcaches:
                dest.index.clearcaches()
                dest.clearcaches()
            with timeone() as r:
                dest.addrawrevision(*addargs, **addkwargs)
            timings.append((rev, r[0]))
        updateprogress(total)
        completeprogress()
    return timings


def _getrevisionseed(orig, rev, tr, source):
    from mercurial.node import nullid

    linkrev = orig.linkrev(rev)
    node = orig.node(rev)
    p1, p2 = orig.parents(node)
    flags = orig.flags(rev)
    cachedelta = None
    text = None

    if source == b'full':
        text = orig.revision(rev)
    elif source == b'parent-1':
        baserev = orig.rev(p1)
        cachedelta = (baserev, orig.revdiff(p1, rev))
    elif source == b'parent-2':
        parent = p2
        if p2 == nullid:
            parent = p1
        baserev = orig.rev(parent)
        cachedelta = (baserev, orig.revdiff(parent, rev))
    elif source == b'parent-smallest':
        p1diff = orig.revdiff(p1, rev)
        parent = p1
        diff = p1diff
        if p2 != nullid:
            p2diff = orig.revdiff(p2, rev)
            if len(p1diff) > len(p2diff):
                parent = p2
                diff = p2diff
        baserev = orig.rev(parent)
        cachedelta = (baserev, diff)
    elif source == b'storage':
        baserev = orig.deltaparent(rev)
        cachedelta = (baserev, orig.revdiff(orig.node(baserev), rev))

    return (
        (text, tr, linkrev, p1, p2),
        {'node': node, 'flags': flags, 'cachedelta': cachedelta},
    )


@contextlib.contextmanager
def _temprevlog(ui, orig, truncaterev):
    from mercurial import vfs as vfsmod

    if orig._inline:
        raise error.Abort('not supporting inline revlog (yet)')
    revlogkwargs = {}
    k = 'upperboundcomp'
    if util.safehasattr(orig, k):
        revlogkwargs[k] = getattr(orig, k)

    origindexpath = orig.opener.join(orig.indexfile)
    origdatapath = orig.opener.join(orig.datafile)
    indexname = 'revlog.i'
    dataname = 'revlog.d'

    tmpdir = tempfile.mkdtemp(prefix='tmp-hgperf-')
    try:
        # copy the data file in a temporary directory
        ui.debug('copying data in %s\n' % tmpdir)
        destindexpath = os.path.join(tmpdir, 'revlog.i')
        destdatapath = os.path.join(tmpdir, 'revlog.d')
        shutil.copyfile(origindexpath, destindexpath)
        shutil.copyfile(origdatapath, destdatapath)

        # remove the data we want to add again
        ui.debug('truncating data to be rewritten\n')
        with open(destindexpath, 'ab') as index:
            index.seek(0)
            # index entries have a fixed size, so the first `truncaterev`
            # revisions occupy exactly `truncaterev * entry size` bytes
            index.truncate(truncaterev * orig._io.size)
        with open(destdatapath, 'ab') as data:
            data.seek(0)
            data.truncate(orig.start(truncaterev))

        # instantiate a new revlog from the temporary copy
        ui.debug('opening the revlog to be rewritten\n')
        vfs = vfsmod.vfs(tmpdir)
        vfs.options = getattr(orig.opener, 'options', None)

        dest = revlog.revlog(
            vfs, indexfile=indexname, datafile=dataname, **revlogkwargs
        )
        if dest._inline:
            raise error.Abort('not supporting inline revlog (yet)')
        # make sure internals are initialized
        dest.revision(len(dest) - 1)
        yield dest
        del dest, vfs
    finally:
        shutil.rmtree(tmpdir, True)


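# Rough sketch of the byte arithmetic used by _temprevlog above, assuming a
# revlogv1 index, whose entries have a fixed size of 64 bytes (the concrete
# numbers below are hypothetical):
#
#   entry_size = 64                    # bytes per revlogv1 index entry
#   truncaterev = 1000                 # keep revisions 0..999
#   keep = truncaterev * entry_size    # -> 64000 bytes of index kept
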
3059 @command(
3059 @command(
3060 b'perf::revlogchunks|perfrevlogchunks',
3060 b'perf::revlogchunks|perfrevlogchunks',
3061 revlogopts
3061 revlogopts
3062 + formatteropts
3062 + formatteropts
3063 + [
3063 + [
3064 (b'e', b'engines', b'', b'compression engines to use'),
3064 (b'e', b'engines', b'', b'compression engines to use'),
3065 (b's', b'startrev', 0, b'revision to start at'),
3065 (b's', b'startrev', 0, b'revision to start at'),
3066 ],
3066 ],
3067 b'-c|-m|FILE',
3067 b'-c|-m|FILE',
3068 )
3068 )
3069 def perfrevlogchunks(ui, repo, file_=None, engines=None, startrev=0, **opts):
3069 def perfrevlogchunks(ui, repo, file_=None, engines=None, startrev=0, **opts):
3070 """Benchmark operations on revlog chunks.
3070 """Benchmark operations on revlog chunks.
3071
3071
3072 Logically, each revlog is a collection of fulltext revisions. However,
3072 Logically, each revlog is a collection of fulltext revisions. However,
3073 stored within each revlog are "chunks" of possibly compressed data. This
3073 stored within each revlog are "chunks" of possibly compressed data. This
3074 data needs to be read and decompressed or compressed and written.
3074 data needs to be read and decompressed or compressed and written.
3075
3075
3076 This command measures the time it takes to read+decompress and recompress
3076 This command measures the time it takes to read+decompress and recompress
3077 chunks in a revlog. It effectively isolates I/O and compression performance.
3077 chunks in a revlog. It effectively isolates I/O and compression performance.
3078 For measurements of higher-level operations like resolving revisions,
3078 For measurements of higher-level operations like resolving revisions,
3079 see ``perfrevlogrevisions`` and ``perfrevlogrevision``.
3079 see ``perfrevlogrevisions`` and ``perfrevlogrevision``.
3080 """
3080 """
    opts = _byteskwargs(opts)

    rl = cmdutil.openrevlog(repo, b'perfrevlogchunks', file_, opts)

    # _chunkraw was renamed to _getsegmentforrevs.
    try:
        segmentforrevs = rl._getsegmentforrevs
    except AttributeError:
        segmentforrevs = rl._chunkraw

    # Verify engines argument.
    if engines:
        engines = {e.strip() for e in engines.split(b',')}
        for engine in engines:
            try:
                util.compressionengines[engine]
            except KeyError:
                raise error.Abort(b'unknown compression engine: %s' % engine)
    else:
        engines = []
        for e in util.compengines:
            engine = util.compengines[e]
            try:
                if engine.available():
                    engine.revlogcompressor().compress(b'dummy')
                    engines.append(e)
            except NotImplementedError:
                pass

    revs = list(rl.revs(startrev, len(rl) - 1))

    def rlfh(rl):
        if rl._inline:
            return getsvfs(repo)(rl.indexfile)
        else:
            return getsvfs(repo)(rl.datafile)

    def doread():
        rl.clearcaches()
        for rev in revs:
            segmentforrevs(rev, rev)

    def doreadcachedfh():
        rl.clearcaches()
        fh = rlfh(rl)
        for rev in revs:
            segmentforrevs(rev, rev, df=fh)

    def doreadbatch():
        rl.clearcaches()
        segmentforrevs(revs[0], revs[-1])

    def doreadbatchcachedfh():
        rl.clearcaches()
        fh = rlfh(rl)
        segmentforrevs(revs[0], revs[-1], df=fh)

    def dochunk():
        rl.clearcaches()
        fh = rlfh(rl)
        for rev in revs:
            rl._chunk(rev, df=fh)

    chunks = [None]

    def dochunkbatch():
        rl.clearcaches()
        fh = rlfh(rl)
        # Save chunks as a side-effect.
        chunks[0] = rl._chunks(revs, df=fh)

    def docompress(compressor):
        rl.clearcaches()

        try:
            # Swap in the requested compression engine.
            oldcompressor = rl._compressor
            rl._compressor = compressor
            for chunk in chunks[0]:
                rl.compress(chunk)
        finally:
            rl._compressor = oldcompressor

    benches = [
        (lambda: doread(), b'read'),
        (lambda: doreadcachedfh(), b'read w/ reused fd'),
        (lambda: doreadbatch(), b'read batch'),
        (lambda: doreadbatchcachedfh(), b'read batch w/ reused fd'),
        (lambda: dochunk(), b'chunk'),
        (lambda: dochunkbatch(), b'chunk batch'),
    ]

    for engine in sorted(engines):
        compressor = util.compengines[engine].revlogcompressor()
        benches.append(
            (
                functools.partial(docompress, compressor),
                b'compress w/ %s' % engine,
            )
        )

    for fn, title in benches:
        timer, fm = gettimer(ui, opts)
        timer(fn, title=title)
        fm.end()


@command(
    b'perf::revlogrevision|perfrevlogrevision',
    revlogopts
    + formatteropts
    + [(b'', b'cache', False, b'use caches instead of clearing')],
    b'-c|-m|FILE REV',
)
def perfrevlogrevision(ui, repo, file_, rev=None, cache=None, **opts):
    """Benchmark obtaining a revlog revision.

    Obtaining a revlog revision consists of roughly the following steps:

    1. Compute the delta chain
    2. Slice the delta chain if applicable
    3. Obtain the raw chunks for that delta chain
    4. Decompress each raw chunk
    5. Apply binary patches to obtain fulltext
    6. Verify hash of fulltext

    This command measures the time spent in each of these phases.
    """
    opts = _byteskwargs(opts)

    if opts.get(b'changelog') or opts.get(b'manifest'):
        file_, rev = None, file_
    elif rev is None:
        raise error.CommandError(b'perfrevlogrevision', b'invalid arguments')

    r = cmdutil.openrevlog(repo, b'perfrevlogrevision', file_, opts)

    # _chunkraw was renamed to _getsegmentforrevs.
    try:
        segmentforrevs = r._getsegmentforrevs
    except AttributeError:
        segmentforrevs = r._chunkraw

    node = r.lookup(rev)
    rev = r.rev(node)

    def getrawchunks(data, chain):
        start = r.start
        length = r.length
        inline = r._inline
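        # the per-entry size moved from revlog._io.size to
        # revlog.index.entry_size (the change this commit introduces);
        # prefer the new attribute and fall back for older revlogs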
        try:
            iosize = r.index.entry_size
        except AttributeError:
            iosize = r._io.size
        buffer = util.buffer

        chunks = []
        ladd = chunks.append
        for idx, item in enumerate(chain):
            offset = start(item[0])
            bits = data[idx]
            for rev in item:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(buffer(bits, chunkstart - offset, chunklength))

        return chunks

    def dodeltachain(rev):
        if not cache:
            r.clearcaches()
        r._deltachain(rev)

    def doread(chain):
        if not cache:
            r.clearcaches()
        for item in slicedchain:
            segmentforrevs(item[0], item[-1])

    def doslice(r, chain, size):
        for s in slicechunk(r, chain, targetsize=size):
            pass

    def dorawchunks(data, chain):
        if not cache:
            r.clearcaches()
        getrawchunks(data, chain)

    def dodecompress(chunks):
        decomp = r.decompress
        for chunk in chunks:
            decomp(chunk)

    def dopatch(text, bins):
        if not cache:
            r.clearcaches()
        mdiff.patches(text, bins)

    def dohash(text):
        if not cache:
            r.clearcaches()
        r.checkhash(text, node, rev=rev)

    def dorevision():
        if not cache:
            r.clearcaches()
        r.revision(node)

    try:
        from mercurial.revlogutils.deltas import slicechunk
    except ImportError:
        slicechunk = getattr(revlog, '_slicechunk', None)

    size = r.length(rev)
    chain = r._deltachain(rev)[0]
    if not getattr(r, '_withsparseread', False):
        slicedchain = (chain,)
    else:
        slicedchain = tuple(slicechunk(r, chain, targetsize=size))
    data = [segmentforrevs(seg[0], seg[-1])[1] for seg in slicedchain]
    rawchunks = getrawchunks(data, slicedchain)
    bins = r._chunks(chain)
    text = bytes(bins[0])
    bins = bins[1:]
    text = mdiff.patches(text, bins)

    benches = [
        (lambda: dorevision(), b'full'),
        (lambda: dodeltachain(rev), b'deltachain'),
        (lambda: doread(chain), b'read'),
    ]

    if getattr(r, '_withsparseread', False):
        slicing = (lambda: doslice(r, chain, size), b'slice-sparse-chain')
        benches.append(slicing)

    benches.extend(
        [
            (lambda: dorawchunks(data, slicedchain), b'rawchunks'),
            (lambda: dodecompress(rawchunks), b'decompress'),
            (lambda: dopatch(text, bins), b'patch'),
            (lambda: dohash(text), b'hash'),
        ]
    )

    timer, fm = gettimer(ui, opts)
    for fn, title in benches:
        timer(fn, title=title)
    fm.end()


@command(
    b'perf::revset|perfrevset',
    [
        (b'C', b'clear', False, b'clear volatile cache between each call.'),
        (b'', b'contexts', False, b'obtain changectx for each revision'),
    ]
    + formatteropts,
    b"REVSET",
)
def perfrevset(ui, repo, expr, clear=False, contexts=False, **opts):
3341 """benchmark the execution time of a revset
3344 """benchmark the execution time of a revset
3342
3345
3343 Use the --clean option if need to evaluate the impact of build volatile
3346 Use the --clean option if need to evaluate the impact of build volatile
3344 revisions set cache on the revset execution. Volatile cache hold filtered
3347 revisions set cache on the revset execution. Volatile cache hold filtered
3345 and obsolete related cache."""
3348 and obsolete related cache."""
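    # For example (hypothetical invocation), timing a revset with and
    # without building changectx objects:
    #
    #   $ hg perfrevset 'heads(all())'
    #   $ hg perfrevset 'heads(all())' --contexts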
    opts = _byteskwargs(opts)

    timer, fm = gettimer(ui, opts)

    def d():
        if clear:
            repo.invalidatevolatilesets()
        if contexts:
            for ctx in repo.set(expr):
                pass
        else:
            for r in repo.revs(expr):
                pass

    timer(d)
    fm.end()


@command(
    b'perf::volatilesets|perfvolatilesets',
    [
        (b'', b'clear-obsstore', False, b'drop obsstore between each call.'),
    ]
    + formatteropts,
)
def perfvolatilesets(ui, repo, *names, **opts):
3372 """benchmark the computation of various volatile set
3375 """benchmark the computation of various volatile set
3373
3376
3374 Volatile set computes element related to filtering and obsolescence."""
3377 Volatile set computes element related to filtering and obsolescence."""
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    repo = repo.unfiltered()

    def getobs(name):
        def d():
            repo.invalidatevolatilesets()
            if opts[b'clear_obsstore']:
                clearfilecache(repo, b'obsstore')
            obsolete.getrevs(repo, name)

        return d

    allobs = sorted(obsolete.cachefuncs)
    if names:
        allobs = [n for n in allobs if n in names]

    for name in allobs:
        timer(getobs(name), title=name)

    def getfiltered(name):
        def d():
            repo.invalidatevolatilesets()
            if opts[b'clear_obsstore']:
                clearfilecache(repo, b'obsstore')
            repoview.filterrevs(repo, name)

        return d

    allfilter = sorted(repoview.filtertable)
    if names:
        allfilter = [n for n in allfilter if n in names]

    for name in allfilter:
        timer(getfiltered(name), title=name)
    fm.end()


@command(
    b'perf::branchmap|perfbranchmap',
    [
        (b'f', b'full', False, b'Includes build time of subset'),
        (
            b'',
            b'clear-revbranch',
            False,
            b'purge the revbranch cache between computation',
        ),
    ]
    + formatteropts,
)
def perfbranchmap(ui, repo, *filternames, **opts):
    """benchmark the update of a branchmap

    This benchmarks the full repo.branchmap() call with read and write disabled
    """
    opts = _byteskwargs(opts)
    full = opts.get(b"full", False)
    clear_revbranch = opts.get(b"clear_revbranch", False)
    timer, fm = gettimer(ui, opts)

    def getbranchmap(filtername):
        """generate a benchmark function for the filtername"""
        if filtername is None:
            view = repo
        else:
            view = repo.filtered(filtername)
        if util.safehasattr(view._branchcaches, '_per_filter'):
            filtered = view._branchcaches._per_filter
        else:
            # older versions
            filtered = view._branchcaches

        def d():
            if clear_revbranch:
                repo.revbranchcache()._clear()
            if full:
                view._branchcaches.clear()
            else:
                filtered.pop(filtername, None)
            view.branchmap()

        return d

    # add filter in smaller subset to bigger subset
    possiblefilters = set(repoview.filtertable)
    if filternames:
        possiblefilters &= set(filternames)
    subsettable = getbranchmapsubsettable()
    allfilters = []
    while possiblefilters:
        for name in possiblefilters:
            subset = subsettable.get(name)
            if subset not in possiblefilters:
                break
        else:
            assert False, b'subset cycle %s!' % possiblefilters
        allfilters.append(name)
        possiblefilters.remove(name)

    # warm the cache
    if not full:
        for name in allfilters:
            repo.filtered(name).branchmap()
    if not filternames or b'unfiltered' in filternames:
        # add unfiltered
        allfilters.append(None)

    if util.safehasattr(branchmap.branchcache, 'fromfile'):
        branchcacheread = safeattrsetter(branchmap.branchcache, b'fromfile')
        branchcacheread.set(classmethod(lambda *args: None))
    else:
        # older versions
        branchcacheread = safeattrsetter(branchmap, b'read')
        branchcacheread.set(lambda *args: None)
    branchcachewrite = safeattrsetter(branchmap.branchcache, b'write')
    branchcachewrite.set(lambda *args: None)
    try:
        for name in allfilters:
            printname = name
            if name is None:
                printname = b'unfiltered'
            timer(getbranchmap(name), title=printname)
    finally:
        branchcacheread.restore()
        branchcachewrite.restore()
    fm.end()


@command(
    b'perf::branchmapupdate|perfbranchmapupdate',
    [
        (b'', b'base', [], b'subset of revision to start from'),
        (b'', b'target', [], b'subset of revision to end with'),
        (b'', b'clear-caches', False, b'clear caches between each run'),
    ]
    + formatteropts,
)
def perfbranchmapupdate(ui, repo, base=(), target=(), **opts):
3514 """benchmark branchmap update from for <base> revs to <target> revs
3517 """benchmark branchmap update from for <base> revs to <target> revs
3515
3518
3516 If `--clear-caches` is passed, the following items will be reset before
3519 If `--clear-caches` is passed, the following items will be reset before
3517 each update:
3520 each update:
3518 * the changelog instance and associated indexes
3521 * the changelog instance and associated indexes
3519 * the rev-branch-cache instance
3522 * the rev-branch-cache instance
3520
3523
3521 Examples:
3524 Examples:
3522
3525
3523 # update for the one last revision
3526 # update for the one last revision
3524 $ hg perfbranchmapupdate --base 'not tip' --target 'tip'
3527 $ hg perfbranchmapupdate --base 'not tip' --target 'tip'
3525
3528
3526 $ update for change coming with a new branch
3529 $ update for change coming with a new branch
    $ hg perfbranchmapupdate --base 'stable' --target 'default'
    """
    from mercurial import branchmap
    from mercurial import repoview

    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    clearcaches = opts[b'clear_caches']
    unfi = repo.unfiltered()
    x = [None]  # used to pass data between closures

    # we use a `list` here to avoid possible side effect from smartset
    baserevs = list(scmutil.revrange(repo, base))
    targetrevs = list(scmutil.revrange(repo, target))
    if not baserevs:
        raise error.Abort(b'no revisions selected for --base')
    if not targetrevs:
        raise error.Abort(b'no revisions selected for --target')

    # make sure the target branchmap also contains the one in the base
    targetrevs = list(set(baserevs) | set(targetrevs))
    targetrevs.sort()

    cl = repo.changelog
    allbaserevs = list(cl.ancestors(baserevs, inclusive=True))
    allbaserevs.sort()
    alltargetrevs = frozenset(cl.ancestors(targetrevs, inclusive=True))

    newrevs = list(alltargetrevs.difference(allbaserevs))
    newrevs.sort()

    allrevs = frozenset(unfi.changelog.revs())
    basefilterrevs = frozenset(allrevs.difference(allbaserevs))
    targetfilterrevs = frozenset(allrevs.difference(alltargetrevs))

    def basefilter(repo, visibilityexceptions=None):
        return basefilterrevs

    def targetfilter(repo, visibilityexceptions=None):
        return targetfilterrevs

    msg = b'benchmark of branchmap with %d revisions with %d new ones\n'
    ui.status(msg % (len(allbaserevs), len(newrevs)))
    if targetfilterrevs:
        msg = b'(%d revisions still filtered)\n'
        ui.status(msg % len(targetfilterrevs))

    try:
        repoview.filtertable[b'__perf_branchmap_update_base'] = basefilter
        repoview.filtertable[b'__perf_branchmap_update_target'] = targetfilter

        baserepo = repo.filtered(b'__perf_branchmap_update_base')
        targetrepo = repo.filtered(b'__perf_branchmap_update_target')

        # try to find an existing branchmap to reuse
        subsettable = getbranchmapsubsettable()
        candidatefilter = subsettable.get(None)
        while candidatefilter is not None:
            candidatebm = repo.filtered(candidatefilter).branchmap()
            if candidatebm.validfor(baserepo):
                filtered = repoview.filterrevs(repo, candidatefilter)
                missing = [r for r in allbaserevs if r in filtered]
                base = candidatebm.copy()
                base.update(baserepo, missing)
                break
            candidatefilter = subsettable.get(candidatefilter)
        else:
            # no suitable subset was found
            base = branchmap.branchcache()
            base.update(baserepo, allbaserevs)

        def setup():
            x[0] = base.copy()
            if clearcaches:
                unfi._revbranchcache = None
                clearchangelog(repo)

        def bench():
            x[0].update(targetrepo, newrevs)

        timer(bench, setup=setup)
        fm.end()
    finally:
        repoview.filtertable.pop(b'__perf_branchmap_update_base', None)
        repoview.filtertable.pop(b'__perf_branchmap_update_target', None)


@command(
    b'perf::branchmapload|perfbranchmapload',
    [
        (b'f', b'filter', b'', b'Specify repoview filter'),
        (b'', b'list', False, b'List branchmap filter caches'),
        (b'', b'clear-revlogs', False, b'refresh changelog and manifest'),
    ]
    + formatteropts,
)
def perfbranchmapload(ui, repo, filter=b'', list=False, **opts):
    """benchmark reading the branchmap"""
    opts = _byteskwargs(opts)
    clearrevlogs = opts[b'clear_revlogs']

    if list:
        for name, kind, st in repo.cachevfs.readdir(stat=True):
            if name.startswith(b'branch2'):
                filtername = name.partition(b'-')[2] or b'unfiltered'
                ui.status(
                    b'%s - %s\n' % (filtername, util.bytecount(st.st_size))
                )
        return
    if not filter:
        filter = None
    subsettable = getbranchmapsubsettable()
    if filter is None:
        repo = repo.unfiltered()
    else:
        repo = repoview.repoview(repo, filter)

    repo.branchmap()  # make sure we have a relevant, up to date branchmap

    try:
        fromfile = branchmap.branchcache.fromfile
    except AttributeError:
        # older versions
        fromfile = branchmap.read

    currentfilter = filter
    # try once without timer, the filter may not be cached
    while fromfile(repo) is None:
        currentfilter = subsettable.get(currentfilter)
        if currentfilter is None:
            raise error.Abort(
                b'No branchmap cached for %s repo' % (filter or b'unfiltered')
            )
        repo = repo.filtered(currentfilter)
    timer, fm = gettimer(ui, opts)

    def setup():
        if clearrevlogs:
            clearchangelog(repo)

    def bench():
        fromfile(repo)

    timer(bench, setup=setup)
    fm.end()


@command(b'perf::loadmarkers|perfloadmarkers')
def perfloadmarkers(ui, repo):
    """benchmark the time to parse the on-disk markers for a repo

    Result is the number of markers in the repo."""
    timer, fm = gettimer(ui)
    svfs = getsvfs(repo)
    timer(lambda: len(obsolete.obsstore(repo, svfs)))
    fm.end()


@command(
    b'perf::lrucachedict|perflrucachedict',
    formatteropts
    + [
        (b'', b'costlimit', 0, b'maximum total cost of items in cache'),
        (b'', b'mincost', 0, b'smallest cost of items in cache'),
        (b'', b'maxcost', 100, b'maximum cost of items in cache'),
        (b'', b'size', 4, b'size of cache'),
        (b'', b'gets', 10000, b'number of key lookups'),
        (b'', b'sets', 10000, b'number of key sets'),
        (b'', b'mixed', 10000, b'number of mixed mode operations'),
        (
            b'',
            b'mixedgetfreq',
            50,
            b'frequency of get vs set ops in mixed mode',
        ),
    ],
    norepo=True,
)
def perflrucache(
    ui,
    mincost=0,
    maxcost=100,
    costlimit=0,
    size=4,
    gets=10000,
    sets=10000,
    mixed=10000,
    mixedgetfreq=50,
    **opts
):
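    """benchmark the util.lrucachedict implementation

    The get, set and mixed workloads exercised below are described by the
    inline comments next to each mode's setup."""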
    opts = _byteskwargs(opts)

    def doinit():
        for i in _xrange(10000):
            util.lrucachedict(size)

    costrange = list(range(mincost, maxcost + 1))

    values = []
    for i in _xrange(size):
        values.append(random.randint(0, _maxint))

    # Get mode fills the cache and tests raw lookup performance with no
    # eviction.
    getseq = []
    for i in _xrange(gets):
        getseq.append(random.choice(values))

    def dogets():
        d = util.lrucachedict(size)
        for v in values:
            d[v] = v
        for key in getseq:
            value = d[key]
            value  # silence pyflakes warning

    def dogetscost():
        d = util.lrucachedict(size, maxcost=costlimit)
        for i, v in enumerate(values):
            d.insert(v, v, cost=costs[i])
        for key in getseq:
            try:
                value = d[key]
                value  # silence pyflakes warning
            except KeyError:
                pass

    # Set mode tests insertion speed with cache eviction.
    setseq = []
    costs = []
    for i in _xrange(sets):
        setseq.append(random.randint(0, _maxint))
        costs.append(random.choice(costrange))

    def doinserts():
        d = util.lrucachedict(size)
        for v in setseq:
            d.insert(v, v)

    def doinsertscost():
        d = util.lrucachedict(size, maxcost=costlimit)
        for i, v in enumerate(setseq):
            d.insert(v, v, cost=costs[i])

    def dosets():
        d = util.lrucachedict(size)
        for v in setseq:
            d[v] = v

    # Mixed mode randomly performs gets and sets with eviction.
    mixedops = []
    for i in _xrange(mixed):
        r = random.randint(0, 100)
        if r < mixedgetfreq:
            op = 0
        else:
            op = 1

        mixedops.append(
            (op, random.randint(0, size * 2), random.choice(costrange))
        )

    def domixed():
        d = util.lrucachedict(size)

        for op, v, cost in mixedops:
            if op == 0:
                try:
                    d[v]
                except KeyError:
                    pass
            else:
                d[v] = v

    def domixedcost():
        d = util.lrucachedict(size, maxcost=costlimit)

        for op, v, cost in mixedops:
            if op == 0:
                try:
                    d[v]
                except KeyError:
                    pass
            else:
                d.insert(v, v, cost=cost)

    benches = [
        (doinit, b'init'),
    ]

    if costlimit:
        benches.extend(
            [
                (dogetscost, b'gets w/ cost limit'),
                (doinsertscost, b'inserts w/ cost limit'),
                (domixedcost, b'mixed w/ cost limit'),
            ]
        )
    else:
        benches.extend(
            [
                (dogets, b'gets'),
                (doinserts, b'inserts'),
                (dosets, b'sets'),
                (domixed, b'mixed'),
            ]
        )

    for fn, title in benches:
        timer, fm = gettimer(ui, opts)
        timer(fn, title=title)
        fm.end()


@command(
    b'perf::write|perfwrite',
    formatteropts
    + [
        (b'', b'write-method', b'write', b'ui write method'),
        (b'', b'nlines', 100, b'number of lines'),
        (b'', b'nitems', 100, b'number of items (per line)'),
        (b'', b'item', b'x', b'item that is written'),
        (b'', b'batch-line', None, b'pass whole line to write method at once'),
        (b'', b'flush-line', None, b'flush after each line'),
    ],
)
def perfwrite(ui, repo, **opts):
    """microbenchmark ui.write (and others)"""
    opts = _byteskwargs(opts)

    write = getattr(ui, _sysstr(opts[b'write_method']))
    nlines = int(opts[b'nlines'])
    nitems = int(opts[b'nitems'])
    item = opts[b'item']
    batch_line = opts.get(b'batch_line')
    flush_line = opts.get(b'flush_line')

    if batch_line:
        line = item * nitems + b'\n'

    def benchmark():
        for i in pycompat.xrange(nlines):
            if batch_line:
                write(line)
            else:
                for i in pycompat.xrange(nitems):
                    write(item)
                write(b'\n')
            if flush_line:
                ui.flush()
        ui.flush()

    timer, fm = gettimer(ui, opts)
    timer(benchmark)
    fm.end()
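    # Hypothetical invocations exercising the two write paths:
    #
    #   $ hg perfwrite --nlines 1000 --batch-line
    #   $ hg perfwrite --nlines 1000 --flush-line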


def uisetup(ui):
    if util.safehasattr(cmdutil, b'openrevlog') and not util.safehasattr(
        commands, b'debugrevlogopts'
    ):
        # for "historical portability":
        # In this case, Mercurial should be 1.9 (or a79fea6b3e77) -
        # 3.7 (or 5606f7d0d063). Therefore, '--dir' option for
        # openrevlog() should cause failure, because it has been
        # available since 3.5 (or 49c583ca48c4).
        def openrevlog(orig, repo, cmd, file_, opts):
            if opts.get(b'dir') and not util.safehasattr(repo, b'dirlog'):
                raise error.Abort(
                    b"This version doesn't support --dir option",
                    hint=b"use 3.5 or later",
                )
            return orig(repo, cmd, file_, opts)

        extensions.wrapfunction(cmdutil, b'openrevlog', openrevlog)


@command(
    b'perf::progress|perfprogress',
    formatteropts
    + [
        (b'', b'topic', b'topic', b'topic for progress messages'),
        (b'c', b'total', 1000000, b'total value we are progressing to'),
    ],
    norepo=True,
)
def perfprogress(ui, topic=None, total=None, **opts):
    """printing of progress bars"""
    opts = _byteskwargs(opts)

    timer, fm = gettimer(ui, opts)

    def doprogress():
        with ui.makeprogress(topic, total=total) as progress:
            for i in _xrange(total):
                progress.increment()

    timer(doprogress)
    fm.end()
@@ -1,2977 +1,2984 b''
/*
 parsers.c - efficient content parsing

 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others

 This software may be used and distributed according to the terms of
 the GNU General Public License, incorporated herein by reference.
*/

#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <structmember.h>

#include "bitmanipulation.h"
#include "charencode.h"
#include "compat.h"
#include "revlog.h"
#include "util.h"

#ifdef IS_PY3K
/* The mapping of Python types is meant to be temporary to get Python
 * 3 to compile. We should remove this once Python 3 is fully
 * supported and proper types are used in the extensions themselves. */
#define PyInt_Check PyLong_Check
#define PyInt_FromLong PyLong_FromLong
#define PyInt_FromSsize_t PyLong_FromSsize_t
#define PyInt_AsLong PyLong_AsLong
#endif

typedef struct indexObjectStruct indexObject;

typedef struct {
	int children[16];
} nodetreenode;

typedef struct {
	int abi_version;
	Py_ssize_t (*index_length)(const indexObject *);
	const char *(*index_node)(indexObject *, Py_ssize_t);
	int (*index_parents)(PyObject *, int, int *);
} Revlog_CAPI;

/*
 * A base-16 trie for fast node->rev mapping.
 *
 * Positive value is index of the next node in the trie
 * Negative value is a leaf: -(rev + 2)
 * Zero is empty
 */
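/*
 * Worked example (illustrative only): a stored value v decodes as
 *   v == 0  ->  empty slot
 *   v >  0  ->  v is the index of the next nodetreenode
 *   v <  0  ->  leaf for revision (-v - 2)
 * so revision 0 is stored as -2, keeping it distinct from "empty".
 */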
typedef struct {
	indexObject *index;
	nodetreenode *nodes;
	Py_ssize_t nodelen;
	size_t length;   /* # nodes in use */
	size_t capacity; /* # nodes allocated */
	int depth;       /* maximum depth of tree */
	int splits;      /* # splits performed */
} nodetree;

typedef struct {
	PyObject_HEAD /* ; */
	nodetree nt;
} nodetreeObject;

/*
 * This class has two behaviors.
 *
 * When used in a list-like way (with integer keys), we decode an
 * entry in a RevlogNG index file on demand. We have limited support for
 * integer-keyed insert and delete, only at elements right before the
 * end.
 *
 * With string keys, we lazily perform a reverse mapping from node to
 * rev, using a base-16 trie.
 */
struct indexObjectStruct {
	PyObject_HEAD
	/* Type-specific fields go here. */
	PyObject *data;         /* raw bytes of index */
	Py_ssize_t nodelen;     /* digest size of the hash, 20 for SHA-1 */
	PyObject *nullentry;    /* fast path for references to null */
	Py_buffer buf;          /* buffer of data */
	const char **offsets;   /* populated on demand */
	Py_ssize_t length;      /* current on-disk number of elements */
	unsigned new_length;    /* number of added elements */
	unsigned added_length;  /* space reserved for added elements */
	char *added;            /* populated on demand */
	PyObject *headrevs;     /* cache, invalidated on changes */
	PyObject *filteredrevs; /* filtered revs set */
	nodetree nt;            /* base-16 trie */
	int ntinitialized;      /* 0 or 1 */
	int ntrev;              /* last rev scanned */
	int ntlookups;          /* # lookups */
	int ntmisses;           /* # lookups that miss the cache */
	int inlined;
	long hdrsize; /* size of index headers. Differs in v1 vs. v2 format */
};

static Py_ssize_t index_length(const indexObject *self)
{
	return self->length + self->new_length;
}

static const char nullid[32] = {0};
static const Py_ssize_t nullrev = -1;

static Py_ssize_t inline_scan(indexObject *self, const char **offsets);

static int index_find_node(indexObject *self, const char *node);

#if LONG_MAX == 0x7fffffffL
static const char *const v1_tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
static const char *const v2_tuple_format = PY23("Kiiiiiis#Ki", "Kiiiiiiy#Ki");
#else
static const char *const v1_tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
static const char *const v2_tuple_format = PY23("kiiiiiis#ki", "kiiiiiiy#ki");
#endif

/* A RevlogNG v1 index entry is 64 bytes long. */
static const long v1_hdrsize = 64;

/* A Revlogv2 index entry is 96 bytes long. */
static const long v2_hdrsize = 96;
129
130
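/*
 * Illustrative sketch, not part of this file: because v1 and v2 entries
 * are fixed-size (64 vs. 96 bytes), the byte offset of a record in a
 * non-inlined index is a plain multiplication. index_deref() below
 * generalizes this to inlined data and in-memory appends.
 */
static inline Py_ssize_t sketch_record_offset(long hdrsize, Py_ssize_t rev)
{
        return rev * hdrsize;
}
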
static void raise_revlog_error(void)
{
        PyObject *mod = NULL, *dict = NULL, *errclass = NULL;

        mod = PyImport_ImportModule("mercurial.error");
        if (mod == NULL) {
                goto cleanup;
        }

        dict = PyModule_GetDict(mod);
        if (dict == NULL) {
                goto cleanup;
        }
        Py_INCREF(dict);

        errclass = PyDict_GetItemString(dict, "RevlogError");
        if (errclass == NULL) {
                PyErr_SetString(PyExc_SystemError,
                                "could not find RevlogError");
                goto cleanup;
        }

        /* value of exception is ignored by callers */
        PyErr_SetString(errclass, "RevlogError");

cleanup:
        Py_XDECREF(dict);
        Py_XDECREF(mod);
}

/*
 * Return a pointer to the beginning of a RevlogNG record.
 */
static const char *index_deref(indexObject *self, Py_ssize_t pos)
{
        if (pos >= self->length)
                return self->added + (pos - self->length) * self->hdrsize;

        if (self->inlined && pos > 0) {
                if (self->offsets == NULL) {
                        Py_ssize_t ret;
                        self->offsets =
                            PyMem_Malloc(self->length * sizeof(*self->offsets));
                        if (self->offsets == NULL)
                                return (const char *)PyErr_NoMemory();
                        ret = inline_scan(self, self->offsets);
                        if (ret == -1) {
                                return NULL;
                        }
                }
                return self->offsets[pos];
        }

        return (const char *)(self->buf.buf) + pos * self->hdrsize;
}

/*
 * Get parents of the given rev.
 *
 * The specified rev must be valid and must not be nullrev. A returned
 * parent revision may be nullrev, but is guaranteed to be in valid range.
 */
static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
                                    int maxrev)
{
        const char *data = index_deref(self, rev);

        ps[0] = getbe32(data + 24);
        ps[1] = getbe32(data + 28);

        /* If index file is corrupted, ps[] may point to invalid revisions. So
         * there is a risk of buffer overflow to trust them unconditionally. */
        if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
                PyErr_SetString(PyExc_ValueError, "parent out of range");
                return -1;
        }
        return 0;
}

/*
 * Get parents of the given rev.
 *
 * If the specified rev is out of range, IndexError will be raised. If the
 * revlog entry is corrupted, ValueError may be raised.
 *
 * Returns 0 on success or -1 on failure.
 */
static int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
{
        int tiprev;
        if (!op || !HgRevlogIndex_Check(op) || !ps) {
                PyErr_BadInternalCall();
                return -1;
        }
        tiprev = (int)index_length((indexObject *)op) - 1;
        if (rev < -1 || rev > tiprev) {
                PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
                return -1;
        } else if (rev == -1) {
                ps[0] = ps[1] = -1;
                return 0;
        } else {
                return index_get_parents((indexObject *)op, rev, ps, tiprev);
        }
}

static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
{
        const char *data;
        uint64_t offset;

        if (rev == nullrev)
                return 0;

        data = index_deref(self, rev);
        offset = getbe32(data + 4);
        if (rev == 0) {
                /* mask out version number for the first entry */
                offset &= 0xFFFF;
        } else {
                uint32_t offset_high = getbe32(data);
                offset |= ((uint64_t)offset_high) << 32;
        }
        return (int64_t)(offset >> 16);
}
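
/*
 * Illustrative sketch, not part of this file: the packed 64-bit word
 * read above stores the 48-bit data offset in its upper bits and the
 * 16 flag bits in its lower bits, hence the `>> 16` in index_get_start().
 */
static inline void sketch_split_offset_flags(uint64_t offset_flags,
                                             int64_t *start, int *flags)
{
        *start = (int64_t)(offset_flags >> 16); /* upper 48 bits: offset */
        *flags = (int)(offset_flags & 0xFFFF);  /* lower 16 bits: flags */
}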

static inline int index_get_length(indexObject *self, Py_ssize_t rev)
{
        const char *data;
        int tmp;

        if (rev == nullrev)
                return 0;

        data = index_deref(self, rev);

        tmp = (int)getbe32(data + 8);
        if (tmp < 0) {
                PyErr_Format(PyExc_OverflowError,
                             "revlog entry size out of bound (%d)", tmp);
                return -1;
        }
        return tmp;
}

/*
 * RevlogNG format (all in big endian, data may be inlined):
 * 6 bytes: offset
 * 2 bytes: flags
 * 4 bytes: compressed length
 * 4 bytes: uncompressed length
 * 4 bytes: base revision
 * 4 bytes: link revision
 * 4 bytes: parent 1 revision
 * 4 bytes: parent 2 revision
 * 32 bytes: nodeid (only 20 bytes used with SHA-1)
 */
static PyObject *index_get(indexObject *self, Py_ssize_t pos)
{
        uint64_t offset_flags, sidedata_offset;
        int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
            sidedata_comp_len;
        const char *c_node_id;
        const char *data;
        Py_ssize_t length = index_length(self);

        if (pos == nullrev) {
                Py_INCREF(self->nullentry);
                return self->nullentry;
        }

        if (pos < 0 || pos >= length) {
                PyErr_SetString(PyExc_IndexError, "revlog index out of range");
                return NULL;
        }

        data = index_deref(self, pos);
        if (data == NULL)
                return NULL;

        offset_flags = getbe32(data + 4);
        /*
         * The first entry on-disk needs the version number masked out,
         * but this doesn't apply if entries are added to an empty index.
         */
        if (self->length && pos == 0)
                offset_flags &= 0xFFFF;
        else {
                uint32_t offset_high = getbe32(data);
                offset_flags |= ((uint64_t)offset_high) << 32;
        }

        comp_len = getbe32(data + 8);
        uncomp_len = getbe32(data + 12);
        base_rev = getbe32(data + 16);
        link_rev = getbe32(data + 20);
        parent_1 = getbe32(data + 24);
        parent_2 = getbe32(data + 28);
        c_node_id = data + 32;

        if (self->hdrsize == v1_hdrsize) {
                return Py_BuildValue(v1_tuple_format, offset_flags, comp_len,
                                     uncomp_len, base_rev, link_rev, parent_1,
                                     parent_2, c_node_id, self->nodelen);
        } else {
                sidedata_offset = getbe64(data + 64);
                sidedata_comp_len = getbe32(data + 72);

                return Py_BuildValue(v2_tuple_format, offset_flags, comp_len,
                                     uncomp_len, base_rev, link_rev, parent_1,
                                     parent_2, c_node_id, self->nodelen,
                                     sidedata_offset, sidedata_comp_len);
        }
}
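
/*
 * Illustrative sketch, not part of this file: the fixed byte offsets
 * implied by the layout comment above. index_get() and index_append()
 * use the raw numbers directly.
 */
enum sketch_entry_field_offset {
        SKETCH_OFF_HIGH = 0,           /* high 32 bits of offset/flags */
        SKETCH_OFF_LOW = 4,            /* low 32 bits of offset/flags */
        SKETCH_COMP_LEN = 8,
        SKETCH_UNCOMP_LEN = 12,
        SKETCH_BASE_REV = 16,
        SKETCH_LINK_REV = 20,
        SKETCH_PARENT_1 = 24,
        SKETCH_PARENT_2 = 28,
        SKETCH_NODEID = 32,            /* 32 bytes, zero-padded past nodelen */
        SKETCH_SIDEDATA_OFFSET = 64,   /* v2 only, 8 bytes */
        SKETCH_SIDEDATA_COMP_LEN = 72, /* v2 only, 4 bytes */
};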

/*
 * Return the hash of node corresponding to the given rev.
 */
static const char *index_node(indexObject *self, Py_ssize_t pos)
{
        Py_ssize_t length = index_length(self);
        const char *data;

        if (pos == nullrev)
                return nullid;

        if (pos >= length)
                return NULL;

        data = index_deref(self, pos);
        return data ? data + 32 : NULL;
}

/*
 * Return the hash of the node corresponding to the given rev. The
 * rev is assumed to exist. If it does not, an exception is set.
 */
static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
{
        const char *node = index_node(self, pos);
        if (node == NULL) {
                PyErr_Format(PyExc_IndexError, "could not access rev %d",
                             (int)pos);
        }
        return node;
}

static int nt_insert(nodetree *self, const char *node, int rev);

static int node_check(Py_ssize_t nodelen, PyObject *obj, char **node)
{
        Py_ssize_t thisnodelen;
        if (PyBytes_AsStringAndSize(obj, node, &thisnodelen) == -1)
                return -1;
        if (nodelen == thisnodelen)
                return 0;
        PyErr_Format(PyExc_ValueError, "node len %zd != expected node len %zd",
                     thisnodelen, nodelen);
        return -1;
}

static PyObject *index_append(indexObject *self, PyObject *obj)
{
        uint64_t offset_flags, sidedata_offset;
        int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
        Py_ssize_t c_node_id_len, sidedata_comp_len;
        const char *c_node_id;
        char *data;

        if (self->hdrsize == v1_hdrsize) {
                if (!PyArg_ParseTuple(obj, v1_tuple_format, &offset_flags,
                                      &comp_len, &uncomp_len, &base_rev,
                                      &link_rev, &parent_1, &parent_2,
                                      &c_node_id, &c_node_id_len)) {
                        PyErr_SetString(PyExc_TypeError, "8-tuple required");
                        return NULL;
                }
        } else {
                if (!PyArg_ParseTuple(obj, v2_tuple_format, &offset_flags,
                                      &comp_len, &uncomp_len, &base_rev,
                                      &link_rev, &parent_1, &parent_2,
                                      &c_node_id, &c_node_id_len,
                                      &sidedata_offset, &sidedata_comp_len)) {
                        PyErr_SetString(PyExc_TypeError, "10-tuple required");
                        return NULL;
                }
        }

        if (c_node_id_len != self->nodelen) {
                PyErr_SetString(PyExc_TypeError, "invalid node");
                return NULL;
        }

        if (self->new_length == self->added_length) {
                size_t new_added_length =
                    self->added_length ? self->added_length * 2 : 4096;
                void *new_added = PyMem_Realloc(
                    self->added, new_added_length * self->hdrsize);
                if (!new_added)
                        return PyErr_NoMemory();
                self->added = new_added;
                self->added_length = new_added_length;
        }
        rev = self->length + self->new_length;
        data = self->added + self->hdrsize * self->new_length++;
        putbe32(offset_flags >> 32, data);
        putbe32(offset_flags & 0xffffffffU, data + 4);
        putbe32(comp_len, data + 8);
        putbe32(uncomp_len, data + 12);
        putbe32(base_rev, data + 16);
        putbe32(link_rev, data + 20);
        putbe32(parent_1, data + 24);
        putbe32(parent_2, data + 28);
        memcpy(data + 32, c_node_id, c_node_id_len);
        /* Padding since SHA-1 is only 20 bytes for now */
        memset(data + 32 + c_node_id_len, 0, 32 - c_node_id_len);
        if (self->hdrsize != v1_hdrsize) {
                putbe64(sidedata_offset, data + 64);
                putbe32(sidedata_comp_len, data + 72);
                /* Padding for 96 bytes alignment */
                memset(data + 76, 0, self->hdrsize - 76);
        }

        if (self->ntinitialized)
                nt_insert(&self->nt, c_node_id, rev);

        Py_CLEAR(self->headrevs);
        Py_RETURN_NONE;
}
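
/*
 * Illustrative sketch, not part of this file: the capacity policy used
 * by index_append() above, in isolation -- the in-memory append buffer
 * starts at 4096 records and doubles, giving amortized O(1) appends.
 */
static size_t sketch_next_capacity(size_t current_capacity)
{
        return current_capacity ? current_capacity * 2 : 4096;
}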

/* Replace an existing index entry's sidedata offset and length with new ones.
   This cannot be used outside of the context of sidedata rewriting,
   inside the transaction that creates the given revision. */
static PyObject *index_replace_sidedata_info(indexObject *self, PyObject *args)
{
        uint64_t sidedata_offset;
        int rev;
        Py_ssize_t sidedata_comp_len;
        char *data;
#if LONG_MAX == 0x7fffffffL
        const char *const sidedata_format = PY23("nKi", "nKi");
#else
        const char *const sidedata_format = PY23("nki", "nki");
#endif

        if (self->hdrsize == v1_hdrsize || self->inlined) {
                /*
                 There is a bug in the transaction handling when going from an
                 inline revlog to a separate index and data file. Turn it off
                 until it's fixed, since v2 revlogs sometimes get rewritten on
                 exchange. See issue6485.
                */
                raise_revlog_error();
                return NULL;
        }

        if (!PyArg_ParseTuple(args, sidedata_format, &rev, &sidedata_offset,
                              &sidedata_comp_len))
                return NULL;

        if (rev < 0 || rev >= index_length(self)) {
                PyErr_SetString(PyExc_IndexError, "revision outside index");
                return NULL;
        }
        if (rev < self->length) {
                PyErr_SetString(
                    PyExc_IndexError,
                    "cannot rewrite entries outside of this transaction");
                return NULL;
        }

        /* Find the newly added node, offset from the "already on-disk"
         * length */
        data = self->added + self->hdrsize * (rev - self->length);
        putbe64(sidedata_offset, data + 64);
        putbe32(sidedata_comp_len, data + 72);

        Py_RETURN_NONE;
}

static PyObject *index_stats(indexObject *self)
{
        PyObject *obj = PyDict_New();
        PyObject *s = NULL;
        PyObject *t = NULL;

        if (obj == NULL)
                return NULL;

#define istat(__n, __d)                          \
        do {                                     \
                s = PyBytes_FromString(__d);     \
                t = PyInt_FromSsize_t(self->__n); \
                if (!s || !t)                    \
                        goto bail;               \
                if (PyDict_SetItem(obj, s, t) == -1) \
                        goto bail;               \
                Py_CLEAR(s);                     \
                Py_CLEAR(t);                     \
        } while (0)

        if (self->added_length)
                istat(new_length, "index entries added");
        istat(length, "revs in memory");
        istat(ntlookups, "node trie lookups");
        istat(ntmisses, "node trie misses");
        istat(ntrev, "node trie last rev scanned");
        if (self->ntinitialized) {
                istat(nt.capacity, "node trie capacity");
                istat(nt.depth, "node trie depth");
                istat(nt.length, "node trie count");
                istat(nt.splits, "node trie splits");
        }

#undef istat

        return obj;

bail:
        Py_XDECREF(obj);
        Py_XDECREF(s);
        Py_XDECREF(t);
        return NULL;
}
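
/*
 * Illustrative sketch, not part of this file: one istat() use above,
 * e.g. istat(length, "revs in memory"), expands to roughly
 *
 *     s = PyBytes_FromString("revs in memory");
 *     t = PyInt_FromSsize_t(self->length);
 *     if (!s || !t)
 *             goto bail;
 *     if (PyDict_SetItem(obj, s, t) == -1)
 *             goto bail;
 *     Py_CLEAR(s);
 *     Py_CLEAR(t);
 */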

/*
 * When we cache a list, we want to be sure the caller can't mutate
 * the cached copy.
 */
static PyObject *list_copy(PyObject *list)
{
        Py_ssize_t len = PyList_GET_SIZE(list);
        PyObject *newlist = PyList_New(len);
        Py_ssize_t i;

        if (newlist == NULL)
                return NULL;

        for (i = 0; i < len; i++) {
                PyObject *obj = PyList_GET_ITEM(list, i);
                Py_INCREF(obj);
                PyList_SET_ITEM(newlist, i, obj);
        }

        return newlist;
}

static int check_filter(PyObject *filter, Py_ssize_t arg)
{
        if (filter) {
                PyObject *arglist, *result;
                int isfiltered;

                arglist = Py_BuildValue("(n)", arg);
                if (!arglist) {
                        return -1;
                }

                result = PyObject_Call(filter, arglist, NULL);
                Py_DECREF(arglist);
                if (!result) {
                        return -1;
                }

                /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
                 * same as this function, so we can just return it directly. */
                isfiltered = PyObject_IsTrue(result);
                Py_DECREF(result);
                return isfiltered;
        } else {
                return 0;
        }
}

static inline void set_phase_from_parents(char *phases, int parent_1,
                                          int parent_2, Py_ssize_t i)
{
        if (parent_1 >= 0 && phases[parent_1] > phases[i])
                phases[i] = phases[parent_1];
        if (parent_2 >= 0 && phases[parent_2] > phases[i])
                phases[i] = phases[parent_2];
}

static PyObject *reachableroots2(indexObject *self, PyObject *args)
{

        /* Input */
        long minroot;
        PyObject *includepatharg = NULL;
        int includepath = 0;
        /* heads and roots are lists */
        PyObject *heads = NULL;
        PyObject *roots = NULL;
        PyObject *reachable = NULL;

        PyObject *val;
        Py_ssize_t len = index_length(self);
        long revnum;
        Py_ssize_t k;
        Py_ssize_t i;
        Py_ssize_t l;
        int r;
        int parents[2];

        /* Internal data structure:
         * tovisit: array of length len+1 (all revs + nullrev), filled up to
         * lentovisit
         *
         * revstates: array of length len+1 (all revs + nullrev) */
        int *tovisit = NULL;
        long lentovisit = 0;
        enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
        char *revstates = NULL;

        /* Get arguments */
        if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
                              &PyList_Type, &roots, &PyBool_Type,
                              &includepatharg))
                goto bail;

        if (includepatharg == Py_True)
                includepath = 1;

        /* Initialize return set */
        reachable = PyList_New(0);
        if (reachable == NULL)
                goto bail;

        /* Initialize internal data structures */
        tovisit = (int *)malloc((len + 1) * sizeof(int));
        if (tovisit == NULL) {
                PyErr_NoMemory();
                goto bail;
        }

        revstates = (char *)calloc(len + 1, 1);
        if (revstates == NULL) {
                PyErr_NoMemory();
                goto bail;
        }

        l = PyList_GET_SIZE(roots);
        for (i = 0; i < l; i++) {
                revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
                if (revnum == -1 && PyErr_Occurred())
                        goto bail;
                /* If root is out of range, e.g. wdir(), it must be unreachable
                 * from heads. So we can just ignore it. */
                if (revnum + 1 < 0 || revnum + 1 >= len + 1)
                        continue;
                revstates[revnum + 1] |= RS_ROOT;
        }

        /* Populate tovisit with all the heads */
        l = PyList_GET_SIZE(heads);
        for (i = 0; i < l; i++) {
                revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
                if (revnum == -1 && PyErr_Occurred())
                        goto bail;
                if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
                        PyErr_SetString(PyExc_IndexError, "head out of range");
                        goto bail;
                }
                if (!(revstates[revnum + 1] & RS_SEEN)) {
                        tovisit[lentovisit++] = (int)revnum;
                        revstates[revnum + 1] |= RS_SEEN;
                }
        }

        /* Visit the tovisit list and find the reachable roots */
        k = 0;
        while (k < lentovisit) {
                /* Add the node to reachable if it is a root */
                revnum = tovisit[k++];
                if (revstates[revnum + 1] & RS_ROOT) {
                        revstates[revnum + 1] |= RS_REACHABLE;
                        val = PyInt_FromLong(revnum);
                        if (val == NULL)
                                goto bail;
                        r = PyList_Append(reachable, val);
                        Py_DECREF(val);
                        if (r < 0)
                                goto bail;
                        if (includepath == 0)
                                continue;
                }

                /* Add its parents to the list of nodes to visit */
                if (revnum == nullrev)
                        continue;
                r = index_get_parents(self, revnum, parents, (int)len - 1);
                if (r < 0)
                        goto bail;
                for (i = 0; i < 2; i++) {
                        if (!(revstates[parents[i] + 1] & RS_SEEN) &&
                            parents[i] >= minroot) {
                                tovisit[lentovisit++] = parents[i];
                                revstates[parents[i] + 1] |= RS_SEEN;
                        }
                }
        }

        /* Find all the nodes in between the roots we found and the heads
         * and add them to the reachable set */
        if (includepath == 1) {
                long minidx = minroot;
                if (minidx < 0)
                        minidx = 0;
                for (i = minidx; i < len; i++) {
                        if (!(revstates[i + 1] & RS_SEEN))
                                continue;
                        r = index_get_parents(self, i, parents, (int)len - 1);
                        /* Corrupted index file, error is set from
                         * index_get_parents */
                        if (r < 0)
                                goto bail;
                        if (((revstates[parents[0] + 1] |
                              revstates[parents[1] + 1]) &
                             RS_REACHABLE) &&
                            !(revstates[i + 1] & RS_REACHABLE)) {
                                revstates[i + 1] |= RS_REACHABLE;
                                val = PyInt_FromSsize_t(i);
                                if (val == NULL)
                                        goto bail;
                                r = PyList_Append(reachable, val);
                                Py_DECREF(val);
                                if (r < 0)
                                        goto bail;
                        }
                }
        }

        free(revstates);
        free(tovisit);
        return reachable;
bail:
        Py_XDECREF(reachable);
        free(revstates);
        free(tovisit);
        return NULL;
}
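
/*
 * Illustrative sketch, not part of this file: revstates[] above is
 * indexed by rev + 1 so that nullrev (-1) lands in a valid slot at
 * index 0 instead of reading before the start of the array.
 */
static inline int sketch_state_of(const char *revstates, long revnum)
{
        return revstates[revnum + 1]; /* some combination of RS_* bits */
}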

static int add_roots_get_min(indexObject *self, PyObject *roots, char *phases,
                             char phase)
{
        Py_ssize_t len = index_length(self);
        PyObject *item;
        PyObject *iterator;
        int rev, minrev = -1;
        char *node;

        if (!PySet_Check(roots)) {
                PyErr_SetString(PyExc_TypeError,
                                "roots must be a set of nodes");
                return -2;
        }
        iterator = PyObject_GetIter(roots);
        if (iterator == NULL)
                return -2;
        while ((item = PyIter_Next(iterator))) {
                if (node_check(self->nodelen, item, &node) == -1)
                        goto failed;
                rev = index_find_node(self, node);
                /* null is implicitly public, so negative is invalid */
                if (rev < 0 || rev >= len)
                        goto failed;
                phases[rev] = phase;
                if (minrev == -1 || minrev > rev)
                        minrev = rev;
                Py_DECREF(item);
        }
        Py_DECREF(iterator);
        return minrev;
failed:
        Py_DECREF(iterator);
        Py_DECREF(item);
        return -2;
}

static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
{
        /* 0: public (untracked), 1: draft, 2: secret, 32: archive,
           96: internal */
        static const char trackedphases[] = {1, 2, 32, 96};
        PyObject *roots = Py_None;
        PyObject *phasesetsdict = NULL;
        PyObject *phasesets[4] = {NULL, NULL, NULL, NULL};
        Py_ssize_t len = index_length(self);
        char *phases = NULL;
        int minphaserev = -1, rev, i;
        const int numphases = (int)(sizeof(phasesets) / sizeof(phasesets[0]));

        if (!PyArg_ParseTuple(args, "O", &roots))
                return NULL;
        if (roots == NULL || !PyDict_Check(roots)) {
                PyErr_SetString(PyExc_TypeError, "roots must be a dictionary");
                return NULL;
        }

        phases = calloc(len, 1);
        if (phases == NULL) {
                PyErr_NoMemory();
                return NULL;
        }

        for (i = 0; i < numphases; ++i) {
                PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
                PyObject *phaseroots = NULL;
                if (pyphase == NULL)
                        goto release;
                phaseroots = PyDict_GetItem(roots, pyphase);
                Py_DECREF(pyphase);
                if (phaseroots == NULL)
                        continue;
                rev = add_roots_get_min(self, phaseroots, phases,
                                        trackedphases[i]);
                if (rev == -2)
                        goto release;
                if (rev != -1 && (minphaserev == -1 || rev < minphaserev))
                        minphaserev = rev;
        }

        for (i = 0; i < numphases; ++i) {
                phasesets[i] = PySet_New(NULL);
                if (phasesets[i] == NULL)
                        goto release;
        }

        if (minphaserev == -1)
                minphaserev = len;
        for (rev = minphaserev; rev < len; ++rev) {
                PyObject *pyphase = NULL;
                PyObject *pyrev = NULL;
                int parents[2];
                /*
                 * The parent lookup could be skipped for phaseroots, but
                 * phase --force would historically not recompute them
                 * correctly, leaving descendants with a lower phase around.
                 * As such, unconditionally recompute the phase.
                 */
                if (index_get_parents(self, rev, parents, (int)len - 1) < 0)
                        goto release;
                set_phase_from_parents(phases, parents[0], parents[1], rev);
                switch (phases[rev]) {
                case 0:
                        continue;
                case 1:
                        pyphase = phasesets[0];
                        break;
                case 2:
                        pyphase = phasesets[1];
                        break;
                case 32:
                        pyphase = phasesets[2];
                        break;
                case 96:
                        pyphase = phasesets[3];
                        break;
                default:
                        /* this should never happen since the phase number is
                         * specified by this function. */
                        PyErr_SetString(PyExc_SystemError,
                                        "bad phase number in internal list");
                        goto release;
                }
                pyrev = PyInt_FromLong(rev);
                if (pyrev == NULL)
                        goto release;
                if (PySet_Add(pyphase, pyrev) == -1) {
                        Py_DECREF(pyrev);
                        goto release;
                }
                Py_DECREF(pyrev);
        }

        phasesetsdict = _dict_new_presized(numphases);
        if (phasesetsdict == NULL)
                goto release;
        for (i = 0; i < numphases; ++i) {
                PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
                if (pyphase == NULL)
                        goto release;
                if (PyDict_SetItem(phasesetsdict, pyphase, phasesets[i]) ==
                    -1) {
                        Py_DECREF(pyphase);
                        goto release;
                }
                Py_DECREF(phasesets[i]);
                phasesets[i] = NULL;
        }

        return Py_BuildValue("nN", len, phasesetsdict);

release:
        for (i = 0; i < numphases; ++i)
                Py_XDECREF(phasesets[i]);
        Py_XDECREF(phasesetsdict);

        free(phases);
        return NULL;
}
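
/*
 * Illustrative sketch, not part of this file: the tracked phase numbers
 * above paired with the names Mercurial gives them (the names are an
 * assumption of this sketch; phase 0, "public", is deliberately left
 * untracked).
 */
static const struct {
        int number;
        const char *name;
} sketch_phase_names[] = {
        {1, "draft"},
        {2, "secret"},
        {32, "archived"},
        {96, "internal"},
};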

static PyObject *index_headrevs(indexObject *self, PyObject *args)
{
        Py_ssize_t i, j, len;
        char *nothead = NULL;
        PyObject *heads = NULL;
        PyObject *filter = NULL;
        PyObject *filteredrevs = Py_None;

        if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
                return NULL;
        }

        if (self->headrevs && filteredrevs == self->filteredrevs)
                return list_copy(self->headrevs);

        Py_DECREF(self->filteredrevs);
        self->filteredrevs = filteredrevs;
        Py_INCREF(filteredrevs);

        if (filteredrevs != Py_None) {
                filter = PyObject_GetAttrString(filteredrevs, "__contains__");
                if (!filter) {
                        PyErr_SetString(
                            PyExc_TypeError,
                            "filteredrevs has no attribute __contains__");
                        goto bail;
                }
        }

        len = index_length(self);
        heads = PyList_New(0);
        if (heads == NULL)
                goto bail;
        if (len == 0) {
                PyObject *nullid = PyInt_FromLong(-1);
                if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
                        Py_XDECREF(nullid);
                        goto bail;
                }
                goto done;
        }

        nothead = calloc(len, 1);
        if (nothead == NULL) {
                PyErr_NoMemory();
                goto bail;
        }

        for (i = len - 1; i >= 0; i--) {
                int isfiltered;
                int parents[2];

                /* If nothead[i] == 1, it means we've seen an unfiltered child
                 * of this node already, and therefore this node is not
                 * filtered. So we can skip the expensive check_filter step.
                 */
                if (nothead[i] != 1) {
                        isfiltered = check_filter(filter, i);
                        if (isfiltered == -1) {
                                PyErr_SetString(PyExc_TypeError,
                                                "unable to check filter");
                                goto bail;
                        }

                        if (isfiltered) {
                                nothead[i] = 1;
                                continue;
                        }
                }

                if (index_get_parents(self, i, parents, (int)len - 1) < 0)
                        goto bail;
                for (j = 0; j < 2; j++) {
                        if (parents[j] >= 0)
                                nothead[parents[j]] = 1;
                }
        }

        for (i = 0; i < len; i++) {
                PyObject *head;

                if (nothead[i])
                        continue;
                head = PyInt_FromSsize_t(i);
                if (head == NULL || PyList_Append(heads, head) == -1) {
                        Py_XDECREF(head);
                        goto bail;
                }
        }

done:
        self->headrevs = heads;
        Py_XDECREF(filter);
        free(nothead);
        return list_copy(self->headrevs);
bail:
        Py_XDECREF(filter);
        Py_XDECREF(heads);
        free(nothead);
        return NULL;
}
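
/*
 * Illustrative sketch, not part of this file: the head computation
 * above reduced to its core -- a rev is a head iff no other rev names
 * it as a parent. Here parents[2*i] and parents[2*i+1] are assumed to
 * hold the two parents of rev i, or -1.
 */
static void sketch_mark_notheads(const int *parents, char *nothead,
                                 Py_ssize_t len)
{
        Py_ssize_t i;
        for (i = 0; i < len; i++) {
                if (parents[2 * i] >= 0)
                        nothead[parents[2 * i]] = 1;
                if (parents[2 * i + 1] >= 0)
                        nothead[parents[2 * i + 1]] = 1;
        }
}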

/**
 * Obtain the base revision index entry.
 *
 * Callers must ensure that rev >= 0 or illegal memory access may occur.
 */
static inline int index_baserev(indexObject *self, int rev)
{
        const char *data;
        int result;

        data = index_deref(self, rev);
        if (data == NULL)
                return -2;
        result = getbe32(data + 16);

        if (result > rev) {
                PyErr_Format(
                    PyExc_ValueError,
                    "corrupted revlog, revision base above revision: %d, %d",
                    rev, result);
                return -2;
        }
        if (result < -1) {
                PyErr_Format(
                    PyExc_ValueError,
                    "corrupted revlog, revision base out of range: %d, %d",
                    rev, result);
                return -2;
        }
        return result;
}

/**
 * Determine whether a revision is a snapshot.
 *
 * Only relevant for the sparse-revlog case.
 * Callers must ensure that rev is in a valid range.
 */
static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
{
        int ps[2];
        Py_ssize_t base;
        while (rev >= 0) {
                base = (Py_ssize_t)index_baserev(self, rev);
                if (base == rev) {
                        base = -1;
                }
                if (base == -2) {
                        assert(PyErr_Occurred());
                        return -1;
                }
                if (base == -1) {
                        return 1;
                }
                if (index_get_parents(self, rev, ps, (int)rev) < 0) {
                        assert(PyErr_Occurred());
                        return -1;
                }
                if (base == ps[0] || base == ps[1]) {
                        return 0;
                }
                rev = base;
        }
        return rev == -1;
}

static PyObject *index_issnapshot(indexObject *self, PyObject *value)
{
        long rev;
        int issnap;
        Py_ssize_t length = index_length(self);

        if (!pylong_to_long(value, &rev)) {
                return NULL;
        }
        if (rev < -1 || rev >= length) {
                PyErr_Format(PyExc_ValueError,
                             "revlog index out of range: %ld", rev);
                return NULL;
        }
        issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
        if (issnap < 0) {
                return NULL;
        }
        return PyBool_FromLong((long)issnap);
}

static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
{
        Py_ssize_t start_rev;
        PyObject *cache;
        Py_ssize_t base;
        Py_ssize_t rev;
        PyObject *key = NULL;
        PyObject *value = NULL;
        const Py_ssize_t length = index_length(self);
        if (!PyArg_ParseTuple(args, "O!n", &PyDict_Type, &cache, &start_rev)) {
                return NULL;
        }
        for (rev = start_rev; rev < length; rev++) {
                int issnap;
                PyObject *allvalues = NULL;
                issnap = index_issnapshotrev(self, rev);
                if (issnap < 0) {
                        goto bail;
                }
                if (issnap == 0) {
                        continue;
                }
                base = (Py_ssize_t)index_baserev(self, rev);
                if (base == rev) {
                        base = -1;
                }
                if (base == -2) {
                        assert(PyErr_Occurred());
                        goto bail;
                }
                key = PyInt_FromSsize_t(base);
                allvalues = PyDict_GetItem(cache, key);
                if (allvalues == NULL && PyErr_Occurred()) {
                        goto bail;
                }
                if (allvalues == NULL) {
                        int r;
                        allvalues = PyList_New(0);
                        if (!allvalues) {
                                goto bail;
                        }
                        r = PyDict_SetItem(cache, key, allvalues);
                        Py_DECREF(allvalues);
                        if (r < 0) {
                                goto bail;
                        }
                }
                value = PyInt_FromSsize_t(rev);
                if (PyList_Append(allvalues, value)) {
                        goto bail;
                }
                Py_CLEAR(key);
                Py_CLEAR(value);
        }
        Py_RETURN_NONE;
bail:
        Py_XDECREF(key);
        Py_XDECREF(value);
        return NULL;
}
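
/*
 * Illustrative sketch, not part of this file: after a successful
 * index_findsnapshots() call, `cache` maps each snapshot's delta base
 * to the snapshot revisions built on top of it, e.g.
 *
 *     {-1: [0, 12], 12: [30]}
 *
 * where -1 collects the full snapshots.
 */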

static PyObject *index_deltachain(indexObject *self, PyObject *args)
{
	int rev, generaldelta;
	PyObject *stoparg;
	int stoprev, iterrev, baserev = -1;
	int stopped;
	PyObject *chain = NULL, *result = NULL;
	const Py_ssize_t length = index_length(self);

	if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
		return NULL;
	}

	if (PyInt_Check(stoparg)) {
		stoprev = (int)PyInt_AsLong(stoparg);
		if (stoprev == -1 && PyErr_Occurred()) {
			return NULL;
		}
	} else if (stoparg == Py_None) {
		stoprev = -2;
	} else {
		PyErr_SetString(PyExc_ValueError,
		                "stoprev must be integer or None");
		return NULL;
	}

	if (rev < 0 || rev >= length) {
		PyErr_SetString(PyExc_ValueError, "revlog index out of range");
		return NULL;
	}

	chain = PyList_New(0);
	if (chain == NULL) {
		return NULL;
	}

	baserev = index_baserev(self, rev);

	/* This should never happen. */
	if (baserev <= -2) {
		/* Error should be set by index_deref() */
		assert(PyErr_Occurred());
		goto bail;
	}

	iterrev = rev;

	while (iterrev != baserev && iterrev != stoprev) {
		PyObject *value = PyInt_FromLong(iterrev);
		if (value == NULL) {
			goto bail;
		}
		if (PyList_Append(chain, value)) {
			Py_DECREF(value);
			goto bail;
		}
		Py_DECREF(value);

		if (generaldelta) {
			iterrev = baserev;
		} else {
			iterrev--;
		}

		if (iterrev < 0) {
			break;
		}

		if (iterrev >= length) {
			PyErr_SetString(PyExc_IndexError,
			                "revision outside index");
			goto bail; /* a bare `return NULL` here would leak
			            * the `chain` list */
		}

		baserev = index_baserev(self, iterrev);

		/* This should never happen. */
		if (baserev <= -2) {
			/* Error should be set by index_deref() */
			assert(PyErr_Occurred());
			goto bail;
		}
	}

	if (iterrev == stoprev) {
		stopped = 1;
	} else {
		PyObject *value = PyInt_FromLong(iterrev);
		if (value == NULL) {
			goto bail;
		}
		if (PyList_Append(chain, value)) {
			Py_DECREF(value);
			goto bail;
		}
		Py_DECREF(value);

		stopped = 0;
	}

	if (PyList_Reverse(chain)) {
		goto bail;
	}

	result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
	Py_DECREF(chain);
	return result;

bail:
	Py_DECREF(chain);
	return NULL;
}
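
Stripped of the CPython plumbing, index_deltachain walks an implicit linked list: with generaldelta each revision's delta parent is its base revision, without it the delta parent is simply the previous revision. A hedged plain-C sketch of that walk over a toy base-revision array (array contents and names are invented; a full snapshot is marked here by baserev[r] == r):

#include <stdio.h>

/* Toy delta-chain walk: baserev[r] is the revision that r deltas against. */
static void print_deltachain(const int *baserev, int rev, int generaldelta)
{
	int iterrev = rev;
	while (baserev[iterrev] != iterrev) {
		printf("%d ", iterrev);
		iterrev = generaldelta ? baserev[iterrev] : iterrev - 1;
	}
	printf("%d\n", iterrev); /* the full-snapshot base ends the chain */
}

int main(void)
{
	/* rev:      0  1  2  3  4 */
	int base[] = {0, 0, 1, 3, 2};
	print_deltachain(base, 4, 1); /* generaldelta: prints "4 2 1 0" */
	print_deltachain(base, 2, 0); /* legacy mode:  prints "2 1 0"   */
	return 0;
}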

static inline int64_t
index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
{
	int64_t start_offset;
	int64_t end_offset;
	int end_size;
	start_offset = index_get_start(self, start_rev);
	if (start_offset < 0) {
		return -1;
	}
	end_offset = index_get_start(self, end_rev);
	if (end_offset < 0) {
		return -1;
	}
	end_size = index_get_length(self, end_rev);
	if (end_size < 0) {
		return -1;
	}
	if (end_offset < start_offset) {
		PyErr_Format(PyExc_ValueError,
		             "corrupted revlog index: inconsistent offset "
		             "between revisions (%zd) and (%zd)",
		             start_rev, end_rev);
		return -1;
	}
	return (end_offset - start_offset) + (int64_t)end_size;
}
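
As a concrete check of the arithmetic in index_segment_span (numbers invented for illustration): if revision 5 starts at byte offset 1000, and revision 9 starts at offset 4200 and stores 300 bytes, the segment covering revisions 5 through 9 spans (4200 - 1000) + 300 = 3500 bytes. The end revision's own length has to be added because the offsets mark where a revision begins, not where it ends.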

/* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
                              Py_ssize_t startidx, Py_ssize_t endidx)
{
	int length;
	while (endidx > 1 && endidx > startidx) {
		length = index_get_length(self, revs[endidx - 1]);
		if (length < 0) {
			return -1;
		}
		if (length != 0) {
			break;
		}
		endidx -= 1;
	}
	return endidx;
}

struct Gap {
	int64_t size;
	Py_ssize_t idx;
};

static int gap_compare(const void *left, const void *right)
{
	const struct Gap *l_left = ((const struct Gap *)left);
	const struct Gap *l_right = ((const struct Gap *)right);
	if (l_left->size < l_right->size) {
		return -1;
	} else if (l_left->size > l_right->size) {
		return 1;
	}
	return 0;
}
static int Py_ssize_t_compare(const void *left, const void *right)
{
	const Py_ssize_t l_left = *(const Py_ssize_t *)left;
	const Py_ssize_t l_right = *(const Py_ssize_t *)right;
	if (l_left < l_right) {
		return -1;
	} else if (l_left > l_right) {
		return 1;
	}
	return 0;
}
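
These comparators exist only to feed qsort(3). A minimal usage sketch with invented values and a renamed demo struct; the `(a > b) - (a < b)` form below is an equivalent branch-free way to write the same three-way comparison without risking overflow, which a plain subtraction could:

#include <stdio.h>
#include <stdlib.h>

struct demo_gap {
	long long size;
	long idx;
};

static int demo_gap_compare(const void *left, const void *right)
{
	const struct demo_gap *l = left;
	const struct demo_gap *r = right;
	return (l->size > r->size) - (l->size < r->size);
}

int main(void)
{
	struct demo_gap gaps[] = {{900, 0}, {10, 1}, {4096, 2}};
	qsort(gaps, 3, sizeof(struct demo_gap), demo_gap_compare);
	for (int i = 0; i < 3; i++)
		printf("size=%lld idx=%ld\n", gaps[i].size, gaps[i].idx);
	/* prints the gaps in ascending size order: 10, 900, 4096 */
	return 0;
}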

static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
{
	/* method arguments */
	PyObject *list_revs = NULL; /* revisions in the chain */
	double targetdensity = 0;   /* min density to achieve */
	Py_ssize_t mingapsize = 0;  /* threshold to ignore gaps */

	/* other core variables */
	Py_ssize_t idxlen = index_length(self);
	Py_ssize_t i;            /* used for various iterations */
	PyObject *result = NULL; /* the final return of the function */

	/* generic information about the delta chain being sliced */
	Py_ssize_t num_revs = 0;    /* size of the full delta chain */
	Py_ssize_t *revs = NULL;    /* native array of revisions in the chain */
	int64_t chainpayload = 0;   /* sum of all deltas in the chain */
	int64_t deltachainspan = 0; /* distance from first byte to last byte */

	/* variables used for slicing the delta chain */
	int64_t readdata = 0; /* amount of data currently planned to be read */
	double density = 0;   /* ratio of payload data to data read */
	int64_t previous_end;
	struct Gap *gaps = NULL; /* array of notable gaps in the chain */
	Py_ssize_t num_gaps =
	    0; /* total number of notable gaps recorded so far */
	Py_ssize_t *selected_indices = NULL; /* indices of gaps skipped over */
	Py_ssize_t num_selected = 0;         /* number of gaps skipped */
	PyObject *chunk = NULL;              /* individual slice */
	PyObject *allchunks = NULL;          /* all slices */
	Py_ssize_t previdx;

	/* parse arguments */
	if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
	                      &targetdensity, &mingapsize)) {
		goto bail;
	}

	/* If the delta chain contains a single element, we do not need slicing
	 */
	num_revs = PyList_GET_SIZE(list_revs);
	if (num_revs <= 1) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}

	/* Turn the python list into a native integer array (for efficiency) */
	revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
	if (revs == NULL) {
		PyErr_NoMemory();
		goto bail;
	}
	for (i = 0; i < num_revs; i++) {
		Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
		if (revnum == -1 && PyErr_Occurred()) {
			goto bail;
		}
		if (revnum < nullrev || revnum >= idxlen) {
			PyErr_Format(PyExc_IndexError,
			             "index out of range: %zd", revnum);
			goto bail;
		}
		revs[i] = revnum;
	}

	/* Compute and check various properties of the unsliced delta chain */
	deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
	if (deltachainspan < 0) {
		goto bail;
	}

	if (deltachainspan <= mingapsize) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}
	chainpayload = 0;
	for (i = 0; i < num_revs; i++) {
		int tmp = index_get_length(self, revs[i]);
		if (tmp < 0) {
			goto bail;
		}
		chainpayload += tmp;
	}

	readdata = deltachainspan;
	density = 1.0;

	if (0 < deltachainspan) {
		density = (double)chainpayload / (double)deltachainspan;
	}

	if (density >= targetdensity) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}

	/* if chain is too sparse, look for relevant gaps */
	gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
	if (gaps == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	previous_end = -1;
	for (i = 0; i < num_revs; i++) {
		int64_t revstart;
		int revsize;
		revstart = index_get_start(self, revs[i]);
		if (revstart < 0) {
			goto bail;
		}
		revsize = index_get_length(self, revs[i]);
		if (revsize < 0) {
			goto bail;
		}
		if (revsize == 0) {
			continue;
		}
		if (previous_end >= 0) {
			int64_t gapsize = revstart - previous_end;
			if (gapsize > mingapsize) {
				gaps[num_gaps].size = gapsize;
				gaps[num_gaps].idx = i;
				num_gaps += 1;
			}
		}
		previous_end = revstart + revsize;
	}
	if (num_gaps == 0) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}
	qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);

	/* Slice across the largest gaps first; they improve the density the
	 * most */
	selected_indices =
	    (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
	if (selected_indices == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	for (i = num_gaps - 1; i >= 0; i--) {
		selected_indices[num_selected] = gaps[i].idx;
		readdata -= gaps[i].size;
		num_selected += 1;
		if (readdata <= 0) {
			density = 1.0;
		} else {
			density = (double)chainpayload / (double)readdata;
		}
		if (density >= targetdensity) {
			break;
		}
	}
	qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
	      &Py_ssize_t_compare);

	/* create the resulting slice */
	allchunks = PyList_New(0);
	if (allchunks == NULL) {
		goto bail;
	}
	previdx = 0;
	selected_indices[num_selected] = num_revs;
	for (i = 0; i <= num_selected; i++) {
		Py_ssize_t idx = selected_indices[i];
		Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
		if (endidx < 0) {
			goto bail;
		}
		if (previdx < endidx) {
			chunk = PyList_GetSlice(list_revs, previdx, endidx);
			if (chunk == NULL) {
				goto bail;
			}
			if (PyList_Append(allchunks, chunk) == -1) {
				goto bail;
			}
			Py_DECREF(chunk);
			chunk = NULL;
		}
		previdx = idx;
	}
	result = allchunks;
	goto done;

bail:
	Py_XDECREF(allchunks);
	Py_XDECREF(chunk);
done:
	free(revs);
	free(gaps);
	free(selected_indices);
	return result;
}
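
The selection strategy above, minus the Python object handling: compute the ratio of payload bytes to bytes read, and while it is below the target, stop reading across the largest remaining gap. A hedged plain-C sketch of just that loop on precomputed gap sizes (all values and names invented; the real function also guards readdata <= 0 and turns the selected gaps back into revision slices):

#include <stdio.h>
#include <stdlib.h>

static int cmp_ll(const void *a, const void *b)
{
	long long l = *(const long long *)a, r = *(const long long *)b;
	return (l > r) - (l < r);
}

int main(void)
{
	long long chainpayload = 6000;        /* bytes of actual delta data */
	long long readdata = 16000;           /* bytes spanned on disk      */
	long long gaps[] = {200, 7000, 2500}; /* gaps above mingapsize      */
	size_t num_gaps = 3, skipped = 0;
	double targetdensity = 0.75;

	qsort(gaps, num_gaps, sizeof(gaps[0]), cmp_ll);
	/* skip the largest gaps first: they raise the density the fastest */
	while (skipped < num_gaps &&
	       (double)chainpayload / (double)readdata < targetdensity) {
		readdata -= gaps[num_gaps - 1 - skipped];
		skipped++;
	}
	printf("skipped %zu gap(s), density now %.2f\n", skipped,
	       (double)chainpayload / (double)readdata);
	/* readdata shrinks 16000 -> 9000 -> 6500: two gaps skipped,
	 * final density 6000/6500, roughly 0.92 */
	return 0;
}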

static inline int nt_level(const char *node, Py_ssize_t level)
{
	int v = node[level >> 1];
	if (!(level & 1))
		v >>= 4;
	return v & 0xf;
}
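
nt_level walks a binary node id one hex digit (nybble) at a time: level 0 is the high nybble of byte 0, level 1 the low nybble, level 2 the high nybble of byte 1, and so on. A quick self-contained illustration with hand-written bytes (demo name invented):

#include <stdio.h>

static int nt_level_demo(const char *node, long level)
{
	int v = node[level >> 1];
	if (!(level & 1))
		v >>= 4;
	return v & 0xf;
}

int main(void)
{
	const char node[] = {(char)0xab, (char)0xcd};
	for (long level = 0; level < 4; level++)
		printf("%x", nt_level_demo(node, level));
	printf("\n"); /* prints "abcd" */
	return 0;
}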

/*
 * Return values:
 *
 * -4: match is ambiguous (multiple candidates)
 * -2: not found
 * rest: valid rev
 */
static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
                   int hex)
{
	int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
	int level, maxlevel, off;

	/* If the input is binary, do a fast check for the nullid first. */
	if (!hex && nodelen == self->nodelen && node[0] == '\0' &&
	    node[1] == '\0' && memcmp(node, nullid, self->nodelen) == 0)
		return -1;

	if (hex)
		maxlevel = nodelen;
	else
		maxlevel = 2 * nodelen;
	if (maxlevel > 2 * self->nodelen)
		maxlevel = 2 * self->nodelen;

	for (level = off = 0; level < maxlevel; level++) {
		int k = getnybble(node, level);
		nodetreenode *n = &self->nodes[off];
		int v = n->children[k];

		if (v < 0) {
			const char *n;
			Py_ssize_t i;

			v = -(v + 2);
			n = index_node(self->index, v);
			if (n == NULL)
				return -2;
			for (i = level; i < maxlevel; i++)
				if (getnybble(node, i) != nt_level(n, i))
					return -2;
			return v;
		}
		if (v == 0)
			return -2;
		off = v;
	}
	/* multiple matches against an ambiguous prefix */
	return -4;
}

static int nt_new(nodetree *self)
{
	if (self->length == self->capacity) {
		size_t newcapacity;
		nodetreenode *newnodes;
		newcapacity = self->capacity * 2;
		if (newcapacity >= SIZE_MAX / sizeof(nodetreenode)) {
			PyErr_SetString(PyExc_MemoryError,
			                "overflow in nt_new");
			return -1;
		}
		newnodes =
		    realloc(self->nodes, newcapacity * sizeof(nodetreenode));
		if (newnodes == NULL) {
			PyErr_SetString(PyExc_MemoryError, "out of memory");
			return -1;
		}
		self->capacity = newcapacity;
		self->nodes = newnodes;
		memset(&self->nodes[self->length], 0,
		       sizeof(nodetreenode) * (self->capacity - self->length));
	}
	return self->length++;
}

static int nt_insert(nodetree *self, const char *node, int rev)
{
	int level = 0;
	int off = 0;

	while (level < 2 * self->nodelen) {
		int k = nt_level(node, level);
		nodetreenode *n;
		int v;

		n = &self->nodes[off];
		v = n->children[k];

		if (v == 0) {
			n->children[k] = -rev - 2;
			return 0;
		}
		if (v < 0) {
			const char *oldnode =
			    index_node_existing(self->index, -(v + 2));
			int noff;

			if (oldnode == NULL)
				return -1;
			if (!memcmp(oldnode, node, self->nodelen)) {
				n->children[k] = -rev - 2;
				return 0;
			}
			noff = nt_new(self);
			if (noff == -1)
				return -1;
			/* self->nodes may have been changed by realloc */
			self->nodes[off].children[k] = noff;
			off = noff;
			n = &self->nodes[off];
			n->children[nt_level(oldnode, ++level)] = v;
			if (level > self->depth)
				self->depth = level;
			self->splits += 1;
		} else {
			level += 1;
			off = v;
		}
	}

	return -1;
}
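
The tree above packs three cases into each child slot: 0 means empty, a positive value is the offset of an inner node in self->nodes, and a negative value is a leaf holding revision -(v + 2). The +2 shift is what lets rev -1 (the null revision) fit in a negative slot and makes rev -2 (used as "deleted" by nt_delete_node below) collapse to the empty marker. A tiny encode/decode sketch of that convention (helper names invented):

#include <assert.h>

/* leaf encoding used by the node tree: rev <-> negative slot value */
static int encode_leaf(int rev)
{
	return -rev - 2;
}

static int decode_leaf(int v)
{
	return -(v + 2);
}

int main(void)
{
	assert(encode_leaf(0) == -2 && decode_leaf(-2) == 0);
	assert(encode_leaf(-1) == -1); /* nullid is representable */
	assert(encode_leaf(-2) == 0);  /* "deleted" collapses to empty */
	assert(decode_leaf(encode_leaf(12345)) == 12345);
	return 0;
}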

static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
{
	Py_ssize_t rev;
	const char *node;
	Py_ssize_t length;
	if (!PyArg_ParseTuple(args, "n", &rev))
		return NULL;
	length = index_length(self->nt.index);
	if (rev < 0 || rev >= length) {
		PyErr_SetString(PyExc_ValueError, "revlog index out of range");
		return NULL;
	}
	node = index_node_existing(self->nt.index, rev);
	if (node == NULL) /* the lookup can fail with an exception set */
		return NULL;
	if (nt_insert(&self->nt, node, (int)rev) == -1)
		return NULL;
	Py_RETURN_NONE;
}

static int nt_delete_node(nodetree *self, const char *node)
{
	/* rev==-2 happens to get encoded as 0, which is interpreted as not set
	 */
	return nt_insert(self, node, -2);
}

static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
{
	/* Initialize before overflow-checking to avoid nt_dealloc() crash. */
	self->nodes = NULL;

	self->index = index;
	/* The input capacity is in terms of revisions, while the field is in
	 * terms of nodetree nodes. */
	self->capacity = (capacity < 4 ? 4 : capacity / 2);
	self->nodelen = index->nodelen;
	self->depth = 0;
	self->splits = 0;
	if (self->capacity > SIZE_MAX / sizeof(nodetreenode)) {
		PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
		return -1;
	}
	self->nodes = calloc(self->capacity, sizeof(nodetreenode));
	if (self->nodes == NULL) {
		PyErr_NoMemory();
		return -1;
	}
	self->length = 1;
	return 0;
}

static int ntobj_init(nodetreeObject *self, PyObject *args)
{
	PyObject *index;
	unsigned capacity;
	if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
	                      &capacity))
		return -1;
	Py_INCREF(index);
	return nt_init(&self->nt, (indexObject *)index, capacity);
}

static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
{
	return nt_find(self, node, nodelen, 1);
}

/*
 * Find the length of the shortest unique prefix of node.
 *
 * Return values:
 *
 * -3: error (exception set)
 * -2: not found (no exception set)
 * rest: length of shortest prefix
 */
static int nt_shortest(nodetree *self, const char *node)
{
	int level, off;

	for (level = off = 0; level < 2 * self->nodelen; level++) {
		int k, v;
		nodetreenode *n = &self->nodes[off];
		k = nt_level(node, level);
		v = n->children[k];
		if (v < 0) {
			const char *n;
			v = -(v + 2);
			n = index_node_existing(self->index, v);
			if (n == NULL)
				return -3;
			if (memcmp(node, n, self->nodelen) != 0)
				/*
				 * Found a unique prefix, but it wasn't for the
				 * requested node (i.e. the requested node does
				 * not exist).
				 */
				return -2;
			return level + 1;
		}
		if (v == 0)
			return -2;
		off = v;
	}
	/*
	 * The node was still not unique after 40 hex digits, so this won't
	 * happen. Also, if we get here, then there's a programming error in
	 * this file that made us insert a node longer than 40 hex digits.
	 */
	PyErr_SetString(PyExc_Exception, "broken node tree");
	return -3;
}
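
Conceptually, nt_shortest descends the tree until it hits a leaf and reports how many nybbles that took. For a flat set of ids, the same answer is the smallest prefix length that distinguishes the node from every other id. A naive, quadratic plain-C restatement over hex strings, illustration only (assumes distinct ids of equal length; names invented):

#include <stdio.h>

/* length of the shortest prefix of nodes[which] unique among nodes[] */
static size_t shortest_prefix(const char *nodes[], size_t n, size_t which)
{
	size_t best = 1;
	for (size_t i = 0; i < n; i++) {
		size_t common = 0;
		if (i == which)
			continue;
		while (nodes[i][common] == nodes[which][common])
			common++;
		if (common + 1 > best)
			best = common + 1;
	}
	return best;
}

int main(void)
{
	const char *nodes[] = {"a9f1", "a9c2", "77d0"};
	printf("%zu\n", shortest_prefix(nodes, 3, 0)); /* 3: "a9f" */
	printf("%zu\n", shortest_prefix(nodes, 3, 2)); /* 1: "7"   */
	return 0;
}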

static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
{
	PyObject *val;
	char *node;
	int length;

	if (!PyArg_ParseTuple(args, "O", &val))
		return NULL;
	if (node_check(self->nt.nodelen, val, &node) == -1)
		return NULL;

	length = nt_shortest(&self->nt, node);
	if (length == -3)
		return NULL;
	if (length == -2) {
		raise_revlog_error();
		return NULL;
	}
	return PyInt_FromLong(length);
}

static void nt_dealloc(nodetree *self)
{
	free(self->nodes);
	self->nodes = NULL;
}

static void ntobj_dealloc(nodetreeObject *self)
{
	Py_XDECREF(self->nt.index);
	nt_dealloc(&self->nt);
	PyObject_Del(self);
}

static PyMethodDef ntobj_methods[] = {
    {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
     "insert an index entry"},
    {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
     "find length of shortest hex nodeid of a binary ID"},
    {NULL} /* Sentinel */
};

static PyTypeObject nodetreeType = {
    PyVarObject_HEAD_INIT(NULL, 0) /* header */
    "parsers.nodetree",            /* tp_name */
    sizeof(nodetreeObject),        /* tp_basicsize */
    0,                             /* tp_itemsize */
    (destructor)ntobj_dealloc,     /* tp_dealloc */
    0,                             /* tp_print */
    0,                             /* tp_getattr */
    0,                             /* tp_setattr */
    0,                             /* tp_compare */
    0,                             /* tp_repr */
    0,                             /* tp_as_number */
    0,                             /* tp_as_sequence */
    0,                             /* tp_as_mapping */
    0,                             /* tp_hash */
    0,                             /* tp_call */
    0,                             /* tp_str */
    0,                             /* tp_getattro */
    0,                             /* tp_setattro */
    0,                             /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,            /* tp_flags */
    "nodetree",                    /* tp_doc */
    0,                             /* tp_traverse */
    0,                             /* tp_clear */
    0,                             /* tp_richcompare */
    0,                             /* tp_weaklistoffset */
    0,                             /* tp_iter */
    0,                             /* tp_iternext */
    ntobj_methods,                 /* tp_methods */
    0,                             /* tp_members */
    0,                             /* tp_getset */
    0,                             /* tp_base */
    0,                             /* tp_dict */
    0,                             /* tp_descr_get */
    0,                             /* tp_descr_set */
    0,                             /* tp_dictoffset */
    (initproc)ntobj_init,          /* tp_init */
    0,                             /* tp_alloc */
};

static int index_init_nt(indexObject *self)
{
	if (!self->ntinitialized) {
		if (nt_init(&self->nt, self, (int)self->length) == -1) {
			nt_dealloc(&self->nt);
			return -1;
		}
		if (nt_insert(&self->nt, nullid, -1) == -1) {
			nt_dealloc(&self->nt);
			return -1;
		}
		self->ntinitialized = 1;
		self->ntrev = (int)index_length(self);
		self->ntlookups = 1;
		self->ntmisses = 0;
	}
	return 0;
}

/*
 * Return values:
 *
 * -3: error (exception set)
 * -2: not found (no exception set)
 * rest: valid rev
 */
static int index_find_node(indexObject *self, const char *node)
{
	int rev;

	if (index_init_nt(self) == -1)
		return -3;

	self->ntlookups++;
	rev = nt_find(&self->nt, node, self->nodelen, 0);
	if (rev >= -1)
		return rev;

	/*
	 * For the first handful of lookups, we scan the entire index,
	 * and cache only the matching nodes. This optimizes for cases
	 * like "hg tip", where only a few nodes are accessed.
	 *
	 * After that, we cache every node we visit, using a single
	 * scan amortized over multiple lookups. This gives the best
	 * bulk performance, e.g. for "hg log".
	 */
	if (self->ntmisses++ < 4) {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node_existing(self, rev);
			if (n == NULL)
				return -3;
			if (memcmp(node, n, self->nodelen) == 0) {
				if (nt_insert(&self->nt, n, rev) == -1)
					return -3;
				break;
			}
		}
	} else {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node_existing(self, rev);
			if (n == NULL)
				return -3;
			if (nt_insert(&self->nt, n, rev) == -1) {
				self->ntrev = rev + 1;
				return -3;
			}
			if (memcmp(node, n, self->nodelen) == 0) {
				break;
			}
		}
		self->ntrev = rev;
	}

	if (rev >= 0)
		return rev;
	return -2;
}

static PyObject *index_getitem(indexObject *self, PyObject *value)
{
	char *node;
	int rev;

	if (PyInt_Check(value)) {
		long idx;
		if (!pylong_to_long(value, &idx)) {
			return NULL;
		}
		return index_get(self, idx);
	}

	if (node_check(self->nodelen, value, &node) == -1)
		return NULL;
	rev = index_find_node(self, node);
	if (rev >= -1)
		return PyInt_FromLong(rev);
	if (rev == -2)
		raise_revlog_error();
	return NULL;
}

/*
 * Fully populate the radix tree.
 */
static int index_populate_nt(indexObject *self)
{
	int rev;
	if (self->ntrev > 0) {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node_existing(self, rev);
			if (n == NULL)
				return -1;
			if (nt_insert(&self->nt, n, rev) == -1)
				return -1;
		}
		self->ntrev = -1;
	}
	return 0;
}

static PyObject *index_partialmatch(indexObject *self, PyObject *args)
{
	const char *fullnode;
	Py_ssize_t nodelen;
	char *node;
	int rev, i;

	if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
		return NULL;

	if (nodelen < 1) {
		PyErr_SetString(PyExc_ValueError, "key too short");
		return NULL;
	}

	if (nodelen > 2 * self->nodelen) {
		PyErr_SetString(PyExc_ValueError, "key too long");
		return NULL;
	}

	for (i = 0; i < nodelen; i++)
		hexdigit(node, i);
	if (PyErr_Occurred()) {
		/* input contains non-hex characters */
		PyErr_Clear();
		Py_RETURN_NONE;
	}

	if (index_init_nt(self) == -1)
		return NULL;
	if (index_populate_nt(self) == -1)
		return NULL;
	rev = nt_partialmatch(&self->nt, node, nodelen);

	switch (rev) {
	case -4:
		raise_revlog_error();
		return NULL;
	case -2:
		Py_RETURN_NONE;
	case -1:
		return PyBytes_FromStringAndSize(nullid, self->nodelen);
	}

	fullnode = index_node_existing(self, rev);
	if (fullnode == NULL) {
		return NULL;
	}
	return PyBytes_FromStringAndSize(fullnode, self->nodelen);
}

static PyObject *index_shortest(indexObject *self, PyObject *args)
{
	PyObject *val;
	char *node;
	int length;

	if (!PyArg_ParseTuple(args, "O", &val))
		return NULL;
	if (node_check(self->nodelen, val, &node) == -1)
		return NULL;

	self->ntlookups++;
	if (index_init_nt(self) == -1)
		return NULL;
	if (index_populate_nt(self) == -1)
		return NULL;
	length = nt_shortest(&self->nt, node);
	if (length == -3)
		return NULL;
	if (length == -2) {
		raise_revlog_error();
		return NULL;
	}
	return PyInt_FromLong(length);
}

static PyObject *index_m_get(indexObject *self, PyObject *args)
{
	PyObject *val;
	char *node;
	int rev;

	if (!PyArg_ParseTuple(args, "O", &val))
		return NULL;
	if (node_check(self->nodelen, val, &node) == -1)
		return NULL;
	rev = index_find_node(self, node);
	if (rev == -3)
		return NULL;
	if (rev == -2)
		Py_RETURN_NONE;
	return PyInt_FromLong(rev);
}

static int index_contains(indexObject *self, PyObject *value)
{
	char *node;

	if (PyInt_Check(value)) {
		long rev;
		if (!pylong_to_long(value, &rev)) {
			return -1;
		}
		return rev >= -1 && rev < index_length(self);
	}

	if (node_check(self->nodelen, value, &node) == -1)
		return -1;

	switch (index_find_node(self, node)) {
	case -3:
		return -1;
	case -2:
		return 0;
	default:
		return 1;
	}
}

static PyObject *index_m_has_node(indexObject *self, PyObject *args)
{
	int ret = index_contains(self, args);
	if (ret < 0)
		return NULL;
	return PyBool_FromLong((long)ret);
}

static PyObject *index_m_rev(indexObject *self, PyObject *val)
{
	char *node;
	int rev;

	if (node_check(self->nodelen, val, &node) == -1)
		return NULL;
	rev = index_find_node(self, node);
	if (rev >= -1)
		return PyInt_FromLong(rev);
	if (rev == -2)
		raise_revlog_error();
	return NULL;
}

typedef uint64_t bitmask;

/*
 * Given a disjoint set of revs, return all candidates for the
 * greatest common ancestor. In revset notation, this is the set
 * "heads(::a and ::b and ...)"
 */
static PyObject *find_gca_candidates(indexObject *self, const int *revs,
                                     int revcount)
{
	const bitmask allseen = (1ull << revcount) - 1;
	const bitmask poison = 1ull << revcount;
	PyObject *gca = PyList_New(0);
	int i, v, interesting;
	int maxrev = -1;
	bitmask sp;
	bitmask *seen;

	if (gca == NULL)
		return PyErr_NoMemory();

	for (i = 0; i < revcount; i++) {
		if (revs[i] > maxrev)
			maxrev = revs[i];
	}

	seen = calloc(sizeof(*seen), maxrev + 1);
	if (seen == NULL) {
		Py_DECREF(gca);
		return PyErr_NoMemory();
	}

	for (i = 0; i < revcount; i++)
		seen[revs[i]] = 1ull << i;

	interesting = revcount;

	for (v = maxrev; v >= 0 && interesting; v--) {
		bitmask sv = seen[v];
		int parents[2];

		if (!sv)
			continue;

		if (sv < poison) {
			interesting -= 1;
			if (sv == allseen) {
				PyObject *obj = PyInt_FromLong(v);
				if (obj == NULL)
					goto bail;
				if (PyList_Append(gca, obj) == -1) {
					Py_DECREF(obj);
					goto bail;
				}
				Py_DECREF(obj); /* the list holds its own
				                 * reference */
				sv |= poison;
				for (i = 0; i < revcount; i++) {
					if (revs[i] == v)
						goto done;
				}
			}
		}
		if (index_get_parents(self, v, parents, maxrev) < 0)
			goto bail;

		for (i = 0; i < 2; i++) {
			int p = parents[i];
			if (p == -1)
				continue;
			sp = seen[p];
			if (sv < poison) {
				if (sp == 0) {
					seen[p] = sv;
					interesting++;
				} else if (sp != sv)
					seen[p] |= sv;
			} else {
				if (sp && sp < poison)
					interesting--;
				seen[p] = sv;
			}
		}
	}

done:
	free(seen);
	return gca;
bail:
	free(seen);
	Py_XDECREF(gca);
	return NULL;
}
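
The bitmask sweep above can be shown on a toy graph: give each input revision its own bit, walk revisions from high to low propagating the seen bits from child to parents, and a revision whose mask equals allseen is a candidate common ancestor (the poison bit then disqualifies its own ancestors, keeping only the heads). A hedged, simplified plain-C sketch on a hard-coded parent table; graph, values, and names are invented, and the real function additionally tracks an "interesting" count so it can stop early:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* toy DAG, parents[rev][2], -1 means no parent:
	 * 0 <- 1 <- 2 and 0 <- 3; the inputs are revs 2 and 3 */
	int parents[4][2] = {{-1, -1}, {0, -1}, {1, -1}, {0, -1}};
	int inputs[] = {2, 3};
	uint64_t seen[4] = {0};
	uint64_t allseen = (1ull << 2) - 1; /* both input bits set */
	uint64_t poison = 1ull << 2;

	for (int i = 0; i < 2; i++)
		seen[inputs[i]] = 1ull << i;

	for (int v = 3; v >= 0; v--) {
		uint64_t sv = seen[v];
		if (!sv)
			continue;
		if (sv == allseen) {
			printf("gca candidate: %d\n", v); /* prints 0 */
			sv |= poison; /* disqualify this rev's ancestors */
		}
		for (int i = 0; i < 2; i++) {
			int p = parents[v][i];
			if (p != -1)
				seen[p] |= sv;
		}
	}
	return 0;
}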

/*
 * Given a disjoint set of revs, return the subset with the longest
 * path to the root.
 */
static PyObject *find_deepest(indexObject *self, PyObject *revs)
{
    const Py_ssize_t revcount = PyList_GET_SIZE(revs);
    static const Py_ssize_t capacity = 24;
    int *depth, *interesting = NULL;
    int i, j, v, ninteresting;
    PyObject *dict = NULL, *keys = NULL;
    long *seen = NULL;
    int maxrev = -1;
    long final;

    if (revcount > capacity) {
        PyErr_Format(PyExc_OverflowError,
                     "bitset size (%ld) > capacity (%ld)",
                     (long)revcount, (long)capacity);
        return NULL;
    }

    for (i = 0; i < revcount; i++) {
        int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
        if (n > maxrev)
            maxrev = n;
    }

    depth = calloc(sizeof(*depth), maxrev + 1);
    if (depth == NULL)
        return PyErr_NoMemory();

    seen = calloc(sizeof(*seen), maxrev + 1);
    if (seen == NULL) {
        PyErr_NoMemory();
        goto bail;
    }

    interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
    if (interesting == NULL) {
        PyErr_NoMemory();
        goto bail;
    }

    if (PyList_Sort(revs) == -1)
        goto bail;

    for (i = 0; i < revcount; i++) {
        int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
        long b = 1l << i;
        depth[n] = 1;
        seen[n] = b;
        interesting[b] = 1;
    }

    /* invariant: ninteresting is the number of non-zero entries in
     * interesting. */
    ninteresting = (int)revcount;

    for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
        int dv = depth[v];
        int parents[2];
        long sv;

        if (dv == 0)
            continue;

        sv = seen[v];
        if (index_get_parents(self, v, parents, maxrev) < 0)
            goto bail;

        for (i = 0; i < 2; i++) {
            int p = parents[i];
            long sp;
            int dp;

            if (p == -1)
                continue;

            dp = depth[p];
            sp = seen[p];
            if (dp <= dv) {
                depth[p] = dv + 1;
                if (sp != sv) {
                    interesting[sv] += 1;
                    seen[p] = sv;
                    if (sp) {
                        interesting[sp] -= 1;
                        if (interesting[sp] == 0)
                            ninteresting -= 1;
                    }
                }
            } else if (dv == dp - 1) {
                long nsp = sp | sv;
                if (nsp == sp)
                    continue;
                seen[p] = nsp;
                interesting[sp] -= 1;
                if (interesting[sp] == 0)
                    ninteresting -= 1;
                if (interesting[nsp] == 0)
                    ninteresting += 1;
                interesting[nsp] += 1;
            }
        }
        interesting[sv] -= 1;
        if (interesting[sv] == 0)
            ninteresting -= 1;
    }

    final = 0;
    j = ninteresting;
    for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
        if (interesting[i] == 0)
            continue;
        final |= i;
        j -= 1;
    }
    if (final == 0) {
        keys = PyList_New(0);
        goto bail;
    }

    dict = PyDict_New();
    if (dict == NULL)
        goto bail;

    for (i = 0; i < revcount; i++) {
        PyObject *key;

        if ((final & (1 << i)) == 0)
            continue;

        key = PyList_GET_ITEM(revs, i);
        Py_INCREF(key);
        Py_INCREF(Py_None);
        if (PyDict_SetItem(dict, key, Py_None) == -1) {
            Py_DECREF(key);
            Py_DECREF(Py_None);
            goto bail;
        }
    }

    keys = PyDict_Keys(dict);

bail:
    free(depth);
    free(seen);
    free(interesting);
    Py_XDECREF(dict);

    return keys;
}
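
The bitset bookkeeping above is compact but dense: every input rev gets one bit, seen[] propagates that tag toward the root, depth[] records the longest path found so far, and interesting[] counts how many distinct tag combinations are still live. A rough pure-Python rendering of the same walk, assuming a parents(rev) helper that returns the two parent revisions (-1 for a missing parent); this is an illustrative sketch, not part of the change:

def find_deepest(revs, parents):
    # revs: disjoint set of revisions; parents(rev) -> (p1, p2)
    revs = sorted(revs)
    maxrev = max(revs)
    depth = [0] * (maxrev + 1)
    seen = [0] * (maxrev + 1)
    interesting = {}  # tag bitmask -> number of revs currently carrying it
    for i, r in enumerate(revs):
        depth[r] = 1
        seen[r] = 1 << i
        interesting[1 << i] = 1
    ninteresting = len(revs)
    for v in range(maxrev, -1, -1):
        if ninteresting <= 1:
            break
        dv = depth[v]
        if dv == 0:
            continue
        sv = seen[v]
        for p in parents(v):
            if p == -1:
                continue
            dp, sp = depth[p], seen[p]
            if dp <= dv:
                depth[p] = dv + 1
                if sp != sv:
                    interesting[sv] = interesting.get(sv, 0) + 1
                    seen[p] = sv
                    if sp:
                        interesting[sp] -= 1
                        if not interesting[sp]:
                            ninteresting -= 1
            elif dv == dp - 1:
                nsp = sp | sv
                if nsp == sp:
                    continue
                seen[p] = nsp
                interesting[sp] -= 1
                if not interesting[sp]:
                    ninteresting -= 1
                if not interesting.get(nsp, 0):
                    ninteresting += 1
                interesting[nsp] = interesting.get(nsp, 0) + 1
        interesting[sv] -= 1
        if not interesting[sv]:
            ninteresting -= 1
    final = 0
    for mask, count in interesting.items():
        if count > 0:
            final |= mask
    return [r for i, r in enumerate(revs) if final & (1 << i)]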

/*
 * Given a (possibly overlapping) set of revs, return all the
 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
 */
static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
{
    PyObject *ret = NULL;
    Py_ssize_t argcount, i, len;
    bitmask repeat = 0;
    int revcount = 0;
    int *revs;

    argcount = PySequence_Length(args);
    revs = PyMem_Malloc(argcount * sizeof(*revs));
    if (argcount > 0 && revs == NULL)
        return PyErr_NoMemory();
    len = index_length(self);

    for (i = 0; i < argcount; i++) {
        static const int capacity = 24;
        PyObject *obj = PySequence_GetItem(args, i);
        bitmask x;
        long val;

        if (!PyInt_Check(obj)) {
            PyErr_SetString(PyExc_TypeError,
                            "arguments must all be ints");
            Py_DECREF(obj);
            goto bail;
        }
        val = PyInt_AsLong(obj);
        Py_DECREF(obj);
        if (val == -1) {
            ret = PyList_New(0);
            goto done;
        }
        if (val < 0 || val >= len) {
            PyErr_SetString(PyExc_IndexError, "index out of range");
            goto bail;
        }
        /* this cheesy bloom filter lets us avoid some more
         * expensive duplicate checks in the common set-is-disjoint
         * case */
        x = 1ull << (val & 0x3f);
        if (repeat & x) {
            int k;
            for (k = 0; k < revcount; k++) {
                if (val == revs[k])
                    goto duplicate;
            }
        } else
            repeat |= x;
        if (revcount >= capacity) {
            PyErr_Format(PyExc_OverflowError,
                         "bitset size (%d) > capacity (%d)",
                         revcount, capacity);
            goto bail;
        }
        revs[revcount++] = (int)val;
    duplicate:;
    }

    if (revcount == 0) {
        ret = PyList_New(0);
        goto done;
    }
    if (revcount == 1) {
        PyObject *obj;
        ret = PyList_New(1);
        if (ret == NULL)
            goto bail;
        obj = PyInt_FromLong(revs[0]);
        if (obj == NULL)
            goto bail;
        PyList_SET_ITEM(ret, 0, obj);
        goto done;
    }

    ret = find_gca_candidates(self, revs, revcount);
    if (ret == NULL)
        goto bail;

done:
    PyMem_Free(revs);
    return ret;

bail:
    PyMem_Free(revs);
    Py_XDECREF(ret);
    return NULL;
}

/*
 * Given a (possibly overlapping) set of revs, return the greatest
 * common ancestors: those with the longest path to the root.
 */
static PyObject *index_ancestors(indexObject *self, PyObject *args)
{
    PyObject *ret;
    PyObject *gca = index_commonancestorsheads(self, args);
    if (gca == NULL)
        return NULL;

    if (PyList_GET_SIZE(gca) <= 1) {
        return gca;
    }

    ret = find_deepest(self, gca);
    Py_DECREF(gca);
    return ret;
}
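
The "cheesy bloom filter" above hashes each candidate into one of 64 bits (val & 0x3f) and only falls back to a linear duplicate scan when two inputs land on the same bit. The same trick in Python, as an illustrative sketch:

def dedupe_small(vals):
    # Skip the O(n) membership scan for values whose low six bits have
    # not been seen yet -- the common case when the inputs are distinct.
    repeat = 0
    out = []
    for val in vals:
        bit = 1 << (val & 0x3F)
        if repeat & bit and val in out:  # possible collision: confirm
            continue
        repeat |= bit
        out.append(val)
    return out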

/*
 * Invalidate any trie entries introduced by added revs.
 */
static void index_invalidate_added(indexObject *self, Py_ssize_t start)
{
    Py_ssize_t i, len;

    len = self->length + self->new_length;
    i = start - self->length;
    if (i < 0)
        return;

    for (i = start; i < len; i++)
        nt_delete_node(&self->nt, index_deref(self, i) + 32);

    self->new_length = start - self->length;
}

/*
 * Delete a numeric range of revs, which must be at the end of the
 * range.
 */
static int index_slice_del(indexObject *self, PyObject *item)
{
    Py_ssize_t start, stop, step, slicelength;
    Py_ssize_t length = index_length(self) + 1;
    int ret = 0;

    /* Argument changed from PySliceObject* to PyObject* in Python 3. */
#ifdef IS_PY3K
    if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
                             &slicelength) < 0)
#else
    if (PySlice_GetIndicesEx((PySliceObject *)item, length, &start, &stop,
                             &step, &slicelength) < 0)
#endif
        return -1;

    if (slicelength <= 0)
        return 0;

    if ((step < 0 && start < stop) || (step > 0 && start > stop))
        stop = start;

    if (step < 0) {
        stop = start + 1;
        start = stop + step * (slicelength - 1) - 1;
        step = -step;
    }

    if (step != 1) {
        PyErr_SetString(PyExc_ValueError,
                        "revlog index delete requires step size of 1");
        return -1;
    }

    if (stop != length - 1) {
        PyErr_SetString(PyExc_IndexError,
                        "revlog index deletion indices are invalid");
        return -1;
    }

    if (start < self->length) {
        if (self->ntinitialized) {
            Py_ssize_t i;

            for (i = start; i < self->length; i++) {
                const char *node = index_node_existing(self, i);
                if (node == NULL)
                    return -1;

                nt_delete_node(&self->nt, node);
            }
            if (self->new_length)
                index_invalidate_added(self, self->length);
            if (self->ntrev > start)
                self->ntrev = (int)start;
        } else if (self->new_length) {
            self->new_length = 0;
        }

        self->length = start;
        goto done;
    }

    if (self->ntinitialized) {
        index_invalidate_added(self, start);
        if (self->ntrev > start)
            self->ntrev = (int)start;
    } else {
        self->new_length = start - self->length;
    }
done:
    Py_CLEAR(self->headrevs);
    return ret;
}
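
index_slice_del only supports truncation: the deleted range must run to the end of the index with step 1, which is exactly what stripping revisions needs. Callers therefore use the rev:-1 slice form, the same shape the pure-Python __delitem__ shown further down checks for explicitly (illustrative; index and rev are placeholders):

# Drop revision `rev` and everything after it.
del index[rev:-1]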

/*
 * Supported ops:
 *
 * slice deletion
 * string assignment (extend node->rev mapping)
 * string deletion (shrink node->rev mapping)
 */
static int index_assign_subscript(indexObject *self, PyObject *item,
                                  PyObject *value)
{
    char *node;
    long rev;

    if (PySlice_Check(item) && value == NULL)
        return index_slice_del(self, item);

    if (node_check(self->nodelen, item, &node) == -1)
        return -1;

    if (value == NULL)
        return self->ntinitialized ? nt_delete_node(&self->nt, node)
                                   : 0;
    rev = PyInt_AsLong(value);
    if (rev > INT_MAX || rev < 0) {
        if (!PyErr_Occurred())
            PyErr_SetString(PyExc_ValueError, "rev out of range");
        return -1;
    }

    if (index_init_nt(self) == -1)
        return -1;
    return nt_insert(&self->nt, node, (int)rev);
}

/*
 * Find all RevlogNG entries in an index that has inline data. Update
 * the optional "offsets" table with those entries.
 */
static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
{
    const char *data = (const char *)self->buf.buf;
    Py_ssize_t pos = 0;
    Py_ssize_t end = self->buf.len;
    long incr = self->hdrsize;
    Py_ssize_t len = 0;

    while (pos + self->hdrsize <= end && pos >= 0) {
        uint32_t comp_len, sidedata_comp_len = 0;
        /* 3rd element of header is length of compressed inline data */
        comp_len = getbe32(data + pos + 8);
        if (self->hdrsize == v2_hdrsize) {
            sidedata_comp_len = getbe32(data + pos + 72);
        }
        incr = self->hdrsize + comp_len + sidedata_comp_len;
        if (offsets)
            offsets[len] = data + pos;
        len++;
        pos += incr;
    }

    if (pos != end) {
        if (!PyErr_Occurred())
            PyErr_SetString(PyExc_ValueError, "corrupt index file");
        return -1;
    }

    return len;
}
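
Inline revlogs interleave each index entry with its compressed revision data (and, for v2, its sidedata), so entry offsets cannot be computed and must be discovered by walking the file. A Python sketch of the same walk for v1, mirroring the getbe32(data + pos + 8) read above (cf. _inline_scan in pure/parsers.py below; illustrative only):

import struct

def inline_offsets(data, entry_size):
    # data: raw bytes of an inline .i file; entry_size: 64 for revlog v1
    offsets, pos = [], 0
    while pos + entry_size <= len(data):
        (comp_len,) = struct.unpack(b'>i', data[pos + 8 : pos + 12])
        offsets.append(pos)
        pos += entry_size + comp_len
    if pos != len(data):
        raise ValueError(b'corrupt index file')
    return offsets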

static int index_init(indexObject *self, PyObject *args, PyObject *kwargs)
{
    PyObject *data_obj, *inlined_obj, *revlogv2;
    Py_ssize_t size;

    static char *kwlist[] = {"data", "inlined", "revlogv2", NULL};

    /* Initialize before argument-checking to avoid index_dealloc() crash.
     */
    self->added = NULL;
    self->new_length = 0;
    self->added_length = 0;
    self->data = NULL;
    memset(&self->buf, 0, sizeof(self->buf));
    self->headrevs = NULL;
    self->filteredrevs = Py_None;
    Py_INCREF(Py_None);
    self->ntinitialized = 0;
    self->offsets = NULL;
    self->nodelen = 20;
    self->nullentry = NULL;

    revlogv2 = NULL;
    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|O", kwlist,
                                     &data_obj, &inlined_obj, &revlogv2))
        return -1;
    if (!PyObject_CheckBuffer(data_obj)) {
        PyErr_SetString(PyExc_TypeError,
                        "data does not support buffer interface");
        return -1;
    }
    if (self->nodelen < 20 || self->nodelen > (Py_ssize_t)sizeof(nullid)) {
        PyErr_SetString(PyExc_RuntimeError, "unsupported node size");
        return -1;
    }

    if (revlogv2 && PyObject_IsTrue(revlogv2)) {
        self->hdrsize = v2_hdrsize;
    } else {
        self->hdrsize = v1_hdrsize;
    }

    if (self->hdrsize == v1_hdrsize) {
        self->nullentry =
            Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0, 0, -1,
                          -1, -1, -1, nullid, self->nodelen);
    } else {
        self->nullentry =
            Py_BuildValue(PY23("iiiiiiis#ii", "iiiiiiiy#ii"), 0, 0, 0,
                          -1, -1, -1, -1, nullid, self->nodelen, 0, 0);
    }

    if (!self->nullentry)
        return -1;
    PyObject_GC_UnTrack(self->nullentry);

    if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
        return -1;
    size = self->buf.len;

    self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
    self->data = data_obj;

    self->ntlookups = self->ntmisses = 0;
    self->ntrev = -1;
    Py_INCREF(self->data);

    if (self->inlined) {
        Py_ssize_t len = inline_scan(self, NULL);
        if (len == -1)
            goto bail;
        self->length = len;
    } else {
        if (size % self->hdrsize) {
            PyErr_SetString(PyExc_ValueError, "corrupt index file");
            goto bail;
        }
        self->length = size / self->hdrsize;
    }

    return 0;
bail:
    return -1;
}
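
The hdrsize chosen here is what the new entry_size attribute below exposes. The null entry built for each version matches the null_item tuples of the pure-Python classes: a v1 entry unpacks to an 8-tuple, while v2 appends the two sidedata fields. As plain tuples (illustrative):

null_item_v1 = (0, 0, 0, -1, -1, -1, -1, b'\x00' * 20)
null_item_v2 = (0, 0, 0, -1, -1, -1, -1, b'\x00' * 20, 0, 0)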

static PyObject *index_nodemap(indexObject *self)
{
    Py_INCREF(self);
    return (PyObject *)self;
}

static void _index_clearcaches(indexObject *self)
{
    if (self->offsets) {
        PyMem_Free((void *)self->offsets);
        self->offsets = NULL;
    }
    if (self->ntinitialized) {
        nt_dealloc(&self->nt);
    }
    self->ntinitialized = 0;
    Py_CLEAR(self->headrevs);
}

static PyObject *index_clearcaches(indexObject *self)
{
    _index_clearcaches(self);
    self->ntrev = -1;
    self->ntlookups = self->ntmisses = 0;
    Py_RETURN_NONE;
}

static void index_dealloc(indexObject *self)
{
    _index_clearcaches(self);
    Py_XDECREF(self->filteredrevs);
    if (self->buf.buf) {
        PyBuffer_Release(&self->buf);
        memset(&self->buf, 0, sizeof(self->buf));
    }
    Py_XDECREF(self->data);
    PyMem_Free(self->added);
    Py_XDECREF(self->nullentry);
    PyObject_Del(self);
}

static PySequenceMethods index_sequence_methods = {
    (lenfunc)index_length,      /* sq_length */
    0,                          /* sq_concat */
    0,                          /* sq_repeat */
    (ssizeargfunc)index_get,    /* sq_item */
    0,                          /* sq_slice */
    0,                          /* sq_ass_item */
    0,                          /* sq_ass_slice */
    (objobjproc)index_contains, /* sq_contains */
};

static PyMappingMethods index_mapping_methods = {
    (lenfunc)index_length,                 /* mp_length */
    (binaryfunc)index_getitem,             /* mp_subscript */
    (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
};

static PyMethodDef index_methods[] = {
    {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
     "return the gca set of the given revs"},
    {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
     METH_VARARGS,
     "return the heads of the common ancestors of the given revs"},
    {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
     "clear the index caches"},
    {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
    {"get_rev", (PyCFunction)index_m_get, METH_VARARGS,
     "return `rev` associated with a node or None"},
    {"has_node", (PyCFunction)index_m_has_node, METH_O,
     "return True if the node exist in the index"},
    {"rev", (PyCFunction)index_m_rev, METH_O,
     "return `rev` associated with a node or raise RevlogError"},
    {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
     "compute phases"},
    {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
     "reachableroots"},
    {"replace_sidedata_info", (PyCFunction)index_replace_sidedata_info,
     METH_VARARGS, "replace an existing index entry with a new value"},
    {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
     "get head revisions"}, /* Can do filtering since 3.2 */
    {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
     "get filtered head revisions"}, /* Can always do filtering */
    {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
     "True if the object is a snapshot"},
    {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
     "Gather snapshot data in a cache dict"},
    {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
     "determine revisions with deltas to reconstruct fulltext"},
    {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
     METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
    {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
    {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
     "match a potentially ambiguous node ID"},
    {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
     "find length of shortest hex nodeid of a binary ID"},
    {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
    {NULL} /* Sentinel */
};

static PyGetSetDef index_getset[] = {
    {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
    {NULL} /* Sentinel */
};

+static PyMemberDef index_members[] = {
+    {"entry_size", T_LONG, offsetof(indexObject, hdrsize), 0,
+     "size of an index entry"},
+    {NULL} /* Sentinel */
+};
+
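This PyMemberDef is the core of the change: it exposes the C struct field hdrsize directly as a Python-level attribute named entry_size, replacing lookups that used to go through revlog._io.size. From Python (illustrative; index_data stands for the raw bytes of a non-inline .i file):

from mercurial.cext import parsers

index, _cache = parsers.parse_index2(index_data, False)
print(index.entry_size)  # 64 for revlog v1 entries
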
PyTypeObject HgRevlogIndex_Type = {
    PyVarObject_HEAD_INIT(NULL, 0) /* header */
    "parsers.index",            /* tp_name */
    sizeof(indexObject),        /* tp_basicsize */
    0,                          /* tp_itemsize */
    (destructor)index_dealloc,  /* tp_dealloc */
    0,                          /* tp_print */
    0,                          /* tp_getattr */
    0,                          /* tp_setattr */
    0,                          /* tp_compare */
    0,                          /* tp_repr */
    0,                          /* tp_as_number */
    &index_sequence_methods,    /* tp_as_sequence */
    &index_mapping_methods,     /* tp_as_mapping */
    0,                          /* tp_hash */
    0,                          /* tp_call */
    0,                          /* tp_str */
    0,                          /* tp_getattro */
    0,                          /* tp_setattro */
    0,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,         /* tp_flags */
    "revlog index",             /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    index_methods,              /* tp_methods */
-   0,                          /* tp_members */
+   index_members,              /* tp_members */
    index_getset,               /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    (initproc)index_init,       /* tp_init */
    0,                          /* tp_alloc */
};

/*
 * returns a tuple of the form (index, cache) with elements as
 * follows:
 *
 * index: an index object that lazily parses Revlog (v1 or v2) records
 * cache: if data is inlined, a tuple (0, index_file_content), else None
 *        index_file_content could be a string, or a buffer
 *
 * added complications are for backwards compatibility
 */
PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs)
{
    PyObject *cache = NULL;
    indexObject *idx;
    int ret;

    idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
    if (idx == NULL)
        goto bail;

    ret = index_init(idx, args, kwargs);
    if (ret == -1)
        goto bail;

    if (idx->inlined) {
        cache = Py_BuildValue("iO", 0, idx->data);
        if (cache == NULL)
            goto bail;
    } else {
        cache = Py_None;
        Py_INCREF(cache);
    }

    return Py_BuildValue("NN", idx, cache);

bail:
    Py_XDECREF(idx);
    Py_XDECREF(cache);
    return NULL;
}

static Revlog_CAPI CAPI = {
    /* increment the abi_version field upon each change in the Revlog_CAPI
       struct or in the ABI of the listed functions */
    2,
    index_length,
    index_node,
    HgRevlogIndex_GetParents,
};

void revlog_module_init(PyObject *mod)
{
    PyObject *caps = NULL;
    HgRevlogIndex_Type.tp_new = PyType_GenericNew;
    if (PyType_Ready(&HgRevlogIndex_Type) < 0)
        return;
    Py_INCREF(&HgRevlogIndex_Type);
    PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);

    nodetreeType.tp_new = PyType_GenericNew;
    if (PyType_Ready(&nodetreeType) < 0)
        return;
    Py_INCREF(&nodetreeType);
    PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);

    caps = PyCapsule_New(&CAPI, "mercurial.cext.parsers.revlog_CAPI", NULL);
    if (caps != NULL)
        PyModule_AddObject(mod, "revlog_CAPI", caps);
}
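
The (index, cache) contract of parse_index2, seen from the calling side (illustrative; data and inline are placeholders):

index, cache = parsers.parse_index2(data, inline)
if inline:
    assert cache == (0, data)  # callers may reuse the raw file contents
else:
    assert cache is None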
@@ -1,352 +1,353 @@ mercurial/pure/parsers.py
# parsers.py - Python implementation of parsers.c
#
# Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import struct
import zlib

from ..node import nullid, nullrev
from .. import (
    pycompat,
    util,
)

from ..revlogutils import nodemap as nodemaputil
from ..revlogutils import constants as revlog_constants

stringio = pycompat.bytesio


_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress

# Some code below makes tuples directly because it's more convenient. However,
# code outside this module should always use dirstatetuple.
def dirstatetuple(*x):
    # x is a tuple
    return x


def gettype(q):
    return int(q & 0xFFFF)


def offset_type(offset, type):
    return int(int(offset) << 16 | type)


class BaseIndexObject(object):
    # Format of an index entry according to Python's `struct` language
    index_format = revlog_constants.INDEX_ENTRY_V1
    # Size of a C unsigned long long int, platform independent
    big_int_size = struct.calcsize(b'>Q')
    # Size of a C long int, platform independent
    int_size = struct.calcsize(b'>i')
-    # Size of the entire index format
-    index_size = revlog_constants.INDEX_ENTRY_V1.size
    # An empty index entry, used as a default value to be overridden, or nullrev
    null_item = (0, 0, 0, -1, -1, -1, -1, nullid)

+    @util.propertycache
+    def entry_size(self):
+        return self.index_format.size
+
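Deriving entry_size from index_format, instead of keeping a separate index_size constant per class, means Index2Mixin below only has to override index_format and the size follows automatically. util.propertycache computes the value once per instance and then stores it as a plain attribute; a minimal stand-in behaves roughly like this (sketch, not Mercurial's actual implementation):

class propertycache(object):
    def __init__(self, func):
        self.func = func
        self.name = func.__name__

    def __get__(self, obj, type=None):
        result = self.func(obj)
        obj.__dict__[self.name] = result  # caches: shadows the descriptor
        return result
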
    @property
    def nodemap(self):
        msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self._nodemap

    @util.propertycache
    def _nodemap(self):
        nodemap = nodemaputil.NodeMap({nullid: nullrev})
        for r in range(0, len(self)):
            n = self[r][7]
            nodemap[n] = r
        return nodemap

    def has_node(self, node):
        """return True if the node exist in the index"""
        return node in self._nodemap

    def rev(self, node):
        """return a revision for a node

        If the node is unknown, raise a RevlogError"""
        return self._nodemap[node]

    def get_rev(self, node):
        """return a revision for a node

        If the node is unknown, return None"""
        return self._nodemap.get(node)

    def _stripnodes(self, start):
        if '_nodemap' in vars(self):
            for r in range(start, len(self)):
                n = self[r][7]
                del self._nodemap[n]

    def clearcaches(self):
        self.__dict__.pop('_nodemap', None)

    def __len__(self):
        return self._lgt + len(self._extra)

    def append(self, tup):
        if '_nodemap' in vars(self):
            self._nodemap[tup[7]] = len(self)
        data = self.index_format.pack(*tup)
        self._extra.append(data)

    def _check_index(self, i):
        if not isinstance(i, int):
            raise TypeError(b"expecting int indexes")
        if i < 0 or i >= len(self):
            raise IndexError

    def __getitem__(self, i):
        if i == -1:
            return self.null_item
        self._check_index(i)
        if i >= self._lgt:
            data = self._extra[i - self._lgt]
        else:
            index = self._calculate_index(i)
-            data = self._data[index : index + self.index_size]
+            data = self._data[index : index + self.entry_size]
        r = self.index_format.unpack(data)
        if self._lgt and i == 0:
            r = (offset_type(0, gettype(r[0])),) + r[1:]
        return r

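A v1 index entry unpacks to an 8-tuple; the first field packs the data offset and the flags together, as offset_type() above shows. Illustrative field access (index is a placeholder for a parsed index object):

# (offset_and_flags, comp_len, uncomp_len, base_rev, link_rev, p1, p2, node)
entry = index[0]
offset = entry[0] >> 16    # offset_type() packs offset << 16 | flags
flags = gettype(entry[0])  # the low 16 bits
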
class IndexObject(BaseIndexObject):
    def __init__(self, data):
-        assert len(data) % self.index_size == 0
+        assert len(data) % self.entry_size == 0
        self._data = data
-        self._lgt = len(data) // self.index_size
+        self._lgt = len(data) // self.entry_size
        self._extra = []

    def _calculate_index(self, i):
-        return i * self.index_size
+        return i * self.entry_size

    def __delitem__(self, i):
        if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
            raise ValueError(b"deleting slices only supports a:-1 with step 1")
        i = i.start
        self._check_index(i)
        self._stripnodes(i)
        if i < self._lgt:
-            self._data = self._data[: i * self.index_size]
+            self._data = self._data[: i * self.entry_size]
            self._lgt = i
            self._extra = []
        else:
            self._extra = self._extra[: i - self._lgt]

class PersistentNodeMapIndexObject(IndexObject):
    """a Debug oriented class to test persistent nodemap

    We need a simple python object to test API and higher level behavior. See
    the Rust implementation for more serious usage. This should be used only
    through the dedicated `devel.persistent-nodemap` config.
    """

    def nodemap_data_all(self):
        """Return bytes containing a full serialization of a nodemap

        The nodemap should be valid for the full set of revisions in the
        index."""
        return nodemaputil.persistent_data(self)

    def nodemap_data_incremental(self):
        """Return bytes containing an incremental update to persistent nodemap

        This contains the data for an append-only update of the data provided
        in the last call to `update_nodemap_data`.
        """
        if self._nm_root is None:
            return None
        docket = self._nm_docket
        changed, data = nodemaputil.update_persistent_data(
            self, self._nm_root, self._nm_max_idx, self._nm_docket.tip_rev
        )

        self._nm_root = self._nm_max_idx = self._nm_docket = None
        return docket, changed, data

    def update_nodemap_data(self, docket, nm_data):
        """provide full block of persisted binary data for a nodemap

        The data are expected to come from disk. See `nodemap_data_all` for a
        producer of such data."""
        if nm_data is not None:
            self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
            if self._nm_root:
                self._nm_docket = docket
            else:
                self._nm_root = self._nm_max_idx = self._nm_docket = None

class InlinedIndexObject(BaseIndexObject):
    def __init__(self, data, inline=0):
        self._data = data
        self._lgt = self._inline_scan(None)
        self._inline_scan(self._lgt)
        self._extra = []

    def _inline_scan(self, lgt):
        off = 0
        if lgt is not None:
            self._offsets = [0] * lgt
        count = 0
-        while off <= len(self._data) - self.index_size:
+        while off <= len(self._data) - self.entry_size:
            start = off + self.big_int_size
            (s,) = struct.unpack(
                b'>i',
                self._data[start : start + self.int_size],
            )
            if lgt is not None:
                self._offsets[count] = off
            count += 1
-            off += self.index_size + s
+            off += self.entry_size + s
        if off != len(self._data):
            raise ValueError(b"corrupted data")
        return count

    def __delitem__(self, i):
        if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
            raise ValueError(b"deleting slices only supports a:-1 with step 1")
        i = i.start
        self._check_index(i)
        self._stripnodes(i)
        if i < self._lgt:
            self._offsets = self._offsets[:i]
            self._lgt = i
            self._extra = []
        else:
            self._extra = self._extra[: i - self._lgt]

    def _calculate_index(self, i):
        return self._offsets[i]

def parse_index2(data, inline, revlogv2=False):
    if not inline:
        cls = IndexObject2 if revlogv2 else IndexObject
        return cls(data), None
    cls = InlinedIndexObject2 if revlogv2 else InlinedIndexObject
    return cls(data, inline), (0, data)

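Illustrative use of the pure-Python entry point, mirroring the C parse_index2 above (index_file_data and some_node are placeholders):

index, cache = parse_index2(index_file_data, inline=False)
rev = index.rev(some_node)  # raises RevlogError for unknown nodes
entry = index[rev]
assert len(entry) == 8      # 10 fields with revlogv2=True
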
class Index2Mixin(object):
    index_format = revlog_constants.INDEX_ENTRY_V2
-    index_size = revlog_constants.INDEX_ENTRY_V2.size
    null_item = (0, 0, 0, -1, -1, -1, -1, nullid, 0, 0)

    def replace_sidedata_info(self, i, sidedata_offset, sidedata_length):
        """
        Replace an existing index entry's sidedata offset and length with new
        ones.
        This cannot be used outside of the context of sidedata rewriting,
        inside the transaction that creates the revision `i`.
        """
        if i < 0:
            raise KeyError
        self._check_index(i)
        sidedata_format = b">Qi"
        packed_size = struct.calcsize(sidedata_format)
        if i >= self._lgt:
            packed = _pack(sidedata_format, sidedata_offset, sidedata_length)
            old = self._extra[i - self._lgt]
            new = old[:64] + packed + old[64 + packed_size :]
            self._extra[i - self._lgt] = new
        else:
            msg = b"cannot rewrite entries outside of this transaction"
            raise KeyError(msg)

class IndexObject2(Index2Mixin, IndexObject):
    pass


class InlinedIndexObject2(Index2Mixin, InlinedIndexObject):
    def _inline_scan(self, lgt):
        sidedata_length_pos = 72
        off = 0
        if lgt is not None:
            self._offsets = [0] * lgt
        count = 0
-        while off <= len(self._data) - self.index_size:
+        while off <= len(self._data) - self.entry_size:
            start = off + self.big_int_size
            (data_size,) = struct.unpack(
                b'>i',
                self._data[start : start + self.int_size],
            )
            start = off + sidedata_length_pos
            (side_data_size,) = struct.unpack(
                b'>i', self._data[start : start + self.int_size]
            )
            if lgt is not None:
                self._offsets[count] = off
            count += 1
-            off += self.index_size + data_size + side_data_size
+            off += self.entry_size + data_size + side_data_size
        if off != len(self._data):
            raise ValueError(b"corrupted data")
        return count


def parse_index_devel_nodemap(data, inline):
    """like parse_index2, but always return a PersistentNodeMapIndexObject"""
    return PersistentNodeMapIndexObject(data), None

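v2 inline entries are followed by both revision data and sidedata, and both lengths come from the entry itself: the data length at offset 8, the sidedata length at offset 72. Computing the distance to the next entry (sketch; the helper name is illustrative):

import struct

def next_entry_offset(data, off, entry_size):
    # data length lives at +8, sidedata length at +72 within the entry
    (data_size,) = struct.unpack(b'>i', data[off + 8 : off + 12])
    (side_size,) = struct.unpack(b'>i', data[off + 72 : off + 76])
    return off + entry_size + data_size + side_size
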
def parse_dirstate(dmap, copymap, st):
    parents = [st[:20], st[20:40]]
    # dereference fields so they will be local in loop
    format = b">cllll"
    e_size = struct.calcsize(format)
    pos1 = 40
    l = len(st)

    # the inner loop
    while pos1 < l:
        pos2 = pos1 + e_size
        e = _unpack(b">cllll", st[pos1:pos2])  # a literal here is faster
        pos1 = pos2 + e[4]
        f = st[pos2:pos1]
        if b'\0' in f:
            f, c = f.split(b'\0')
            copymap[f] = c
        dmap[f] = e[:4]
    return parents

328 def pack_dirstate(dmap, copymap, pl, now):
329 def pack_dirstate(dmap, copymap, pl, now):
329 now = int(now)
330 now = int(now)
330 cs = stringio()
331 cs = stringio()
331 write = cs.write
332 write = cs.write
332 write(b"".join(pl))
333 write(b"".join(pl))
333 for f, e in pycompat.iteritems(dmap):
334 for f, e in pycompat.iteritems(dmap):
334 if e[0] == b'n' and e[3] == now:
335 if e[0] == b'n' and e[3] == now:
335 # The file was last modified "simultaneously" with the current
336 # The file was last modified "simultaneously" with the current
336 # write to dirstate (i.e. within the same second for file-
337 # write to dirstate (i.e. within the same second for file-
337 # systems with a granularity of 1 sec). This commonly happens
338 # systems with a granularity of 1 sec). This commonly happens
338 # for at least a couple of files on 'update'.
339 # for at least a couple of files on 'update'.
339 # The user could change the file without changing its size
340 # The user could change the file without changing its size
340 # within the same second. Invalidate the file's mtime in
341 # within the same second. Invalidate the file's mtime in
341 # dirstate, forcing future 'status' calls to compare the
342 # dirstate, forcing future 'status' calls to compare the
342 # contents of the file if the size is the same. This prevents
343 # contents of the file if the size is the same. This prevents
343 # mistakenly treating such files as clean.
344 # mistakenly treating such files as clean.
344 e = dirstatetuple(e[0], e[1], e[2], -1)
345 e = dirstatetuple(e[0], e[1], e[2], -1)
345 dmap[f] = e
346 dmap[f] = e
346
347
347 if f in copymap:
348 if f in copymap:
348 f = b"%s\0%s" % (f, copymap[f])
349 f = b"%s\0%s" % (f, copymap[f])
349 e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
350 e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
350 write(e)
351 write(e)
351 write(f)
352 write(f)
352 return cs.getvalue()
353 return cs.getvalue()
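The mtime handling above is the subtle part of pack_dirstate: when a file's recorded mtime equals the second in which the dirstate itself is written, the timestamp can no longer distinguish a later same-second edit, so it is stored as -1 to force a content comparison on the next status. The rule in isolation, as a small sketch:

def invalidate_ambiguous_mtime(entry, now):
    # entry: a (state, mode, size, mtime) tuple as kept in the dirstate map.
    # A file written in the same second as the dirstate proves nothing with
    # its mtime, so drop the timestamp and force a content check later.
    state, mode, size, mtime = entry
    if state == b'n' and mtime == int(now):
        return (state, mode, size, -1)
    return entry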
@@ -1,3249 +1,3242 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import binascii
16 import binascii
17 import collections
17 import collections
18 import contextlib
18 import contextlib
19 import errno
19 import errno
20 import io
20 import io
21 import os
21 import os
22 import struct
22 import struct
23 import zlib
23 import zlib
24
24
25 # import stuff from node for others to import from revlog
25 # import stuff from node for others to import from revlog
26 from .node import (
26 from .node import (
27 bin,
27 bin,
28 hex,
28 hex,
29 nullhex,
29 nullhex,
30 nullid,
30 nullid,
31 nullrev,
31 nullrev,
32 sha1nodeconstants,
32 sha1nodeconstants,
33 short,
33 short,
34 wdirfilenodeids,
34 wdirfilenodeids,
35 wdirhex,
35 wdirhex,
36 wdirid,
36 wdirid,
37 wdirrev,
37 wdirrev,
38 )
38 )
39 from .i18n import _
39 from .i18n import _
40 from .pycompat import getattr
40 from .pycompat import getattr
41 from .revlogutils.constants import (
41 from .revlogutils.constants import (
42 FLAG_GENERALDELTA,
42 FLAG_GENERALDELTA,
43 FLAG_INLINE_DATA,
43 FLAG_INLINE_DATA,
44 INDEX_ENTRY_V0,
44 INDEX_ENTRY_V0,
45 INDEX_ENTRY_V1,
45 INDEX_ENTRY_V1,
46 INDEX_ENTRY_V2,
46 INDEX_ENTRY_V2,
47 INDEX_HEADER,
47 INDEX_HEADER,
48 REVLOGV0,
48 REVLOGV0,
49 REVLOGV1,
49 REVLOGV1,
50 REVLOGV1_FLAGS,
50 REVLOGV1_FLAGS,
51 REVLOGV2,
51 REVLOGV2,
52 REVLOGV2_FLAGS,
52 REVLOGV2_FLAGS,
53 REVLOG_DEFAULT_FLAGS,
53 REVLOG_DEFAULT_FLAGS,
54 REVLOG_DEFAULT_FORMAT,
54 REVLOG_DEFAULT_FORMAT,
55 REVLOG_DEFAULT_VERSION,
55 REVLOG_DEFAULT_VERSION,
56 )
56 )
57 from .revlogutils.flagutil import (
57 from .revlogutils.flagutil import (
58 REVIDX_DEFAULT_FLAGS,
58 REVIDX_DEFAULT_FLAGS,
59 REVIDX_ELLIPSIS,
59 REVIDX_ELLIPSIS,
60 REVIDX_EXTSTORED,
60 REVIDX_EXTSTORED,
61 REVIDX_FLAGS_ORDER,
61 REVIDX_FLAGS_ORDER,
62 REVIDX_HASCOPIESINFO,
62 REVIDX_HASCOPIESINFO,
63 REVIDX_ISCENSORED,
63 REVIDX_ISCENSORED,
64 REVIDX_RAWTEXT_CHANGING_FLAGS,
64 REVIDX_RAWTEXT_CHANGING_FLAGS,
65 REVIDX_SIDEDATA,
65 REVIDX_SIDEDATA,
66 )
66 )
67 from .thirdparty import attr
67 from .thirdparty import attr
68 from . import (
68 from . import (
69 ancestor,
69 ancestor,
70 dagop,
70 dagop,
71 error,
71 error,
72 mdiff,
72 mdiff,
73 policy,
73 policy,
74 pycompat,
74 pycompat,
75 templatefilters,
75 templatefilters,
76 util,
76 util,
77 )
77 )
78 from .interfaces import (
78 from .interfaces import (
79 repository,
79 repository,
80 util as interfaceutil,
80 util as interfaceutil,
81 )
81 )
82 from .revlogutils import (
82 from .revlogutils import (
83 deltas as deltautil,
83 deltas as deltautil,
84 flagutil,
84 flagutil,
85 nodemap as nodemaputil,
85 nodemap as nodemaputil,
86 sidedata as sidedatautil,
86 sidedata as sidedatautil,
87 )
87 )
88 from .utils import (
88 from .utils import (
89 storageutil,
89 storageutil,
90 stringutil,
90 stringutil,
91 )
91 )
92
92
93 # blanked usage of all the names to prevent pyflakes constraints
93 # blanked usage of all the names to prevent pyflakes constraints
94 # We need these names available in the module for extensions.
94 # We need these names available in the module for extensions.
95 REVLOGV0
95 REVLOGV0
96 REVLOGV1
96 REVLOGV1
97 REVLOGV2
97 REVLOGV2
98 FLAG_INLINE_DATA
98 FLAG_INLINE_DATA
99 FLAG_GENERALDELTA
99 FLAG_GENERALDELTA
100 REVLOG_DEFAULT_FLAGS
100 REVLOG_DEFAULT_FLAGS
101 REVLOG_DEFAULT_FORMAT
101 REVLOG_DEFAULT_FORMAT
102 REVLOG_DEFAULT_VERSION
102 REVLOG_DEFAULT_VERSION
103 REVLOGV1_FLAGS
103 REVLOGV1_FLAGS
104 REVLOGV2_FLAGS
104 REVLOGV2_FLAGS
105 REVIDX_ISCENSORED
105 REVIDX_ISCENSORED
106 REVIDX_ELLIPSIS
106 REVIDX_ELLIPSIS
107 REVIDX_SIDEDATA
107 REVIDX_SIDEDATA
108 REVIDX_HASCOPIESINFO
108 REVIDX_HASCOPIESINFO
109 REVIDX_EXTSTORED
109 REVIDX_EXTSTORED
110 REVIDX_DEFAULT_FLAGS
110 REVIDX_DEFAULT_FLAGS
111 REVIDX_FLAGS_ORDER
111 REVIDX_FLAGS_ORDER
112 REVIDX_RAWTEXT_CHANGING_FLAGS
112 REVIDX_RAWTEXT_CHANGING_FLAGS
113
113
114 parsers = policy.importmod('parsers')
114 parsers = policy.importmod('parsers')
115 rustancestor = policy.importrust('ancestor')
115 rustancestor = policy.importrust('ancestor')
116 rustdagop = policy.importrust('dagop')
116 rustdagop = policy.importrust('dagop')
117 rustrevlog = policy.importrust('revlog')
117 rustrevlog = policy.importrust('revlog')
118
118
119 # Aliased for performance.
119 # Aliased for performance.
120 _zlibdecompress = zlib.decompress
120 _zlibdecompress = zlib.decompress
121
121
122 # max size of revlog with inline data
122 # max size of revlog with inline data
123 _maxinline = 131072
123 _maxinline = 131072
124 _chunksize = 1048576
124 _chunksize = 1048576
125
125
126 # Flag processors for REVIDX_ELLIPSIS.
126 # Flag processors for REVIDX_ELLIPSIS.
127 def ellipsisreadprocessor(rl, text):
127 def ellipsisreadprocessor(rl, text):
128 return text, False
128 return text, False
129
129
130
130
131 def ellipsiswriteprocessor(rl, text):
131 def ellipsiswriteprocessor(rl, text):
132 return text, False
132 return text, False
133
133
134
134
135 def ellipsisrawprocessor(rl, text):
135 def ellipsisrawprocessor(rl, text):
136 return False
136 return False
137
137
138
138
139 ellipsisprocessor = (
139 ellipsisprocessor = (
140 ellipsisreadprocessor,
140 ellipsisreadprocessor,
141 ellipsiswriteprocessor,
141 ellipsiswriteprocessor,
142 ellipsisrawprocessor,
142 ellipsisrawprocessor,
143 )
143 )
144
144
145
145
146 def getoffset(q):
146 def getoffset(q):
147 return int(q >> 16)
147 return int(q >> 16)
148
148
149
149
150 def gettype(q):
150 def gettype(q):
151 return int(q & 0xFFFF)
151 return int(q & 0xFFFF)
152
152
153
153
154 def offset_type(offset, type):
154 def offset_type(offset, type):
155 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
155 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
156 raise ValueError(b'unknown revlog index flags')
156 raise ValueError(b'unknown revlog index flags')
157 return int(int(offset) << 16 | type)
157 return int(int(offset) << 16 | type)
158
158
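getoffset, gettype and offset_type implement the packing of the first index field: the byte offset lives in the bits above 16 and the flag bits below. A quick round-trip check of that encoding:

# pack an offset of 4096 with flag value 0x1, then split it back apart
packed = (4096 << 16) | 0x1
assert packed >> 16 == 4096      # what getoffset() recovers
assert packed & 0xFFFF == 0x1    # what gettype() recovers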
159
159
160 def _verify_revision(rl, skipflags, state, node):
160 def _verify_revision(rl, skipflags, state, node):
161 """Verify the integrity of the given revlog ``node`` while providing a hook
161 """Verify the integrity of the given revlog ``node`` while providing a hook
162 point for extensions to influence the operation."""
162 point for extensions to influence the operation."""
163 if skipflags:
163 if skipflags:
164 state[b'skipread'].add(node)
164 state[b'skipread'].add(node)
165 else:
165 else:
166 # Side-effect: read content and verify hash.
166 # Side-effect: read content and verify hash.
167 rl.revision(node)
167 rl.revision(node)
168
168
169
169
170 # True if a fast implementation for persistent-nodemap is available
170 # True if a fast implementation for persistent-nodemap is available
171 #
171 #
172 # We also consider we have a "fast" implementation in "pure" python because
172 # We also consider we have a "fast" implementation in "pure" python because
173 # people using pure don't really have performance considerations (and a
173 # people using pure don't really have performance considerations (and a
174 # wheelbarrow of other slowness sources)
174 # wheelbarrow of other slowness sources)
175 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
175 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
176 parsers, 'BaseIndexObject'
176 parsers, 'BaseIndexObject'
177 )
177 )
178
178
179
179
180 @attr.s(slots=True, frozen=True)
180 @attr.s(slots=True, frozen=True)
181 class _revisioninfo(object):
181 class _revisioninfo(object):
182 """Information about a revision that allows building its fulltext
182 """Information about a revision that allows building its fulltext
183 node: expected hash of the revision
183 node: expected hash of the revision
184 p1, p2: parent revs of the revision
184 p1, p2: parent revs of the revision
185 btext: built text cache consisting of a one-element list
185 btext: built text cache consisting of a one-element list
186 cachedelta: (baserev, uncompressed_delta) or None
186 cachedelta: (baserev, uncompressed_delta) or None
187 flags: flags associated with the revision storage
187 flags: flags associated with the revision storage
188
188
189 One of btext[0] or cachedelta must be set.
189 One of btext[0] or cachedelta must be set.
190 """
190 """
191
191
192 node = attr.ib()
192 node = attr.ib()
193 p1 = attr.ib()
193 p1 = attr.ib()
194 p2 = attr.ib()
194 p2 = attr.ib()
195 btext = attr.ib()
195 btext = attr.ib()
196 textlen = attr.ib()
196 textlen = attr.ib()
197 cachedelta = attr.ib()
197 cachedelta = attr.ib()
198 flags = attr.ib()
198 flags = attr.ib()
199
199
200
200
201 @interfaceutil.implementer(repository.irevisiondelta)
201 @interfaceutil.implementer(repository.irevisiondelta)
202 @attr.s(slots=True)
202 @attr.s(slots=True)
203 class revlogrevisiondelta(object):
203 class revlogrevisiondelta(object):
204 node = attr.ib()
204 node = attr.ib()
205 p1node = attr.ib()
205 p1node = attr.ib()
206 p2node = attr.ib()
206 p2node = attr.ib()
207 basenode = attr.ib()
207 basenode = attr.ib()
208 flags = attr.ib()
208 flags = attr.ib()
209 baserevisionsize = attr.ib()
209 baserevisionsize = attr.ib()
210 revision = attr.ib()
210 revision = attr.ib()
211 delta = attr.ib()
211 delta = attr.ib()
212 sidedata = attr.ib()
212 sidedata = attr.ib()
213 linknode = attr.ib(default=None)
213 linknode = attr.ib(default=None)
214
214
215
215
216 @interfaceutil.implementer(repository.iverifyproblem)
216 @interfaceutil.implementer(repository.iverifyproblem)
217 @attr.s(frozen=True)
217 @attr.s(frozen=True)
218 class revlogproblem(object):
218 class revlogproblem(object):
219 warning = attr.ib(default=None)
219 warning = attr.ib(default=None)
220 error = attr.ib(default=None)
220 error = attr.ib(default=None)
221 node = attr.ib(default=None)
221 node = attr.ib(default=None)
222
222
223
223
224 class revlogoldindex(list):
224 class revlogoldindex(list):
225 entry_size = INDEX_ENTRY_V0.size
226
225 @property
227 @property
226 def nodemap(self):
228 def nodemap(self):
227 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
229 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
228 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
230 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
229 return self._nodemap
231 return self._nodemap
230
232
231 @util.propertycache
233 @util.propertycache
232 def _nodemap(self):
234 def _nodemap(self):
233 nodemap = nodemaputil.NodeMap({nullid: nullrev})
235 nodemap = nodemaputil.NodeMap({nullid: nullrev})
234 for r in range(0, len(self)):
236 for r in range(0, len(self)):
235 n = self[r][7]
237 n = self[r][7]
236 nodemap[n] = r
238 nodemap[n] = r
237 return nodemap
239 return nodemap
238
240
239 def has_node(self, node):
241 def has_node(self, node):
240 """return True if the node exist in the index"""
242 """return True if the node exist in the index"""
241 return node in self._nodemap
243 return node in self._nodemap
242
244
243 def rev(self, node):
245 def rev(self, node):
244 """return a revision for a node
246 """return a revision for a node
245
247
246 If the node is unknown, raise a RevlogError"""
248 If the node is unknown, raise a RevlogError"""
247 return self._nodemap[node]
249 return self._nodemap[node]
248
250
249 def get_rev(self, node):
251 def get_rev(self, node):
250 """return a revision for a node
252 """return a revision for a node
251
253
252 If the node is unknown, return None"""
254 If the node is unknown, return None"""
253 return self._nodemap.get(node)
255 return self._nodemap.get(node)
254
256
255 def append(self, tup):
257 def append(self, tup):
256 self._nodemap[tup[7]] = len(self)
258 self._nodemap[tup[7]] = len(self)
257 super(revlogoldindex, self).append(tup)
259 super(revlogoldindex, self).append(tup)
258
260
259 def __delitem__(self, i):
261 def __delitem__(self, i):
260 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
262 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
261 raise ValueError(b"deleting slices only supports a:-1 with step 1")
263 raise ValueError(b"deleting slices only supports a:-1 with step 1")
262 for r in pycompat.xrange(i.start, len(self)):
264 for r in pycompat.xrange(i.start, len(self)):
263 del self._nodemap[self[r][7]]
265 del self._nodemap[self[r][7]]
264 super(revlogoldindex, self).__delitem__(i)
266 super(revlogoldindex, self).__delitem__(i)
265
267
266 def clearcaches(self):
268 def clearcaches(self):
267 self.__dict__.pop('_nodemap', None)
269 self.__dict__.pop('_nodemap', None)
268
270
269 def __getitem__(self, i):
271 def __getitem__(self, i):
270 if i == -1:
272 if i == -1:
271 return (0, 0, 0, -1, -1, -1, -1, nullid)
273 return (0, 0, 0, -1, -1, -1, -1, nullid)
272 return list.__getitem__(self, i)
274 return list.__getitem__(self, i)
273
275
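revlogoldindex builds its node-to-rev mapping lazily by scanning field 7 (the node) of every entry, and answers index -1 with a synthetic null entry so callers can treat the null revision like any other. A reduced sketch of the same pattern on a plain list of tuples (nullid_stub stands in for the real constant):

nullid_stub = b'\0' * 20  # stand-in for the revlog nullid constant

class TinyIndex(list):
    def __getitem__(self, i):
        if i == -1:
            # synthetic entry answering for the null revision
            return (0, 0, 0, -1, -1, -1, -1, nullid_stub)
        return list.__getitem__(self, i)

    def rev_of(self, node):
        # linear stand-in for the cached _nodemap lookup
        for r, entry in enumerate(self):
            if entry[7] == node:
                return r
        raise KeyError(node)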
274
276
275 class revlogoldio(object):
277 class revlogoldio(object):
276 def __init__(self):
277 self.size = INDEX_ENTRY_V0.size
278
279 def parseindex(self, data, inline):
278 def parseindex(self, data, inline):
280 s = self.size
279 s = INDEX_ENTRY_V0.size
281 index = []
280 index = []
282 nodemap = nodemaputil.NodeMap({nullid: nullrev})
281 nodemap = nodemaputil.NodeMap({nullid: nullrev})
283 n = off = 0
282 n = off = 0
284 l = len(data)
283 l = len(data)
285 while off + s <= l:
284 while off + s <= l:
286 cur = data[off : off + s]
285 cur = data[off : off + s]
287 off += s
286 off += s
288 e = INDEX_ENTRY_V0.unpack(cur)
287 e = INDEX_ENTRY_V0.unpack(cur)
289 # transform to revlogv1 format
288 # transform to revlogv1 format
290 e2 = (
289 e2 = (
291 offset_type(e[0], 0),
290 offset_type(e[0], 0),
292 e[1],
291 e[1],
293 -1,
292 -1,
294 e[2],
293 e[2],
295 e[3],
294 e[3],
296 nodemap.get(e[4], nullrev),
295 nodemap.get(e[4], nullrev),
297 nodemap.get(e[5], nullrev),
296 nodemap.get(e[5], nullrev),
298 e[6],
297 e[6],
299 )
298 )
300 index.append(e2)
299 index.append(e2)
301 nodemap[e[6]] = n
300 nodemap[e[6]] = n
302 n += 1
301 n += 1
303
302
304 index = revlogoldindex(index)
303 index = revlogoldindex(index)
305 return index, None
304 return index, None
306
305
307 def packentry(self, entry, node, version, rev):
306 def packentry(self, entry, node, version, rev):
308 """return the binary representation of an entry
307 """return the binary representation of an entry
309
308
310 entry: a tuple containing all the values (see index.__getitem__)
309 entry: a tuple containing all the values (see index.__getitem__)
311 node: a callback to convert a revision to nodeid
310 node: a callback to convert a revision to nodeid
312 version: the changelog version
311 version: the changelog version
313 rev: the revision number
312 rev: the revision number
314 """
313 """
315 if gettype(entry[0]):
314 if gettype(entry[0]):
316 raise error.RevlogError(
315 raise error.RevlogError(
317 _(b'index entry flags need revlog version 1')
316 _(b'index entry flags need revlog version 1')
318 )
317 )
319 e2 = (
318 e2 = (
320 getoffset(entry[0]),
319 getoffset(entry[0]),
321 entry[1],
320 entry[1],
322 entry[3],
321 entry[3],
323 entry[4],
322 entry[4],
324 node(entry[5]),
323 node(entry[5]),
325 node(entry[6]),
324 node(entry[6]),
326 entry[7],
325 entry[7],
327 )
326 )
328 return INDEX_ENTRY_V0.pack(*e2)
327 return INDEX_ENTRY_V0.pack(*e2)
329
328
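parseindex above normalizes every on-disk v0 entry into the v1 in-memory tuple: parents are stored as nodeids in the v0 format, so they are resolved to revision numbers through the nodemap built during the same pass, and the uncompressed length (not recorded by v0) is filled with -1. A toy version of the per-entry transformation, assuming e is the unpacked v0 tuple (offset, size, base, linkrev, p1node, p2node, node):

def v0_to_v1_entry(e, nodemap, nullrev=-1):
    return (
        e[0] << 16,                  # offset_type(offset, 0)
        e[1],                        # compressed size
        -1,                          # uncompressed size: unknown in v0
        e[2],                        # delta base
        e[3],                        # linkrev
        nodemap.get(e[4], nullrev),  # p1 node resolved to a rev
        nodemap.get(e[5], nullrev),  # p2 node resolved to a rev
        e[6],                        # node
    )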
330
329
331 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
330 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
332 # signed integer)
331 # signed integer)
333 _maxentrysize = 0x7FFFFFFF
332 _maxentrysize = 0x7FFFFFFF
334
333
335
334
336 class revlogio(object):
335 class revlogio(object):
337 def __init__(self):
338 self.size = INDEX_ENTRY_V1.size
339
340 def parseindex(self, data, inline):
336 def parseindex(self, data, inline):
341 # call the C implementation to parse the index data
337 # call the C implementation to parse the index data
342 index, cache = parsers.parse_index2(data, inline)
338 index, cache = parsers.parse_index2(data, inline)
343 return index, cache
339 return index, cache
344
340
345 def packentry(self, entry, node, version, rev):
341 def packentry(self, entry, node, version, rev):
346 p = INDEX_ENTRY_V1.pack(*entry)
342 p = INDEX_ENTRY_V1.pack(*entry)
347 if rev == 0:
343 if rev == 0:
348 p = INDEX_HEADER.pack(version) + p[4:]
344 p = INDEX_HEADER.pack(version) + p[4:]
349 return p
345 return p
350
346
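This hunk carries the point of the change visible throughout the diff: the fixed per-entry size moves off the IO objects (their __init__ methods, which only set self.size, are deleted) and onto the index as entry_size. A hedged sketch of a call site after the change, assuming rl is an open revlog and rev a valid revision number:

# before this changeset: record_size = rl._io.size
record_size = rl.index.entry_size

# e.g. the byte offset of entry `rev` in a non-inline index file
offset = rev * record_size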
351
347
352 class revlogv2io(object):
348 class revlogv2io(object):
353 def __init__(self):
354 self.size = INDEX_ENTRY_V2.size
355
356 def parseindex(self, data, inline):
349 def parseindex(self, data, inline):
357 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
350 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
358 return index, cache
351 return index, cache
359
352
360 def packentry(self, entry, node, version, rev):
353 def packentry(self, entry, node, version, rev):
361 p = INDEX_ENTRY_V2.pack(*entry)
354 p = INDEX_ENTRY_V2.pack(*entry)
362 if rev == 0:
355 if rev == 0:
363 p = INDEX_HEADER.pack(version) + p[4:]
356 p = INDEX_HEADER.pack(version) + p[4:]
364 return p
357 return p
365
358
366
359
367 NodemapRevlogIO = None
360 NodemapRevlogIO = None
368
361
369 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
362 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
370
363
371 class NodemapRevlogIO(revlogio):
364 class NodemapRevlogIO(revlogio):
372 """A debug oriented IO class that return a PersistentNodeMapIndexObject
365 """A debug oriented IO class that return a PersistentNodeMapIndexObject
373
366
374 The PersistentNodeMapIndexObject is meant to test the persistent nodemap feature.
367 The PersistentNodeMapIndexObject is meant to test the persistent nodemap feature.
375 """
368 """
376
369
377 def parseindex(self, data, inline):
370 def parseindex(self, data, inline):
378 index, cache = parsers.parse_index_devel_nodemap(data, inline)
371 index, cache = parsers.parse_index_devel_nodemap(data, inline)
379 return index, cache
372 return index, cache
380
373
381
374
382 class rustrevlogio(revlogio):
375 class rustrevlogio(revlogio):
383 def parseindex(self, data, inline):
376 def parseindex(self, data, inline):
384 index, cache = super(rustrevlogio, self).parseindex(data, inline)
377 index, cache = super(rustrevlogio, self).parseindex(data, inline)
385 return rustrevlog.MixedIndex(index), cache
378 return rustrevlog.MixedIndex(index), cache
386
379
387
380
388 class revlog(object):
381 class revlog(object):
389 """
382 """
390 the underlying revision storage object
383 the underlying revision storage object
391
384
392 A revlog consists of two parts, an index and the revision data.
385 A revlog consists of two parts, an index and the revision data.
393
386
394 The index is a file with a fixed record size containing
387 The index is a file with a fixed record size containing
395 information on each revision, including its nodeid (hash), the
388 information on each revision, including its nodeid (hash), the
396 nodeids of its parents, the position and offset of its data within
389 nodeids of its parents, the position and offset of its data within
397 the data file, and the revision it's based on. Finally, each entry
390 the data file, and the revision it's based on. Finally, each entry
398 contains a linkrev entry that can serve as a pointer to external
391 contains a linkrev entry that can serve as a pointer to external
399 data.
392 data.
400
393
401 The revision data itself is a linear collection of data chunks.
394 The revision data itself is a linear collection of data chunks.
402 Each chunk represents a revision and is usually represented as a
395 Each chunk represents a revision and is usually represented as a
403 delta against the previous chunk. To bound lookup time, runs of
396 delta against the previous chunk. To bound lookup time, runs of
404 deltas are limited to about 2 times the length of the original
397 deltas are limited to about 2 times the length of the original
405 version data. This makes retrieval of a version proportional to
398 version data. This makes retrieval of a version proportional to
406 its size, or O(1) relative to the number of revisions.
399 its size, or O(1) relative to the number of revisions.
407
400
408 Both pieces of the revlog are written to in an append-only
401 Both pieces of the revlog are written to in an append-only
409 fashion, which means we never need to rewrite a file to insert or
402 fashion, which means we never need to rewrite a file to insert or
410 remove data, and can use some simple techniques to avoid the need
403 remove data, and can use some simple techniques to avoid the need
411 for locking while reading.
404 for locking while reading.
412
405
413 If checkambig, indexfile is opened with checkambig=True at
406 If checkambig, indexfile is opened with checkambig=True at
414 writing, to avoid file stat ambiguity.
407 writing, to avoid file stat ambiguity.
415
408
416 If mmaplargeindex is True, and an mmapindexthreshold is set, the
409 If mmaplargeindex is True, and an mmapindexthreshold is set, the
417 index will be mmapped rather than read if it is larger than the
410 index will be mmapped rather than read if it is larger than the
418 configured threshold.
411 configured threshold.
419
412
420 If censorable is True, the revlog can have censored revisions.
413 If censorable is True, the revlog can have censored revisions.
421
414
422 If `upperboundcomp` is not None, this is the expected maximal gain from
415 If `upperboundcomp` is not None, this is the expected maximal gain from
423 compression for the data content.
416 compression for the data content.
424
417
425 `concurrencychecker` is an optional function that receives 3 arguments: a
418 `concurrencychecker` is an optional function that receives 3 arguments: a
426 file handle, a filename, and an expected position. It should check whether
419 file handle, a filename, and an expected position. It should check whether
427 the current position in the file handle is valid, and log/warn/fail (by
420 the current position in the file handle is valid, and log/warn/fail (by
428 raising).
421 raising).
429 """
422 """
430
423
431 _flagserrorclass = error.RevlogError
424 _flagserrorclass = error.RevlogError
432
425
433 def __init__(
426 def __init__(
434 self,
427 self,
435 opener,
428 opener,
436 indexfile,
429 indexfile,
437 datafile=None,
430 datafile=None,
438 checkambig=False,
431 checkambig=False,
439 mmaplargeindex=False,
432 mmaplargeindex=False,
440 censorable=False,
433 censorable=False,
441 upperboundcomp=None,
434 upperboundcomp=None,
442 persistentnodemap=False,
435 persistentnodemap=False,
443 concurrencychecker=None,
436 concurrencychecker=None,
444 ):
437 ):
445 """
438 """
446 create a revlog object
439 create a revlog object
447
440
448 opener is a function that abstracts the file opening operation
441 opener is a function that abstracts the file opening operation
449 and can be used to implement COW semantics or the like.
442 and can be used to implement COW semantics or the like.
450
443
451 """
444 """
452 self.upperboundcomp = upperboundcomp
445 self.upperboundcomp = upperboundcomp
453 self.indexfile = indexfile
446 self.indexfile = indexfile
454 self.datafile = datafile or (indexfile[:-2] + b".d")
447 self.datafile = datafile or (indexfile[:-2] + b".d")
455 self.nodemap_file = None
448 self.nodemap_file = None
456 if persistentnodemap:
449 if persistentnodemap:
457 self.nodemap_file = nodemaputil.get_nodemap_file(
450 self.nodemap_file = nodemaputil.get_nodemap_file(
458 opener, self.indexfile
451 opener, self.indexfile
459 )
452 )
460
453
461 self.opener = opener
454 self.opener = opener
462 # When True, indexfile is opened with checkambig=True at writing, to
455 # When True, indexfile is opened with checkambig=True at writing, to
463 # avoid file stat ambiguity.
456 # avoid file stat ambiguity.
464 self._checkambig = checkambig
457 self._checkambig = checkambig
465 self._mmaplargeindex = mmaplargeindex
458 self._mmaplargeindex = mmaplargeindex
466 self._censorable = censorable
459 self._censorable = censorable
467 # 3-tuple of (node, rev, text) for a raw revision.
460 # 3-tuple of (node, rev, text) for a raw revision.
468 self._revisioncache = None
461 self._revisioncache = None
469 # Maps rev to chain base rev.
462 # Maps rev to chain base rev.
470 self._chainbasecache = util.lrucachedict(100)
463 self._chainbasecache = util.lrucachedict(100)
471 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
464 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
472 self._chunkcache = (0, b'')
465 self._chunkcache = (0, b'')
473 # How much data to read and cache into the raw revlog data cache.
466 # How much data to read and cache into the raw revlog data cache.
474 self._chunkcachesize = 65536
467 self._chunkcachesize = 65536
475 self._maxchainlen = None
468 self._maxchainlen = None
476 self._deltabothparents = True
469 self._deltabothparents = True
477 self.index = None
470 self.index = None
478 self._nodemap_docket = None
471 self._nodemap_docket = None
479 # Mapping of partial identifiers to full nodes.
472 # Mapping of partial identifiers to full nodes.
480 self._pcache = {}
473 self._pcache = {}
481 # Mapping of revision integer to full node.
474 # Mapping of revision integer to full node.
482 self._compengine = b'zlib'
475 self._compengine = b'zlib'
483 self._compengineopts = {}
476 self._compengineopts = {}
484 self._maxdeltachainspan = -1
477 self._maxdeltachainspan = -1
485 self._withsparseread = False
478 self._withsparseread = False
486 self._sparserevlog = False
479 self._sparserevlog = False
487 self._srdensitythreshold = 0.50
480 self._srdensitythreshold = 0.50
488 self._srmingapsize = 262144
481 self._srmingapsize = 262144
489
482
490 # Make copy of flag processors so each revlog instance can support
483 # Make copy of flag processors so each revlog instance can support
491 # custom flags.
484 # custom flags.
492 self._flagprocessors = dict(flagutil.flagprocessors)
485 self._flagprocessors = dict(flagutil.flagprocessors)
493
486
494 # 2-tuple of file handles being used for active writing.
487 # 2-tuple of file handles being used for active writing.
495 self._writinghandles = None
488 self._writinghandles = None
496
489
497 self._loadindex()
490 self._loadindex()
498
491
499 self._concurrencychecker = concurrencychecker
492 self._concurrencychecker = concurrencychecker
500
493
501 def _loadindex(self):
494 def _loadindex(self):
502 mmapindexthreshold = None
495 mmapindexthreshold = None
503 opts = self.opener.options
496 opts = self.opener.options
504
497
505 if b'revlogv2' in opts:
498 if b'revlogv2' in opts:
506 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
499 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
507 elif b'revlogv1' in opts:
500 elif b'revlogv1' in opts:
508 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
501 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
509 if b'generaldelta' in opts:
502 if b'generaldelta' in opts:
510 newversionflags |= FLAG_GENERALDELTA
503 newversionflags |= FLAG_GENERALDELTA
511 elif b'revlogv0' in self.opener.options:
504 elif b'revlogv0' in self.opener.options:
512 newversionflags = REVLOGV0
505 newversionflags = REVLOGV0
513 else:
506 else:
514 newversionflags = REVLOG_DEFAULT_VERSION
507 newversionflags = REVLOG_DEFAULT_VERSION
515
508
516 if b'chunkcachesize' in opts:
509 if b'chunkcachesize' in opts:
517 self._chunkcachesize = opts[b'chunkcachesize']
510 self._chunkcachesize = opts[b'chunkcachesize']
518 if b'maxchainlen' in opts:
511 if b'maxchainlen' in opts:
519 self._maxchainlen = opts[b'maxchainlen']
512 self._maxchainlen = opts[b'maxchainlen']
520 if b'deltabothparents' in opts:
513 if b'deltabothparents' in opts:
521 self._deltabothparents = opts[b'deltabothparents']
514 self._deltabothparents = opts[b'deltabothparents']
522 self._lazydelta = bool(opts.get(b'lazydelta', True))
515 self._lazydelta = bool(opts.get(b'lazydelta', True))
523 self._lazydeltabase = False
516 self._lazydeltabase = False
524 if self._lazydelta:
517 if self._lazydelta:
525 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
518 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
526 if b'compengine' in opts:
519 if b'compengine' in opts:
527 self._compengine = opts[b'compengine']
520 self._compengine = opts[b'compengine']
528 if b'zlib.level' in opts:
521 if b'zlib.level' in opts:
529 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
522 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
530 if b'zstd.level' in opts:
523 if b'zstd.level' in opts:
531 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
524 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
532 if b'maxdeltachainspan' in opts:
525 if b'maxdeltachainspan' in opts:
533 self._maxdeltachainspan = opts[b'maxdeltachainspan']
526 self._maxdeltachainspan = opts[b'maxdeltachainspan']
534 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
527 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
535 mmapindexthreshold = opts[b'mmapindexthreshold']
528 mmapindexthreshold = opts[b'mmapindexthreshold']
536 self.hassidedata = bool(opts.get(b'side-data', False))
529 self.hassidedata = bool(opts.get(b'side-data', False))
537 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
530 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
538 withsparseread = bool(opts.get(b'with-sparse-read', False))
531 withsparseread = bool(opts.get(b'with-sparse-read', False))
539 # sparse-revlog forces sparse-read
532 # sparse-revlog forces sparse-read
540 self._withsparseread = self._sparserevlog or withsparseread
533 self._withsparseread = self._sparserevlog or withsparseread
541 if b'sparse-read-density-threshold' in opts:
534 if b'sparse-read-density-threshold' in opts:
542 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
535 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
543 if b'sparse-read-min-gap-size' in opts:
536 if b'sparse-read-min-gap-size' in opts:
544 self._srmingapsize = opts[b'sparse-read-min-gap-size']
537 self._srmingapsize = opts[b'sparse-read-min-gap-size']
545 if opts.get(b'enableellipsis'):
538 if opts.get(b'enableellipsis'):
546 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
539 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
547
540
548 # revlog v0 doesn't have flag processors
541 # revlog v0 doesn't have flag processors
549 for flag, processor in pycompat.iteritems(
542 for flag, processor in pycompat.iteritems(
550 opts.get(b'flagprocessors', {})
543 opts.get(b'flagprocessors', {})
551 ):
544 ):
552 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
545 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
553
546
554 if self._chunkcachesize <= 0:
547 if self._chunkcachesize <= 0:
555 raise error.RevlogError(
548 raise error.RevlogError(
556 _(b'revlog chunk cache size %r is not greater than 0')
549 _(b'revlog chunk cache size %r is not greater than 0')
557 % self._chunkcachesize
550 % self._chunkcachesize
558 )
551 )
559 elif self._chunkcachesize & (self._chunkcachesize - 1):
552 elif self._chunkcachesize & (self._chunkcachesize - 1):
560 raise error.RevlogError(
553 raise error.RevlogError(
561 _(b'revlog chunk cache size %r is not a power of 2')
554 _(b'revlog chunk cache size %r is not a power of 2')
562 % self._chunkcachesize
555 % self._chunkcachesize
563 )
556 )
564
557
565 indexdata = b''
558 indexdata = b''
566 self._initempty = True
559 self._initempty = True
567 try:
560 try:
568 with self._indexfp() as f:
561 with self._indexfp() as f:
569 if (
562 if (
570 mmapindexthreshold is not None
563 mmapindexthreshold is not None
571 and self.opener.fstat(f).st_size >= mmapindexthreshold
564 and self.opener.fstat(f).st_size >= mmapindexthreshold
572 ):
565 ):
573 # TODO: should .close() to release resources without
566 # TODO: should .close() to release resources without
574 # relying on Python GC
567 # relying on Python GC
575 indexdata = util.buffer(util.mmapread(f))
568 indexdata = util.buffer(util.mmapread(f))
576 else:
569 else:
577 indexdata = f.read()
570 indexdata = f.read()
578 if len(indexdata) > 0:
571 if len(indexdata) > 0:
579 versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
572 versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
580 self._initempty = False
573 self._initempty = False
581 else:
574 else:
582 versionflags = newversionflags
575 versionflags = newversionflags
583 except IOError as inst:
576 except IOError as inst:
584 if inst.errno != errno.ENOENT:
577 if inst.errno != errno.ENOENT:
585 raise
578 raise
586
579
587 versionflags = newversionflags
580 versionflags = newversionflags
588
581
589 self.version = versionflags
582 self.version = versionflags
590
583
591 flags = versionflags & ~0xFFFF
584 flags = versionflags & ~0xFFFF
592 fmt = versionflags & 0xFFFF
585 fmt = versionflags & 0xFFFF
593
586
594 if fmt == REVLOGV0:
587 if fmt == REVLOGV0:
595 if flags:
588 if flags:
596 raise error.RevlogError(
589 raise error.RevlogError(
597 _(b'unknown flags (%#04x) in version %d revlog %s')
590 _(b'unknown flags (%#04x) in version %d revlog %s')
598 % (flags >> 16, fmt, self.indexfile)
591 % (flags >> 16, fmt, self.indexfile)
599 )
592 )
600
593
601 self._inline = False
594 self._inline = False
602 self._generaldelta = False
595 self._generaldelta = False
603
596
604 elif fmt == REVLOGV1:
597 elif fmt == REVLOGV1:
605 if flags & ~REVLOGV1_FLAGS:
598 if flags & ~REVLOGV1_FLAGS:
606 raise error.RevlogError(
599 raise error.RevlogError(
607 _(b'unknown flags (%#04x) in version %d revlog %s')
600 _(b'unknown flags (%#04x) in version %d revlog %s')
608 % (flags >> 16, fmt, self.indexfile)
601 % (flags >> 16, fmt, self.indexfile)
609 )
602 )
610
603
611 self._inline = versionflags & FLAG_INLINE_DATA
604 self._inline = versionflags & FLAG_INLINE_DATA
612 self._generaldelta = versionflags & FLAG_GENERALDELTA
605 self._generaldelta = versionflags & FLAG_GENERALDELTA
613
606
614 elif fmt == REVLOGV2:
607 elif fmt == REVLOGV2:
615 if flags & ~REVLOGV2_FLAGS:
608 if flags & ~REVLOGV2_FLAGS:
616 raise error.RevlogError(
609 raise error.RevlogError(
617 _(b'unknown flags (%#04x) in version %d revlog %s')
610 _(b'unknown flags (%#04x) in version %d revlog %s')
618 % (flags >> 16, fmt, self.indexfile)
611 % (flags >> 16, fmt, self.indexfile)
619 )
612 )
620
613
621 # There is a bug in the transaction handling when going from an
614 # There is a bug in the transaction handling when going from an
622 # inline revlog to a separate index and data file. Turn it off until
615 # inline revlog to a separate index and data file. Turn it off until
623 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
616 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
624 # See issue6485
617 # See issue6485
625 self._inline = False
618 self._inline = False
626 # generaldelta implied by version 2 revlogs.
619 # generaldelta implied by version 2 revlogs.
627 self._generaldelta = True
620 self._generaldelta = True
628
621
629 else:
622 else:
630 raise error.RevlogError(
623 raise error.RevlogError(
631 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
624 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
632 )
625 )
633
626
634 self.nodeconstants = sha1nodeconstants
627 self.nodeconstants = sha1nodeconstants
635 self.nullid = self.nodeconstants.nullid
628 self.nullid = self.nodeconstants.nullid
636
629
637 # sparse-revlog can't be on without general-delta (issue6056)
630 # sparse-revlog can't be on without general-delta (issue6056)
638 if not self._generaldelta:
631 if not self._generaldelta:
639 self._sparserevlog = False
632 self._sparserevlog = False
640
633
641 self._storedeltachains = True
634 self._storedeltachains = True
642
635
643 devel_nodemap = (
636 devel_nodemap = (
644 self.nodemap_file
637 self.nodemap_file
645 and opts.get(b'devel-force-nodemap', False)
638 and opts.get(b'devel-force-nodemap', False)
646 and NodemapRevlogIO is not None
639 and NodemapRevlogIO is not None
647 )
640 )
648
641
649 use_rust_index = False
642 use_rust_index = False
650 if rustrevlog is not None:
643 if rustrevlog is not None:
651 if self.nodemap_file is not None:
644 if self.nodemap_file is not None:
652 use_rust_index = True
645 use_rust_index = True
653 else:
646 else:
654 use_rust_index = self.opener.options.get(b'rust.index')
647 use_rust_index = self.opener.options.get(b'rust.index')
655
648
656 self._io = revlogio()
649 self._io = revlogio()
657 if self.version == REVLOGV0:
650 if self.version == REVLOGV0:
658 self._io = revlogoldio()
651 self._io = revlogoldio()
659 elif fmt == REVLOGV2:
652 elif fmt == REVLOGV2:
660 self._io = revlogv2io()
653 self._io = revlogv2io()
661 elif devel_nodemap:
654 elif devel_nodemap:
662 self._io = NodemapRevlogIO()
655 self._io = NodemapRevlogIO()
663 elif use_rust_index:
656 elif use_rust_index:
664 self._io = rustrevlogio()
657 self._io = rustrevlogio()
665 try:
658 try:
666 d = self._io.parseindex(indexdata, self._inline)
659 d = self._io.parseindex(indexdata, self._inline)
667 index, _chunkcache = d
660 index, _chunkcache = d
668 use_nodemap = (
661 use_nodemap = (
669 not self._inline
662 not self._inline
670 and self.nodemap_file is not None
663 and self.nodemap_file is not None
671 and util.safehasattr(index, 'update_nodemap_data')
664 and util.safehasattr(index, 'update_nodemap_data')
672 )
665 )
673 if use_nodemap:
666 if use_nodemap:
674 nodemap_data = nodemaputil.persisted_data(self)
667 nodemap_data = nodemaputil.persisted_data(self)
675 if nodemap_data is not None:
668 if nodemap_data is not None:
676 docket = nodemap_data[0]
669 docket = nodemap_data[0]
677 if (
670 if (
678 len(d[0]) > docket.tip_rev
671 len(d[0]) > docket.tip_rev
679 and d[0][docket.tip_rev][7] == docket.tip_node
672 and d[0][docket.tip_rev][7] == docket.tip_node
680 ):
673 ):
681 # no changelog tampering
674 # no changelog tampering
682 self._nodemap_docket = docket
675 self._nodemap_docket = docket
683 index.update_nodemap_data(*nodemap_data)
676 index.update_nodemap_data(*nodemap_data)
684 except (ValueError, IndexError):
677 except (ValueError, IndexError):
685 raise error.RevlogError(
678 raise error.RevlogError(
686 _(b"index %s is corrupted") % self.indexfile
679 _(b"index %s is corrupted") % self.indexfile
687 )
680 )
688 self.index, self._chunkcache = d
681 self.index, self._chunkcache = d
689 if not self._chunkcache:
682 if not self._chunkcache:
690 self._chunkclear()
683 self._chunkclear()
691 # revnum -> (chain-length, sum-delta-length)
684 # revnum -> (chain-length, sum-delta-length)
692 self._chaininfocache = util.lrucachedict(500)
685 self._chaininfocache = util.lrucachedict(500)
693 # revlog header -> revlog compressor
686 # revlog header -> revlog compressor
694 self._decompressors = {}
687 self._decompressors = {}
695
688
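As _loadindex shows, a revlog's first four bytes serve both as the first entry's offset field and as the format header: the low 16 bits carry the format version, the high bits carry feature flags such as inline data and generaldelta. A small illustration of that split (flag values as defined in revlogutils.constants):

import struct

FLAG_INLINE_DATA = 1 << 16
FLAG_GENERALDELTA = 1 << 17
REVLOGV1 = 1

header = struct.pack(b'>I', REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA)
(versionflags,) = struct.unpack(b'>I', header)

fmt = versionflags & 0xFFFF      # -> 1, i.e. REVLOGV1
flags = versionflags & ~0xFFFF   # -> the inline + generaldelta bits
assert fmt == REVLOGV1
assert flags & FLAG_INLINE_DATA and flags & FLAG_GENERALDELTA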
696 @util.propertycache
689 @util.propertycache
697 def _compressor(self):
690 def _compressor(self):
698 engine = util.compengines[self._compengine]
691 engine = util.compengines[self._compengine]
699 return engine.revlogcompressor(self._compengineopts)
692 return engine.revlogcompressor(self._compengineopts)
700
693
701 def _indexfp(self, mode=b'r'):
694 def _indexfp(self, mode=b'r'):
702 """file object for the revlog's index file"""
695 """file object for the revlog's index file"""
703 args = {'mode': mode}
696 args = {'mode': mode}
704 if mode != b'r':
697 if mode != b'r':
705 args['checkambig'] = self._checkambig
698 args['checkambig'] = self._checkambig
706 if mode == b'w':
699 if mode == b'w':
707 args['atomictemp'] = True
700 args['atomictemp'] = True
708 return self.opener(self.indexfile, **args)
701 return self.opener(self.indexfile, **args)
709
702
710 def _datafp(self, mode=b'r'):
703 def _datafp(self, mode=b'r'):
711 """file object for the revlog's data file"""
704 """file object for the revlog's data file"""
712 return self.opener(self.datafile, mode=mode)
705 return self.opener(self.datafile, mode=mode)
713
706
714 @contextlib.contextmanager
707 @contextlib.contextmanager
715 def _datareadfp(self, existingfp=None):
708 def _datareadfp(self, existingfp=None):
716 """file object suitable to read data"""
709 """file object suitable to read data"""
717 # Use explicit file handle, if given.
710 # Use explicit file handle, if given.
718 if existingfp is not None:
711 if existingfp is not None:
719 yield existingfp
712 yield existingfp
720
713
721 # Use a file handle being actively used for writes, if available.
714 # Use a file handle being actively used for writes, if available.
722 # There is some danger to doing this because reads will seek the
715 # There is some danger to doing this because reads will seek the
723 # file. However, _writeentry() performs a SEEK_END before all writes,
716 # file. However, _writeentry() performs a SEEK_END before all writes,
724 # so we should be safe.
717 # so we should be safe.
725 elif self._writinghandles:
718 elif self._writinghandles:
726 if self._inline:
719 if self._inline:
727 yield self._writinghandles[0]
720 yield self._writinghandles[0]
728 else:
721 else:
729 yield self._writinghandles[1]
722 yield self._writinghandles[1]
730
723
731 # Otherwise open a new file handle.
724 # Otherwise open a new file handle.
732 else:
725 else:
733 if self._inline:
726 if self._inline:
734 func = self._indexfp
727 func = self._indexfp
735 else:
728 else:
736 func = self._datafp
729 func = self._datafp
737 with func() as fp:
730 with func() as fp:
738 yield fp
731 yield fp
739
732
740 def tiprev(self):
733 def tiprev(self):
741 return len(self.index) - 1
734 return len(self.index) - 1
742
735
743 def tip(self):
736 def tip(self):
744 return self.node(self.tiprev())
737 return self.node(self.tiprev())
745
738
746 def __contains__(self, rev):
739 def __contains__(self, rev):
747 return 0 <= rev < len(self)
740 return 0 <= rev < len(self)
748
741
749 def __len__(self):
742 def __len__(self):
750 return len(self.index)
743 return len(self.index)
751
744
752 def __iter__(self):
745 def __iter__(self):
753 return iter(pycompat.xrange(len(self)))
746 return iter(pycompat.xrange(len(self)))
754
747
755 def revs(self, start=0, stop=None):
748 def revs(self, start=0, stop=None):
756 """iterate over all rev in this revlog (from start to stop)"""
749 """iterate over all rev in this revlog (from start to stop)"""
757 return storageutil.iterrevs(len(self), start=start, stop=stop)
750 return storageutil.iterrevs(len(self), start=start, stop=stop)
758
751
759 @property
752 @property
760 def nodemap(self):
753 def nodemap(self):
761 msg = (
754 msg = (
762 b"revlog.nodemap is deprecated, "
755 b"revlog.nodemap is deprecated, "
763 b"use revlog.index.[has_node|rev|get_rev]"
756 b"use revlog.index.[has_node|rev|get_rev]"
764 )
757 )
765 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
758 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
766 return self.index.nodemap
759 return self.index.nodemap
767
760
768 @property
761 @property
769 def _nodecache(self):
762 def _nodecache(self):
770 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
763 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
771 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
764 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
772 return self.index.nodemap
765 return self.index.nodemap
773
766
774 def hasnode(self, node):
767 def hasnode(self, node):
775 try:
768 try:
776 self.rev(node)
769 self.rev(node)
777 return True
770 return True
778 except KeyError:
771 except KeyError:
779 return False
772 return False
780
773
781 def candelta(self, baserev, rev):
774 def candelta(self, baserev, rev):
782 """whether two revisions (baserev, rev) can be delta-ed or not"""
775 """whether two revisions (baserev, rev) can be delta-ed or not"""
783 # Disable delta if either rev requires a content-changing flag
776 # Disable delta if either rev requires a content-changing flag
784 # processor (ex. LFS). This is because such flag processor can alter
777 # processor (ex. LFS). This is because such flag processor can alter
785 # the rawtext content that the delta will be based on, and two clients
778 # the rawtext content that the delta will be based on, and two clients
786 # could have a same revlog node with different flags (i.e. different
779 # could have a same revlog node with different flags (i.e. different
787 # rawtext contents) and the delta could be incompatible.
780 # rawtext contents) and the delta could be incompatible.
788 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
781 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
789 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
782 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
790 ):
783 ):
791 return False
784 return False
792 return True
785 return True
793
786
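candelta thus reduces to a flag mask: a delta is refused whenever either endpoint carries a flag that may rewrite the rawtext, since the reconstructed base could then differ between clients. The check in isolation, with a hypothetical mask value chosen only for the sketch:

REVIDX_RAWTEXT_CHANGING_FLAGS = 1 << 13  # hypothetical value for the sketch

def can_delta(base_flags, rev_flags):
    # refuse a delta when either revision's rawtext may be rewritten
    return not ((base_flags | rev_flags) & REVIDX_RAWTEXT_CHANGING_FLAGS)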
794 def update_caches(self, transaction):
787 def update_caches(self, transaction):
795 if self.nodemap_file is not None:
788 if self.nodemap_file is not None:
796 if transaction is None:
789 if transaction is None:
797 nodemaputil.update_persistent_nodemap(self)
790 nodemaputil.update_persistent_nodemap(self)
798 else:
791 else:
799 nodemaputil.setup_persistent_nodemap(transaction, self)
792 nodemaputil.setup_persistent_nodemap(transaction, self)
800
793
801 def clearcaches(self):
794 def clearcaches(self):
802 self._revisioncache = None
795 self._revisioncache = None
803 self._chainbasecache.clear()
796 self._chainbasecache.clear()
804 self._chunkcache = (0, b'')
797 self._chunkcache = (0, b'')
805 self._pcache = {}
798 self._pcache = {}
806 self._nodemap_docket = None
799 self._nodemap_docket = None
807 self.index.clearcaches()
800 self.index.clearcaches()
808 # The python code is the one responsible for validating the docket, so we
801 # The python code is the one responsible for validating the docket, so we
809 # end up having to refresh it here.
802 # end up having to refresh it here.
810 use_nodemap = (
803 use_nodemap = (
811 not self._inline
804 not self._inline
812 and self.nodemap_file is not None
805 and self.nodemap_file is not None
813 and util.safehasattr(self.index, 'update_nodemap_data')
806 and util.safehasattr(self.index, 'update_nodemap_data')
814 )
807 )
815 if use_nodemap:
808 if use_nodemap:
816 nodemap_data = nodemaputil.persisted_data(self)
809 nodemap_data = nodemaputil.persisted_data(self)
817 if nodemap_data is not None:
810 if nodemap_data is not None:
818 self._nodemap_docket = nodemap_data[0]
811 self._nodemap_docket = nodemap_data[0]
819 self.index.update_nodemap_data(*nodemap_data)
812 self.index.update_nodemap_data(*nodemap_data)
820
813
821 def rev(self, node):
814 def rev(self, node):
822 try:
815 try:
823 return self.index.rev(node)
816 return self.index.rev(node)
824 except TypeError:
817 except TypeError:
825 raise
818 raise
826 except error.RevlogError:
819 except error.RevlogError:
827 # parsers.c radix tree lookup failed
820 # parsers.c radix tree lookup failed
828 if node == wdirid or node in wdirfilenodeids:
821 if node == wdirid or node in wdirfilenodeids:
829 raise error.WdirUnsupported
822 raise error.WdirUnsupported
830 raise error.LookupError(node, self.indexfile, _(b'no node'))
823 raise error.LookupError(node, self.indexfile, _(b'no node'))
831
824
832 # Accessors for index entries.
825 # Accessors for index entries.
833
826
834 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
827 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
835 # are flags.
828 # are flags.
836 def start(self, rev):
829 def start(self, rev):
837 return int(self.index[rev][0] >> 16)
830 return int(self.index[rev][0] >> 16)
838
831
839 def flags(self, rev):
832 def flags(self, rev):
840 return self.index[rev][0] & 0xFFFF
833 return self.index[rev][0] & 0xFFFF
841
834
842 def length(self, rev):
835 def length(self, rev):
843 return self.index[rev][1]
836 return self.index[rev][1]
844
837
845 def sidedata_length(self, rev):
838 def sidedata_length(self, rev):
846 if self.version & 0xFFFF != REVLOGV2:
839 if self.version & 0xFFFF != REVLOGV2:
847 return 0
840 return 0
848 return self.index[rev][9]
841 return self.index[rev][9]
849
842
850 def rawsize(self, rev):
843 def rawsize(self, rev):
851 """return the length of the uncompressed text for a given revision"""
844 """return the length of the uncompressed text for a given revision"""
852 l = self.index[rev][2]
845 l = self.index[rev][2]
853 if l >= 0:
846 if l >= 0:
854 return l
847 return l
855
848
856 t = self.rawdata(rev)
849 t = self.rawdata(rev)
857 return len(t)
850 return len(t)
858
851
859 def size(self, rev):
852 def size(self, rev):
860 """length of non-raw text (processed by a "read" flag processor)"""
853 """length of non-raw text (processed by a "read" flag processor)"""
861 # fast path: if no "read" flag processor could change the content,
854 # fast path: if no "read" flag processor could change the content,
862 # size is rawsize. note: ELLIPSIS is known to not change the content.
855 # size is rawsize. note: ELLIPSIS is known to not change the content.
863 flags = self.flags(rev)
856 flags = self.flags(rev)
864 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
857 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
865 return self.rawsize(rev)
858 return self.rawsize(rev)
866
859
867 return len(self.revision(rev, raw=False))
860 return len(self.revision(rev, raw=False))
868
861
869 def chainbase(self, rev):
862 def chainbase(self, rev):
870 base = self._chainbasecache.get(rev)
863 base = self._chainbasecache.get(rev)
871 if base is not None:
864 if base is not None:
872 return base
865 return base
873
866
874 index = self.index
867 index = self.index
875 iterrev = rev
868 iterrev = rev
876 base = index[iterrev][3]
869 base = index[iterrev][3]
877 while base != iterrev:
870 while base != iterrev:
878 iterrev = base
871 iterrev = base
879 base = index[iterrev][3]
872 base = index[iterrev][3]
880
873
881 self._chainbasecache[rev] = base
874 self._chainbasecache[rev] = base
882 return base
875 return base
883
876
884 def linkrev(self, rev):
877 def linkrev(self, rev):
885 return self.index[rev][4]
878 return self.index[rev][4]
886
879
887 def parentrevs(self, rev):
880 def parentrevs(self, rev):
888 try:
881 try:
889 entry = self.index[rev]
882 entry = self.index[rev]
890 except IndexError:
883 except IndexError:
891 if rev == wdirrev:
884 if rev == wdirrev:
892 raise error.WdirUnsupported
885 raise error.WdirUnsupported
893 raise
886 raise
894 if entry[5] == nullrev:
887 if entry[5] == nullrev:
895 return entry[6], entry[5]
888 return entry[6], entry[5]
896 else:
889 else:
897 return entry[5], entry[6]
890 return entry[5], entry[6]
898
891
899 # fast parentrevs(rev) where rev isn't filtered
892 # fast parentrevs(rev) where rev isn't filtered
900 _uncheckedparentrevs = parentrevs
893 _uncheckedparentrevs = parentrevs
901
894
902 def node(self, rev):
895 def node(self, rev):
903 try:
896 try:
904 return self.index[rev][7]
897 return self.index[rev][7]
905 except IndexError:
898 except IndexError:
906 if rev == wdirrev:
899 if rev == wdirrev:
907 raise error.WdirUnsupported
900 raise error.WdirUnsupported
908 raise
901 raise
909
902
910 # Derived from index values.
903 # Derived from index values.
911
904
912 def end(self, rev):
905 def end(self, rev):
913 return self.start(rev) + self.length(rev)
906 return self.start(rev) + self.length(rev)
914
907
915 def parents(self, node):
908 def parents(self, node):
916 i = self.index
909 i = self.index
917 d = i[self.rev(node)]
910 d = i[self.rev(node)]
918 # inline node() to avoid function call overhead
911 # inline node() to avoid function call overhead
919 if d[5] == nullid:
912 if d[5] == nullid:
920 return i[d[6]][7], i[d[5]][7]
913 return i[d[6]][7], i[d[5]][7]
921 else:
914 else:
922 return i[d[5]][7], i[d[6]][7]
915 return i[d[5]][7], i[d[6]][7]
923
916
924 def chainlen(self, rev):
917 def chainlen(self, rev):
925 return self._chaininfo(rev)[0]
918 return self._chaininfo(rev)[0]
926
919
927 def _chaininfo(self, rev):
920 def _chaininfo(self, rev):
928 chaininfocache = self._chaininfocache
921 chaininfocache = self._chaininfocache
929 if rev in chaininfocache:
922 if rev in chaininfocache:
930 return chaininfocache[rev]
923 return chaininfocache[rev]
931 index = self.index
924 index = self.index
932 generaldelta = self._generaldelta
925 generaldelta = self._generaldelta
933 iterrev = rev
926 iterrev = rev
934 e = index[iterrev]
927 e = index[iterrev]
935 clen = 0
928 clen = 0
936 compresseddeltalen = 0
929 compresseddeltalen = 0
937 while iterrev != e[3]:
930 while iterrev != e[3]:
938 clen += 1
931 clen += 1
939 compresseddeltalen += e[1]
932 compresseddeltalen += e[1]
940 if generaldelta:
933 if generaldelta:
941 iterrev = e[3]
934 iterrev = e[3]
942 else:
935 else:
943 iterrev -= 1
936 iterrev -= 1
944 if iterrev in chaininfocache:
937 if iterrev in chaininfocache:
945 t = chaininfocache[iterrev]
938 t = chaininfocache[iterrev]
946 clen += t[0]
939 clen += t[0]
947 compresseddeltalen += t[1]
940 compresseddeltalen += t[1]
948 break
941 break
949 e = index[iterrev]
942 e = index[iterrev]
950 else:
943 else:
951 # Add text length of base since decompressing that also takes
944 # Add text length of base since decompressing that also takes
952 # work. For cache hits the length is already included.
945 # work. For cache hits the length is already included.
953 compresseddeltalen += e[1]
946 compresseddeltalen += e[1]
954 r = (clen, compresseddeltalen)
947 r = (clen, compresseddeltalen)
955 chaininfocache[rev] = r
948 chaininfocache[rev] = r
956 return r
949 return r
957
950
958 def _deltachain(self, rev, stoprev=None):
951 def _deltachain(self, rev, stoprev=None):
959 """Obtain the delta chain for a revision.
952 """Obtain the delta chain for a revision.
960
953
961 ``stoprev`` specifies a revision to stop at. If not specified, we
954 ``stoprev`` specifies a revision to stop at. If not specified, we
962 stop at the base of the chain.
955 stop at the base of the chain.
963
956
964 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
957 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
965 revs in ascending order and ``stopped`` is a bool indicating whether
958 revs in ascending order and ``stopped`` is a bool indicating whether
966 ``stoprev`` was hit.
959 ``stoprev`` was hit.
967 """
960 """
968 # Try C implementation.
961 # Try C implementation.
969 try:
962 try:
970 return self.index.deltachain(rev, stoprev, self._generaldelta)
963 return self.index.deltachain(rev, stoprev, self._generaldelta)
971 except AttributeError:
964 except AttributeError:
972 pass
965 pass
973
966
974 chain = []
967 chain = []
975
968
976 # Alias to prevent attribute lookup in tight loop.
969 # Alias to prevent attribute lookup in tight loop.
977 index = self.index
970 index = self.index
978 generaldelta = self._generaldelta
971 generaldelta = self._generaldelta
979
972
980 iterrev = rev
973 iterrev = rev
981 e = index[iterrev]
974 e = index[iterrev]
982 while iterrev != e[3] and iterrev != stoprev:
975 while iterrev != e[3] and iterrev != stoprev:
983 chain.append(iterrev)
976 chain.append(iterrev)
984 if generaldelta:
977 if generaldelta:
985 iterrev = e[3]
978 iterrev = e[3]
986 else:
979 else:
987 iterrev -= 1
980 iterrev -= 1
988 e = index[iterrev]
981 e = index[iterrev]
989
982
990 if iterrev == stoprev:
983 if iterrev == stoprev:
991 stopped = True
984 stopped = True
992 else:
985 else:
993 chain.append(iterrev)
986 chain.append(iterrev)
994 stopped = False
987 stopped = False
995
988
996 chain.reverse()
989 chain.reverse()
997 return chain, stopped
990 return chain, stopped
998
991
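For orientation, the returned chain is consumed oldest-first: chain[0] is the base snapshot and every later rev contributes one delta. A minimal sketch with hypothetical string-replace "patches" (not the real binary delta format):

deltas = {1: ("v1", "v2"), 2: ("v2", "v3")}  # rev -> (old, new) toy patch

def toy_restore(chain, base_text):
    text = base_text
    for rev in chain[1:]:  # chain[0] is the base snapshot
        old, new = deltas[rev]
        text = text.replace(old, new)
    return text

assert toy_restore([0, 1, 2], "content v1") == "content v3"
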
999 def ancestors(self, revs, stoprev=0, inclusive=False):
992 def ancestors(self, revs, stoprev=0, inclusive=False):
1000 """Generate the ancestors of 'revs' in reverse revision order.
993 """Generate the ancestors of 'revs' in reverse revision order.
1001 Does not generate revs lower than stoprev.
994 Does not generate revs lower than stoprev.
1002
995
1003 See the documentation for ancestor.lazyancestors for more details."""
996 See the documentation for ancestor.lazyancestors for more details."""
1004
997
1005 # first, make sure start revisions aren't filtered
998 # first, make sure start revisions aren't filtered
1006 revs = list(revs)
999 revs = list(revs)
1007 checkrev = self.node
1000 checkrev = self.node
1008 for r in revs:
1001 for r in revs:
1009 checkrev(r)
1002 checkrev(r)
1010 # and we're sure ancestors aren't filtered as well
1003 # and we're sure ancestors aren't filtered as well
1011
1004
1012 if rustancestor is not None:
1005 if rustancestor is not None:
1013 lazyancestors = rustancestor.LazyAncestors
1006 lazyancestors = rustancestor.LazyAncestors
1014 arg = self.index
1007 arg = self.index
1015 else:
1008 else:
1016 lazyancestors = ancestor.lazyancestors
1009 lazyancestors = ancestor.lazyancestors
1017 arg = self._uncheckedparentrevs
1010 arg = self._uncheckedparentrevs
1018 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1011 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1019
1012
1020 def descendants(self, revs):
1013 def descendants(self, revs):
1021 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1014 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1022
1015
1023 def findcommonmissing(self, common=None, heads=None):
1016 def findcommonmissing(self, common=None, heads=None):
1024 """Return a tuple of the ancestors of common and the ancestors of heads
1017 """Return a tuple of the ancestors of common and the ancestors of heads
1025 that are not ancestors of common. In revset terminology, we return the
1018 that are not ancestors of common. In revset terminology, we return the
1026 tuple:
1019 tuple:
1027
1020
1028 ::common, (::heads) - (::common)
1021 ::common, (::heads) - (::common)
1029
1022
1030 The list is sorted by revision number, meaning it is
1023 The list is sorted by revision number, meaning it is
1031 topologically sorted.
1024 topologically sorted.
1032
1025
1033 'heads' and 'common' are both lists of node IDs. If heads is
1026 'heads' and 'common' are both lists of node IDs. If heads is
1034 not supplied, uses all of the revlog's heads. If common is not
1027 not supplied, uses all of the revlog's heads. If common is not
1035 supplied, uses nullid."""
1028 supplied, uses nullid."""
1036 if common is None:
1029 if common is None:
1037 common = [nullid]
1030 common = [nullid]
1038 if heads is None:
1031 if heads is None:
1039 heads = self.heads()
1032 heads = self.heads()
1040
1033
1041 common = [self.rev(n) for n in common]
1034 common = [self.rev(n) for n in common]
1042 heads = [self.rev(n) for n in heads]
1035 heads = [self.rev(n) for n in heads]
1043
1036
1044 # we want the ancestors, but inclusive
1037 # we want the ancestors, but inclusive
1045 class lazyset(object):
1038 class lazyset(object):
1046 def __init__(self, lazyvalues):
1039 def __init__(self, lazyvalues):
1047 self.addedvalues = set()
1040 self.addedvalues = set()
1048 self.lazyvalues = lazyvalues
1041 self.lazyvalues = lazyvalues
1049
1042
1050 def __contains__(self, value):
1043 def __contains__(self, value):
1051 return value in self.addedvalues or value in self.lazyvalues
1044 return value in self.addedvalues or value in self.lazyvalues
1052
1045
1053 def __iter__(self):
1046 def __iter__(self):
1054 added = self.addedvalues
1047 added = self.addedvalues
1055 for r in added:
1048 for r in added:
1056 yield r
1049 yield r
1057 for r in self.lazyvalues:
1050 for r in self.lazyvalues:
1058 if r not in added:
1051 if r not in added:
1059 yield r
1052 yield r
1060
1053
1061 def add(self, value):
1054 def add(self, value):
1062 self.addedvalues.add(value)
1055 self.addedvalues.add(value)
1063
1056
1064 def update(self, values):
1057 def update(self, values):
1065 self.addedvalues.update(values)
1058 self.addedvalues.update(values)
1066
1059
1067 has = lazyset(self.ancestors(common))
1060 has = lazyset(self.ancestors(common))
1068 has.add(nullrev)
1061 has.add(nullrev)
1069 has.update(common)
1062 has.update(common)
1070
1063
1071 # take all ancestors from heads that aren't in has
1064 # take all ancestors from heads that aren't in has
1072 missing = set()
1065 missing = set()
1073 visit = collections.deque(r for r in heads if r not in has)
1066 visit = collections.deque(r for r in heads if r not in has)
1074 while visit:
1067 while visit:
1075 r = visit.popleft()
1068 r = visit.popleft()
1076 if r in missing:
1069 if r in missing:
1077 continue
1070 continue
1078 else:
1071 else:
1079 missing.add(r)
1072 missing.add(r)
1080 for p in self.parentrevs(r):
1073 for p in self.parentrevs(r):
1081 if p not in has:
1074 if p not in has:
1082 visit.append(p)
1075 visit.append(p)
1083 missing = list(missing)
1076 missing = list(missing)
1084 missing.sort()
1077 missing.sort()
1085 return has, [self.node(miss) for miss in missing]
1078 return has, [self.node(miss) for miss in missing]
1086
1079
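The BFS above can be exercised on toy data. This sketch (hypothetical linear history) collects the ancestors of the heads that are not already covered by `has`:

import collections

parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (2, -1)}
has = {-1, 0, 1}  # ancestors of 'common', plus nullrev
heads = [3]

missing = set()
visit = collections.deque(r for r in heads if r not in has)
while visit:
    r = visit.popleft()
    if r in missing:
        continue
    missing.add(r)
    for p in parents[r]:
        if p not in has:
            visit.append(p)

assert sorted(missing) == [2, 3]
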
1087 def incrementalmissingrevs(self, common=None):
1080 def incrementalmissingrevs(self, common=None):
1088 """Return an object that can be used to incrementally compute the
1081 """Return an object that can be used to incrementally compute the
1089 revision numbers of the ancestors of arbitrary sets that are not
1082 revision numbers of the ancestors of arbitrary sets that are not
1090 ancestors of common. This is an ancestor.incrementalmissingancestors
1083 ancestors of common. This is an ancestor.incrementalmissingancestors
1091 object.
1084 object.
1092
1085
1093 'common' is a list of revision numbers. If common is not supplied, uses
1086 'common' is a list of revision numbers. If common is not supplied, uses
1094 nullrev.
1087 nullrev.
1095 """
1088 """
1096 if common is None:
1089 if common is None:
1097 common = [nullrev]
1090 common = [nullrev]
1098
1091
1099 if rustancestor is not None:
1092 if rustancestor is not None:
1100 return rustancestor.MissingAncestors(self.index, common)
1093 return rustancestor.MissingAncestors(self.index, common)
1101 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1094 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1102
1095
1103 def findmissingrevs(self, common=None, heads=None):
1096 def findmissingrevs(self, common=None, heads=None):
1104 """Return the revision numbers of the ancestors of heads that
1097 """Return the revision numbers of the ancestors of heads that
1105 are not ancestors of common.
1098 are not ancestors of common.
1106
1099
1107 More specifically, return a list of revision numbers corresponding to
1100 More specifically, return a list of revision numbers corresponding to
1108 nodes N such that every N satisfies the following constraints:
1101 nodes N such that every N satisfies the following constraints:
1109
1102
1110 1. N is an ancestor of some node in 'heads'
1103 1. N is an ancestor of some node in 'heads'
1111 2. N is not an ancestor of any node in 'common'
1104 2. N is not an ancestor of any node in 'common'
1112
1105
1113 The list is sorted by revision number, meaning it is
1106 The list is sorted by revision number, meaning it is
1114 topologically sorted.
1107 topologically sorted.
1115
1108
1116 'heads' and 'common' are both lists of revision numbers. If heads is
1109 'heads' and 'common' are both lists of revision numbers. If heads is
1117 not supplied, uses all of the revlog's heads. If common is not
1110 not supplied, uses all of the revlog's heads. If common is not
1118 supplied, uses nullid."""
1111 supplied, uses nullid."""
1119 if common is None:
1112 if common is None:
1120 common = [nullrev]
1113 common = [nullrev]
1121 if heads is None:
1114 if heads is None:
1122 heads = self.headrevs()
1115 heads = self.headrevs()
1123
1116
1124 inc = self.incrementalmissingrevs(common=common)
1117 inc = self.incrementalmissingrevs(common=common)
1125 return inc.missingancestors(heads)
1118 return inc.missingancestors(heads)
1126
1119
1127 def findmissing(self, common=None, heads=None):
1120 def findmissing(self, common=None, heads=None):
1128 """Return the ancestors of heads that are not ancestors of common.
1121 """Return the ancestors of heads that are not ancestors of common.
1129
1122
1130 More specifically, return a list of nodes N such that every N
1123 More specifically, return a list of nodes N such that every N
1131 satisfies the following constraints:
1124 satisfies the following constraints:
1132
1125
1133 1. N is an ancestor of some node in 'heads'
1126 1. N is an ancestor of some node in 'heads'
1134 2. N is not an ancestor of any node in 'common'
1127 2. N is not an ancestor of any node in 'common'
1135
1128
1136 The list is sorted by revision number, meaning it is
1129 The list is sorted by revision number, meaning it is
1137 topologically sorted.
1130 topologically sorted.
1138
1131
1139 'heads' and 'common' are both lists of node IDs. If heads is
1132 'heads' and 'common' are both lists of node IDs. If heads is
1140 not supplied, uses all of the revlog's heads. If common is not
1133 not supplied, uses all of the revlog's heads. If common is not
1141 supplied, uses nullid."""
1134 supplied, uses nullid."""
1142 if common is None:
1135 if common is None:
1143 common = [nullid]
1136 common = [nullid]
1144 if heads is None:
1137 if heads is None:
1145 heads = self.heads()
1138 heads = self.heads()
1146
1139
1147 common = [self.rev(n) for n in common]
1140 common = [self.rev(n) for n in common]
1148 heads = [self.rev(n) for n in heads]
1141 heads = [self.rev(n) for n in heads]
1149
1142
1150 inc = self.incrementalmissingrevs(common=common)
1143 inc = self.incrementalmissingrevs(common=common)
1151 return [self.node(r) for r in inc.missingancestors(heads)]
1144 return [self.node(r) for r in inc.missingancestors(heads)]
1152
1145
1153 def nodesbetween(self, roots=None, heads=None):
1146 def nodesbetween(self, roots=None, heads=None):
1154 """Return a topological path from 'roots' to 'heads'.
1147 """Return a topological path from 'roots' to 'heads'.
1155
1148
1156 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1149 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1157 topologically sorted list of all nodes N that satisfy both of
1150 topologically sorted list of all nodes N that satisfy both of
1158 these constraints:
1151 these constraints:
1159
1152
1160 1. N is a descendant of some node in 'roots'
1153 1. N is a descendant of some node in 'roots'
1161 2. N is an ancestor of some node in 'heads'
1154 2. N is an ancestor of some node in 'heads'
1162
1155
1163 Every node is considered to be both a descendant and an ancestor
1156 Every node is considered to be both a descendant and an ancestor
1164 of itself, so every reachable node in 'roots' and 'heads' will be
1157 of itself, so every reachable node in 'roots' and 'heads' will be
1165 included in 'nodes'.
1158 included in 'nodes'.
1166
1159
1167 'outroots' is the list of reachable nodes in 'roots', i.e., the
1160 'outroots' is the list of reachable nodes in 'roots', i.e., the
1168 subset of 'roots' that is returned in 'nodes'. Likewise,
1161 subset of 'roots' that is returned in 'nodes'. Likewise,
1169 'outheads' is the subset of 'heads' that is also in 'nodes'.
1162 'outheads' is the subset of 'heads' that is also in 'nodes'.
1170
1163
1171 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1164 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1172 unspecified, uses nullid as the only root. If 'heads' is
1165 unspecified, uses nullid as the only root. If 'heads' is
1173 unspecified, uses list of all of the revlog's heads."""
1166 unspecified, uses list of all of the revlog's heads."""
1174 nonodes = ([], [], [])
1167 nonodes = ([], [], [])
1175 if roots is not None:
1168 if roots is not None:
1176 roots = list(roots)
1169 roots = list(roots)
1177 if not roots:
1170 if not roots:
1178 return nonodes
1171 return nonodes
1179 lowestrev = min([self.rev(n) for n in roots])
1172 lowestrev = min([self.rev(n) for n in roots])
1180 else:
1173 else:
1181 roots = [nullid] # Everybody's a descendant of nullid
1174 roots = [nullid] # Everybody's a descendant of nullid
1182 lowestrev = nullrev
1175 lowestrev = nullrev
1183 if (lowestrev == nullrev) and (heads is None):
1176 if (lowestrev == nullrev) and (heads is None):
1184 # We want _all_ the nodes!
1177 # We want _all_ the nodes!
1185 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1178 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1186 if heads is None:
1179 if heads is None:
1187 # All nodes are ancestors, so the latest ancestor is the last
1180 # All nodes are ancestors, so the latest ancestor is the last
1188 # node.
1181 # node.
1189 highestrev = len(self) - 1
1182 highestrev = len(self) - 1
1190 # Set ancestors to None to signal that every node is an ancestor.
1183 # Set ancestors to None to signal that every node is an ancestor.
1191 ancestors = None
1184 ancestors = None
1192 # Set heads to an empty dictionary for later discovery of heads
1185 # Set heads to an empty dictionary for later discovery of heads
1193 heads = {}
1186 heads = {}
1194 else:
1187 else:
1195 heads = list(heads)
1188 heads = list(heads)
1196 if not heads:
1189 if not heads:
1197 return nonodes
1190 return nonodes
1198 ancestors = set()
1191 ancestors = set()
1199 # Turn heads into a dictionary so we can remove 'fake' heads.
1192 # Turn heads into a dictionary so we can remove 'fake' heads.
1200 # Also, later we will be using it to filter out the heads we can't
1193 # Also, later we will be using it to filter out the heads we can't
1201 # find from roots.
1194 # find from roots.
1202 heads = dict.fromkeys(heads, False)
1195 heads = dict.fromkeys(heads, False)
1203 # Start at the top and keep marking parents until we're done.
1196 # Start at the top and keep marking parents until we're done.
1204 nodestotag = set(heads)
1197 nodestotag = set(heads)
1205 # Remember where the top was so we can use it as a limit later.
1198 # Remember where the top was so we can use it as a limit later.
1206 highestrev = max([self.rev(n) for n in nodestotag])
1199 highestrev = max([self.rev(n) for n in nodestotag])
1207 while nodestotag:
1200 while nodestotag:
1208 # grab a node to tag
1201 # grab a node to tag
1209 n = nodestotag.pop()
1202 n = nodestotag.pop()
1210 # Never tag nullid
1203 # Never tag nullid
1211 if n == nullid:
1204 if n == nullid:
1212 continue
1205 continue
1213 # A node's revision number represents its place in a
1206 # A node's revision number represents its place in a
1214 # topologically sorted list of nodes.
1207 # topologically sorted list of nodes.
1215 r = self.rev(n)
1208 r = self.rev(n)
1216 if r >= lowestrev:
1209 if r >= lowestrev:
1217 if n not in ancestors:
1210 if n not in ancestors:
1218 # If we are possibly a descendant of one of the roots
1211 # If we are possibly a descendant of one of the roots
1219 # and we haven't already been marked as an ancestor
1212 # and we haven't already been marked as an ancestor
1220 ancestors.add(n) # Mark as ancestor
1213 ancestors.add(n) # Mark as ancestor
1221 # Add non-nullid parents to list of nodes to tag.
1214 # Add non-nullid parents to list of nodes to tag.
1222 nodestotag.update(
1215 nodestotag.update(
1223 [p for p in self.parents(n) if p != nullid]
1216 [p for p in self.parents(n) if p != nullid]
1224 )
1217 )
1225 elif n in heads: # We've seen it before, is it a fake head?
1218 elif n in heads: # We've seen it before, is it a fake head?
1226 # So it is; real heads should not be the ancestors of
1219 # So it is; real heads should not be the ancestors of
1227 # any other heads.
1220 # any other heads.
1228 heads.pop(n)
1221 heads.pop(n)
1229 if not ancestors:
1222 if not ancestors:
1230 return nonodes
1223 return nonodes
1231 # Now that we have our set of ancestors, we want to remove any
1224 # Now that we have our set of ancestors, we want to remove any
1232 # roots that are not ancestors.
1225 # roots that are not ancestors.
1233
1226
1234 # If one of the roots was nullid, everything is included anyway.
1227 # If one of the roots was nullid, everything is included anyway.
1235 if lowestrev > nullrev:
1228 if lowestrev > nullrev:
1236 # But, since we weren't, let's recompute the lowest rev to not
1229 # But, since we weren't, let's recompute the lowest rev to not
1237 # include roots that aren't ancestors.
1230 # include roots that aren't ancestors.
1238
1231
1239 # Filter out roots that aren't ancestors of heads
1232 # Filter out roots that aren't ancestors of heads
1240 roots = [root for root in roots if root in ancestors]
1233 roots = [root for root in roots if root in ancestors]
1241 # Recompute the lowest revision
1234 # Recompute the lowest revision
1242 if roots:
1235 if roots:
1243 lowestrev = min([self.rev(root) for root in roots])
1236 lowestrev = min([self.rev(root) for root in roots])
1244 else:
1237 else:
1245 # No more roots? Return empty list
1238 # No more roots? Return empty list
1246 return nonodes
1239 return nonodes
1247 else:
1240 else:
1248 # We are descending from nullid, and don't need to care about
1241 # We are descending from nullid, and don't need to care about
1249 # any other roots.
1242 # any other roots.
1250 lowestrev = nullrev
1243 lowestrev = nullrev
1251 roots = [nullid]
1244 roots = [nullid]
1252 # Transform our roots list into a set.
1245 # Transform our roots list into a set.
1253 descendants = set(roots)
1246 descendants = set(roots)
1254 # Also, keep the original roots so we can filter out roots that aren't
1247 # Also, keep the original roots so we can filter out roots that aren't
1255 # 'real' roots (i.e. are descended from other roots).
1248 # 'real' roots (i.e. are descended from other roots).
1256 roots = descendants.copy()
1249 roots = descendants.copy()
1257 # Our topologically sorted list of output nodes.
1250 # Our topologically sorted list of output nodes.
1258 orderedout = []
1251 orderedout = []
1259 # Don't start at nullid since we don't want nullid in our output list,
1252 # Don't start at nullid since we don't want nullid in our output list,
1260 # and if nullid shows up in descendants, empty parents will look like
1253 # and if nullid shows up in descendants, empty parents will look like
1261 # they're descendants.
1254 # they're descendants.
1262 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1255 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1263 n = self.node(r)
1256 n = self.node(r)
1264 isdescendant = False
1257 isdescendant = False
1265 if lowestrev == nullrev: # Everybody is a descendant of nullid
1258 if lowestrev == nullrev: # Everybody is a descendant of nullid
1266 isdescendant = True
1259 isdescendant = True
1267 elif n in descendants:
1260 elif n in descendants:
1268 # n is already a descendant
1261 # n is already a descendant
1269 isdescendant = True
1262 isdescendant = True
1270 # This check only needs to be done here because all the roots
1263 # This check only needs to be done here because all the roots
1271 # will start being marked as descendants before the loop.
1264 # will start being marked as descendants before the loop.
1272 if n in roots:
1265 if n in roots:
1273 # If n was a root, check if it's a 'real' root.
1266 # If n was a root, check if it's a 'real' root.
1274 p = tuple(self.parents(n))
1267 p = tuple(self.parents(n))
1275 # If any of its parents are descendants, it's not a root.
1268 # If any of its parents are descendants, it's not a root.
1276 if (p[0] in descendants) or (p[1] in descendants):
1269 if (p[0] in descendants) or (p[1] in descendants):
1277 roots.remove(n)
1270 roots.remove(n)
1278 else:
1271 else:
1279 p = tuple(self.parents(n))
1272 p = tuple(self.parents(n))
1280 # A node is a descendant if either of its parents are
1273 # A node is a descendant if either of its parents are
1274 # descendants. (We seeded the descendants set with the roots
1267 # descendants. (We seeded the descendants set with the roots
1282 # up there, remember?)
1275 # up there, remember?)
1283 if (p[0] in descendants) or (p[1] in descendants):
1276 if (p[0] in descendants) or (p[1] in descendants):
1284 descendants.add(n)
1277 descendants.add(n)
1285 isdescendant = True
1278 isdescendant = True
1286 if isdescendant and ((ancestors is None) or (n in ancestors)):
1279 if isdescendant and ((ancestors is None) or (n in ancestors)):
1287 # Only include nodes that are both descendants and ancestors.
1280 # Only include nodes that are both descendants and ancestors.
1288 orderedout.append(n)
1281 orderedout.append(n)
1289 if (ancestors is not None) and (n in heads):
1282 if (ancestors is not None) and (n in heads):
1290 # We're trying to figure out which heads are reachable
1283 # We're trying to figure out which heads are reachable
1291 # from roots.
1284 # from roots.
1292 # Mark this head as having been reached
1285 # Mark this head as having been reached
1293 heads[n] = True
1286 heads[n] = True
1294 elif ancestors is None:
1287 elif ancestors is None:
1295 # Otherwise, we're trying to discover the heads.
1288 # Otherwise, we're trying to discover the heads.
1296 # Assume this is a head because if it isn't, the next step
1289 # Assume this is a head because if it isn't, the next step
1297 # will eventually remove it.
1290 # will eventually remove it.
1298 heads[n] = True
1291 heads[n] = True
1299 # But, obviously its parents aren't.
1292 # But, obviously its parents aren't.
1300 for p in self.parents(n):
1293 for p in self.parents(n):
1301 heads.pop(p, None)
1294 heads.pop(p, None)
1302 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1295 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1303 roots = list(roots)
1296 roots = list(roots)
1304 assert orderedout
1297 assert orderedout
1305 assert roots
1298 assert roots
1306 assert heads
1299 assert heads
1307 return (orderedout, roots, heads)
1300 return (orderedout, roots, heads)
1308
1301
1309 def headrevs(self, revs=None):
1302 def headrevs(self, revs=None):
1310 if revs is None:
1303 if revs is None:
1311 try:
1304 try:
1312 return self.index.headrevs()
1305 return self.index.headrevs()
1313 except AttributeError:
1306 except AttributeError:
1314 return self._headrevs()
1307 return self._headrevs()
1315 if rustdagop is not None:
1308 if rustdagop is not None:
1316 return rustdagop.headrevs(self.index, revs)
1309 return rustdagop.headrevs(self.index, revs)
1317 return dagop.headrevs(revs, self._uncheckedparentrevs)
1310 return dagop.headrevs(revs, self._uncheckedparentrevs)
1318
1311
1319 def computephases(self, roots):
1312 def computephases(self, roots):
1320 return self.index.computephasesmapsets(roots)
1313 return self.index.computephasesmapsets(roots)
1321
1314
1322 def _headrevs(self):
1315 def _headrevs(self):
1323 count = len(self)
1316 count = len(self)
1324 if not count:
1317 if not count:
1325 return [nullrev]
1318 return [nullrev]
1326 # we won't iterate over filtered revs, so nobody is a head at start
1319 # we won't iterate over filtered revs, so nobody is a head at start
1327 ishead = [0] * (count + 1)
1320 ishead = [0] * (count + 1)
1328 index = self.index
1321 index = self.index
1329 for r in self:
1322 for r in self:
1330 ishead[r] = 1 # I may be a head
1323 ishead[r] = 1 # I may be a head
1331 e = index[r]
1324 e = index[r]
1332 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1325 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1333 return [r for r, val in enumerate(ishead) if val]
1326 return [r for r, val in enumerate(ishead) if val]
1334
1327
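A worked example of the marking pass: every rev starts flagged as a head and each rev then clears its parents' flags. The real loop sizes `ishead` as `count + 1` so writes at nullrev (-1) harmlessly land on the spare slot; the sketch below (hypothetical parents) guards explicitly instead:

parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (1, -1)}  # 2 and 3 fork off 1

ishead = [1] * len(parents)          # everyone starts as a candidate head
for r, (p1, p2) in parents.items():
    for p in (p1, p2):
        if p >= 0:                   # the real code uses a spare slot for -1
            ishead[p] = 0
assert [r for r, v in enumerate(ishead) if v] == [2, 3]
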
1335 def heads(self, start=None, stop=None):
1328 def heads(self, start=None, stop=None):
1336 """return the list of all nodes that have no children
1329 """return the list of all nodes that have no children
1337
1330
1338 if start is specified, only heads that are descendants of
1331 if start is specified, only heads that are descendants of
1339 start will be returned
1332 start will be returned
1340 if stop is specified, it will consider all the revs from stop
1333 if stop is specified, it will consider all the revs from stop
1341 as if they had no children
1334 as if they had no children
1342 """
1335 """
1343 if start is None and stop is None:
1336 if start is None and stop is None:
1344 if not len(self):
1337 if not len(self):
1345 return [nullid]
1338 return [nullid]
1346 return [self.node(r) for r in self.headrevs()]
1339 return [self.node(r) for r in self.headrevs()]
1347
1340
1348 if start is None:
1341 if start is None:
1349 start = nullrev
1342 start = nullrev
1350 else:
1343 else:
1351 start = self.rev(start)
1344 start = self.rev(start)
1352
1345
1353 stoprevs = {self.rev(n) for n in stop or []}
1346 stoprevs = {self.rev(n) for n in stop or []}
1354
1347
1355 revs = dagop.headrevssubset(
1348 revs = dagop.headrevssubset(
1356 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1349 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1357 )
1350 )
1358
1351
1359 return [self.node(rev) for rev in revs]
1352 return [self.node(rev) for rev in revs]
1360
1353
1361 def children(self, node):
1354 def children(self, node):
1362 """find the children of a given node"""
1355 """find the children of a given node"""
1363 c = []
1356 c = []
1364 p = self.rev(node)
1357 p = self.rev(node)
1365 for r in self.revs(start=p + 1):
1358 for r in self.revs(start=p + 1):
1366 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1359 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1367 if prevs:
1360 if prevs:
1368 for pr in prevs:
1361 for pr in prevs:
1369 if pr == p:
1362 if pr == p:
1370 c.append(self.node(r))
1363 c.append(self.node(r))
1371 elif p == nullrev:
1364 elif p == nullrev:
1372 c.append(self.node(r))
1365 c.append(self.node(r))
1373 return c
1366 return c
1374
1367
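Because a child's rev is always greater than its parent's, scanning only revs after `p` is sufficient. A toy illustration (hypothetical parent table):

parents = {0: (-1, -1), 1: (0, -1), 2: (0, 1)}

def toy_children(p):
    return [r for r in range(p + 1, len(parents)) if p in parents[r]]

assert toy_children(0) == [1, 2]
assert toy_children(1) == [2]
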
1375 def commonancestorsheads(self, a, b):
1368 def commonancestorsheads(self, a, b):
1376 """calculate all the heads of the common ancestors of nodes a and b"""
1369 """calculate all the heads of the common ancestors of nodes a and b"""
1377 a, b = self.rev(a), self.rev(b)
1370 a, b = self.rev(a), self.rev(b)
1378 ancs = self._commonancestorsheads(a, b)
1371 ancs = self._commonancestorsheads(a, b)
1379 return pycompat.maplist(self.node, ancs)
1372 return pycompat.maplist(self.node, ancs)
1380
1373
1381 def _commonancestorsheads(self, *revs):
1374 def _commonancestorsheads(self, *revs):
1382 """calculate all the heads of the common ancestors of revs"""
1375 """calculate all the heads of the common ancestors of revs"""
1383 try:
1376 try:
1384 ancs = self.index.commonancestorsheads(*revs)
1377 ancs = self.index.commonancestorsheads(*revs)
1385 except (AttributeError, OverflowError): # C implementation failed
1378 except (AttributeError, OverflowError): # C implementation failed
1386 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1379 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1387 return ancs
1380 return ancs
1388
1381
1389 def isancestor(self, a, b):
1382 def isancestor(self, a, b):
1390 """return True if node a is an ancestor of node b
1383 """return True if node a is an ancestor of node b
1391
1384
1392 A revision is considered an ancestor of itself."""
1385 A revision is considered an ancestor of itself."""
1393 a, b = self.rev(a), self.rev(b)
1386 a, b = self.rev(a), self.rev(b)
1394 return self.isancestorrev(a, b)
1387 return self.isancestorrev(a, b)
1395
1388
1396 def isancestorrev(self, a, b):
1389 def isancestorrev(self, a, b):
1397 """return True if revision a is an ancestor of revision b
1390 """return True if revision a is an ancestor of revision b
1398
1391
1399 A revision is considered an ancestor of itself.
1392 A revision is considered an ancestor of itself.
1400
1393
1401 The implementation of this is trivial but the use of
1394 The implementation of this is trivial but the use of
1402 reachableroots is not."""
1395 reachableroots is not."""
1403 if a == nullrev:
1396 if a == nullrev:
1404 return True
1397 return True
1405 elif a == b:
1398 elif a == b:
1406 return True
1399 return True
1407 elif a > b:
1400 elif a > b:
1408 return False
1401 return False
1409 return bool(self.reachableroots(a, [b], [a], includepath=False))
1402 return bool(self.reachableroots(a, [b], [a], includepath=False))
1410
1403
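The early `a > b` exit works because rev numbers are topologically ordered: every rev is larger than both of its parents, so an ancestor can never carry a larger rev than its descendant. A toy check of that invariant (hypothetical data):

parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1)}
assert all(r > p for r, ps in parents.items() for p in ps if p >= 0)
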
1411 def reachableroots(self, minroot, heads, roots, includepath=False):
1404 def reachableroots(self, minroot, heads, roots, includepath=False):
1412 """return (heads(::(<roots> and <roots>::<heads>)))
1405 """return (heads(::(<roots> and <roots>::<heads>)))
1413
1406
1414 If includepath is True, return (<roots>::<heads>)."""
1407 If includepath is True, return (<roots>::<heads>)."""
1415 try:
1408 try:
1416 return self.index.reachableroots2(
1409 return self.index.reachableroots2(
1417 minroot, heads, roots, includepath
1410 minroot, heads, roots, includepath
1418 )
1411 )
1419 except AttributeError:
1412 except AttributeError:
1420 return dagop._reachablerootspure(
1413 return dagop._reachablerootspure(
1421 self.parentrevs, minroot, roots, heads, includepath
1414 self.parentrevs, minroot, roots, heads, includepath
1422 )
1415 )
1423
1416
1424 def ancestor(self, a, b):
1417 def ancestor(self, a, b):
1425 """calculate the "best" common ancestor of nodes a and b"""
1418 """calculate the "best" common ancestor of nodes a and b"""
1426
1419
1427 a, b = self.rev(a), self.rev(b)
1420 a, b = self.rev(a), self.rev(b)
1428 try:
1421 try:
1429 ancs = self.index.ancestors(a, b)
1422 ancs = self.index.ancestors(a, b)
1430 except (AttributeError, OverflowError):
1423 except (AttributeError, OverflowError):
1431 ancs = ancestor.ancestors(self.parentrevs, a, b)
1424 ancs = ancestor.ancestors(self.parentrevs, a, b)
1432 if ancs:
1425 if ancs:
1433 # choose a consistent winner when there's a tie
1426 # choose a consistent winner when there's a tie
1434 return min(map(self.node, ancs))
1427 return min(map(self.node, ancs))
1435 return nullid
1428 return nullid
1436
1429
1437 def _match(self, id):
1430 def _match(self, id):
1438 if isinstance(id, int):
1431 if isinstance(id, int):
1439 # rev
1432 # rev
1440 return self.node(id)
1433 return self.node(id)
1441 if len(id) == 20:
1434 if len(id) == 20:
1442 # possibly a binary node
1435 # possibly a binary node
1443 # odds of a binary node being all hex in ASCII are 1 in 10**25
1436 # odds of a binary node being all hex in ASCII are 1 in 10**25
1444 try:
1437 try:
1445 node = id
1438 node = id
1446 self.rev(node) # quick search the index
1439 self.rev(node) # quick search the index
1447 return node
1440 return node
1448 except error.LookupError:
1441 except error.LookupError:
1449 pass # may be partial hex id
1442 pass # may be partial hex id
1450 try:
1443 try:
1451 # str(rev)
1444 # str(rev)
1452 rev = int(id)
1445 rev = int(id)
1453 if b"%d" % rev != id:
1446 if b"%d" % rev != id:
1454 raise ValueError
1447 raise ValueError
1455 if rev < 0:
1448 if rev < 0:
1456 rev = len(self) + rev
1449 rev = len(self) + rev
1457 if rev < 0 or rev >= len(self):
1450 if rev < 0 or rev >= len(self):
1458 raise ValueError
1451 raise ValueError
1459 return self.node(rev)
1452 return self.node(rev)
1460 except (ValueError, OverflowError):
1453 except (ValueError, OverflowError):
1461 pass
1454 pass
1462 if len(id) == 40:
1455 if len(id) == 40:
1463 try:
1456 try:
1464 # a full hex nodeid?
1457 # a full hex nodeid?
1465 node = bin(id)
1458 node = bin(id)
1466 self.rev(node)
1459 self.rev(node)
1467 return node
1460 return node
1468 except (TypeError, error.LookupError):
1461 except (TypeError, error.LookupError):
1469 pass
1462 pass
1470
1463
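`_match` thus tries, in order: an integer rev, a 20-byte binary node, a decimal-string rev (negative values counting from the end), and finally a 40-character hex node. A condensed sketch of the decimal-string branch (hypothetical revlog length of 5):

rlen = 5  # hypothetical revlog length

def toy_strrev(id):
    rev = int(id)
    if b"%d" % rev != id:   # reject non-canonical spellings such as b"01"
        raise ValueError(id)
    if rev < 0:
        rev = rlen + rev    # negative revs count back from the end
    if rev < 0 or rev >= rlen:
        raise ValueError(id)
    return rev

assert toy_strrev(b"-1") == 4
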
1471 def _partialmatch(self, id):
1464 def _partialmatch(self, id):
1472 # we don't care about wdirfilenodeids as they should always be full hashes
1465 # we don't care about wdirfilenodeids as they should always be full hashes
1473 maybewdir = wdirhex.startswith(id)
1466 maybewdir = wdirhex.startswith(id)
1474 try:
1467 try:
1475 partial = self.index.partialmatch(id)
1468 partial = self.index.partialmatch(id)
1476 if partial and self.hasnode(partial):
1469 if partial and self.hasnode(partial):
1477 if maybewdir:
1470 if maybewdir:
1478 # single 'ff...' match in radix tree, ambiguous with wdir
1471 # single 'ff...' match in radix tree, ambiguous with wdir
1479 raise error.RevlogError
1472 raise error.RevlogError
1480 return partial
1473 return partial
1481 if maybewdir:
1474 if maybewdir:
1482 # no 'ff...' match in radix tree, wdir identified
1475 # no 'ff...' match in radix tree, wdir identified
1483 raise error.WdirUnsupported
1476 raise error.WdirUnsupported
1484 return None
1477 return None
1485 except error.RevlogError:
1478 except error.RevlogError:
1486 # parsers.c radix tree lookup gave multiple matches
1479 # parsers.c radix tree lookup gave multiple matches
1487 # fast path: for unfiltered changelog, radix tree is accurate
1480 # fast path: for unfiltered changelog, radix tree is accurate
1488 if not getattr(self, 'filteredrevs', None):
1481 if not getattr(self, 'filteredrevs', None):
1489 raise error.AmbiguousPrefixLookupError(
1482 raise error.AmbiguousPrefixLookupError(
1490 id, self.indexfile, _(b'ambiguous identifier')
1483 id, self.indexfile, _(b'ambiguous identifier')
1491 )
1484 )
1492 # fall through to slow path that filters hidden revisions
1485 # fall through to slow path that filters hidden revisions
1493 except (AttributeError, ValueError):
1486 except (AttributeError, ValueError):
1494 # we are pure python, or key was too short to search radix tree
1487 # we are pure python, or key was too short to search radix tree
1495 pass
1488 pass
1496
1489
1497 if id in self._pcache:
1490 if id in self._pcache:
1498 return self._pcache[id]
1491 return self._pcache[id]
1499
1492
1500 if len(id) <= 40:
1493 if len(id) <= 40:
1501 try:
1494 try:
1502 # hex(node)[:...]
1495 # hex(node)[:...]
1503 l = len(id) // 2 # grab an even number of digits
1496 l = len(id) // 2 # grab an even number of digits
1504 prefix = bin(id[: l * 2])
1497 prefix = bin(id[: l * 2])
1505 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1498 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1506 nl = [
1499 nl = [
1507 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1500 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1508 ]
1501 ]
1509 if nullhex.startswith(id):
1502 if nullhex.startswith(id):
1510 nl.append(nullid)
1503 nl.append(nullid)
1511 if len(nl) > 0:
1504 if len(nl) > 0:
1512 if len(nl) == 1 and not maybewdir:
1505 if len(nl) == 1 and not maybewdir:
1513 self._pcache[id] = nl[0]
1506 self._pcache[id] = nl[0]
1514 return nl[0]
1507 return nl[0]
1515 raise error.AmbiguousPrefixLookupError(
1508 raise error.AmbiguousPrefixLookupError(
1516 id, self.indexfile, _(b'ambiguous identifier')
1509 id, self.indexfile, _(b'ambiguous identifier')
1517 )
1510 )
1518 if maybewdir:
1511 if maybewdir:
1519 raise error.WdirUnsupported
1512 raise error.WdirUnsupported
1520 return None
1513 return None
1521 except TypeError:
1514 except TypeError:
1522 pass
1515 pass
1523
1516
1524 def lookup(self, id):
1517 def lookup(self, id):
1525 """locate a node based on:
1518 """locate a node based on:
1526 - revision number or str(revision number)
1519 - revision number or str(revision number)
1527 - nodeid or subset of hex nodeid
1520 - nodeid or subset of hex nodeid
1528 """
1521 """
1529 n = self._match(id)
1522 n = self._match(id)
1530 if n is not None:
1523 if n is not None:
1531 return n
1524 return n
1532 n = self._partialmatch(id)
1525 n = self._partialmatch(id)
1533 if n:
1526 if n:
1534 return n
1527 return n
1535
1528
1536 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1529 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1537
1530
1538 def shortest(self, node, minlength=1):
1531 def shortest(self, node, minlength=1):
1539 """Find the shortest unambiguous prefix that matches node."""
1532 """Find the shortest unambiguous prefix that matches node."""
1540
1533
1541 def isvalid(prefix):
1534 def isvalid(prefix):
1542 try:
1535 try:
1543 matchednode = self._partialmatch(prefix)
1536 matchednode = self._partialmatch(prefix)
1544 except error.AmbiguousPrefixLookupError:
1537 except error.AmbiguousPrefixLookupError:
1545 return False
1538 return False
1546 except error.WdirUnsupported:
1539 except error.WdirUnsupported:
1547 # single 'ff...' match
1540 # single 'ff...' match
1548 return True
1541 return True
1549 if matchednode is None:
1542 if matchednode is None:
1550 raise error.LookupError(node, self.indexfile, _(b'no node'))
1543 raise error.LookupError(node, self.indexfile, _(b'no node'))
1551 return True
1544 return True
1552
1545
1553 def maybewdir(prefix):
1546 def maybewdir(prefix):
1554 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1547 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1555
1548
1556 hexnode = hex(node)
1549 hexnode = hex(node)
1557
1550
1558 def disambiguate(hexnode, minlength):
1551 def disambiguate(hexnode, minlength):
1559 """Disambiguate against wdirid."""
1552 """Disambiguate against wdirid."""
1560 for length in range(minlength, len(hexnode) + 1):
1553 for length in range(minlength, len(hexnode) + 1):
1561 prefix = hexnode[:length]
1554 prefix = hexnode[:length]
1562 if not maybewdir(prefix):
1555 if not maybewdir(prefix):
1563 return prefix
1556 return prefix
1564
1557
1565 if not getattr(self, 'filteredrevs', None):
1558 if not getattr(self, 'filteredrevs', None):
1566 try:
1559 try:
1567 length = max(self.index.shortest(node), minlength)
1560 length = max(self.index.shortest(node), minlength)
1568 return disambiguate(hexnode, length)
1561 return disambiguate(hexnode, length)
1569 except error.RevlogError:
1562 except error.RevlogError:
1570 if node != wdirid:
1563 if node != wdirid:
1571 raise error.LookupError(node, self.indexfile, _(b'no node'))
1564 raise error.LookupError(node, self.indexfile, _(b'no node'))
1572 except AttributeError:
1565 except AttributeError:
1573 # Fall through to pure code
1566 # Fall through to pure code
1574 pass
1567 pass
1575
1568
1576 if node == wdirid:
1569 if node == wdirid:
1577 for length in range(minlength, len(hexnode) + 1):
1570 for length in range(minlength, len(hexnode) + 1):
1578 prefix = hexnode[:length]
1571 prefix = hexnode[:length]
1579 if isvalid(prefix):
1572 if isvalid(prefix):
1580 return prefix
1573 return prefix
1581
1574
1582 for length in range(minlength, len(hexnode) + 1):
1575 for length in range(minlength, len(hexnode) + 1):
1583 prefix = hexnode[:length]
1576 prefix = hexnode[:length]
1584 if isvalid(prefix):
1577 if isvalid(prefix):
1585 return disambiguate(hexnode, length)
1578 return disambiguate(hexnode, length)
1586
1579
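The essence of `shortest` can be shown over plain hex strings (hypothetical nodes, ignoring the wdir and filtering subtleties handled above): grow the prefix from `minlength` until exactly one node matches:

nodes = ["a1b2c3", "a1f9e8", "77d0aa"]

def toy_shortest(node, minlength=1):
    for length in range(minlength, len(node) + 1):
        prefix = node[:length]
        if sum(n.startswith(prefix) for n in nodes) == 1:
            return prefix

assert toy_shortest("a1b2c3") == "a1b"
assert toy_shortest("77d0aa") == "7"
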
1587 def cmp(self, node, text):
1580 def cmp(self, node, text):
1588 """compare text with a given file revision
1581 """compare text with a given file revision
1589
1582
1590 returns True if text is different from what is stored.
1583 returns True if text is different from what is stored.
1591 """
1584 """
1592 p1, p2 = self.parents(node)
1585 p1, p2 = self.parents(node)
1593 return storageutil.hashrevisionsha1(text, p1, p2) != node
1586 return storageutil.hashrevisionsha1(text, p1, p2) != node
1594
1587
1595 def _cachesegment(self, offset, data):
1588 def _cachesegment(self, offset, data):
1596 """Add a segment to the revlog cache.
1589 """Add a segment to the revlog cache.
1597
1590
1598 Accepts an absolute offset and the data that is at that location.
1591 Accepts an absolute offset and the data that is at that location.
1599 """
1592 """
1600 o, d = self._chunkcache
1593 o, d = self._chunkcache
1601 # try to add to existing cache
1594 # try to add to existing cache
1602 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1595 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1603 self._chunkcache = o, d + data
1596 self._chunkcache = o, d + data
1604 else:
1597 else:
1605 self._chunkcache = offset, data
1598 self._chunkcache = offset, data
1606
1599
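The append-or-replace rule above in miniature (toy values; `_chunksize` is a module-level cap in the real code, and the value used here is only an assumption): a segment extends the cache only when it is contiguous with it and the combined size stays under the cap:

_chunksize = 65536  # assumed cap; the real value lives at module level

def toy_cachesegment(cache, offset, data):
    o, d = cache
    if o + len(d) == offset and len(d) + len(data) < _chunksize:
        return o, d + data   # contiguous and small enough: extend
    return offset, data      # otherwise start a fresh cache window

cache = (0, b"abc")
cache = toy_cachesegment(cache, 3, b"def")    # contiguous: extended
assert cache == (0, b"abcdef")
cache = toy_cachesegment(cache, 100, b"xyz")  # gap: replaced
assert cache == (100, b"xyz")
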
1607 def _readsegment(self, offset, length, df=None):
1600 def _readsegment(self, offset, length, df=None):
1608 """Load a segment of raw data from the revlog.
1601 """Load a segment of raw data from the revlog.
1609
1602
1610 Accepts an absolute offset, length to read, and an optional existing
1603 Accepts an absolute offset, length to read, and an optional existing
1611 file handle to read from.
1604 file handle to read from.
1612
1605
1613 If an existing file handle is passed, it will be seeked and the
1606 If an existing file handle is passed, it will be seeked and the
1614 original seek position will NOT be restored.
1607 original seek position will NOT be restored.
1615
1608
1616 Returns a str or buffer of raw byte data.
1609 Returns a str or buffer of raw byte data.
1617
1610
1618 Raises if the requested number of bytes could not be read.
1611 Raises if the requested number of bytes could not be read.
1619 """
1612 """
1620 # Cache data both forward and backward around the requested
1613 # Cache data both forward and backward around the requested
1621 # data, in a fixed size window. This helps speed up operations
1614 # data, in a fixed size window. This helps speed up operations
1622 # involving reading the revlog backwards.
1615 # involving reading the revlog backwards.
1623 cachesize = self._chunkcachesize
1616 cachesize = self._chunkcachesize
1624 realoffset = offset & ~(cachesize - 1)
1617 realoffset = offset & ~(cachesize - 1)
1625 reallength = (
1618 reallength = (
1626 (offset + length + cachesize) & ~(cachesize - 1)
1619 (offset + length + cachesize) & ~(cachesize - 1)
1627 ) - realoffset
1620 ) - realoffset
1628 with self._datareadfp(df) as df:
1621 with self._datareadfp(df) as df:
1629 df.seek(realoffset)
1622 df.seek(realoffset)
1630 d = df.read(reallength)
1623 d = df.read(reallength)
1631
1624
1632 self._cachesegment(realoffset, d)
1625 self._cachesegment(realoffset, d)
1633 if offset != realoffset or reallength != length:
1626 if offset != realoffset or reallength != length:
1634 startoffset = offset - realoffset
1627 startoffset = offset - realoffset
1635 if len(d) - startoffset < length:
1628 if len(d) - startoffset < length:
1636 raise error.RevlogError(
1629 raise error.RevlogError(
1637 _(
1630 _(
1638 b'partial read of revlog %s; expected %d bytes from '
1631 b'partial read of revlog %s; expected %d bytes from '
1639 b'offset %d, got %d'
1632 b'offset %d, got %d'
1640 )
1633 )
1641 % (
1634 % (
1642 self.indexfile if self._inline else self.datafile,
1635 self.indexfile if self._inline else self.datafile,
1643 length,
1636 length,
1644 realoffset,
1637 realoffset,
1645 len(d) - startoffset,
1638 len(d) - startoffset,
1646 )
1639 )
1647 )
1640 )
1648
1641
1649 return util.buffer(d, startoffset, length)
1642 return util.buffer(d, startoffset, length)
1650
1643
1651 if len(d) < length:
1644 if len(d) < length:
1652 raise error.RevlogError(
1645 raise error.RevlogError(
1653 _(
1646 _(
1654 b'partial read of revlog %s; expected %d bytes from offset '
1647 b'partial read of revlog %s; expected %d bytes from offset '
1655 b'%d, got %d'
1648 b'%d, got %d'
1656 )
1649 )
1657 % (
1650 % (
1658 self.indexfile if self._inline else self.datafile,
1651 self.indexfile if self._inline else self.datafile,
1659 length,
1652 length,
1660 offset,
1653 offset,
1661 len(d),
1654 len(d),
1662 )
1655 )
1663 )
1656 )
1664
1657
1665 return d
1658 return d
1666
1659
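The window arithmetic above rounds the request down to a cache-size boundary and widens it for readahead. A worked example, assuming a hypothetical 64 KiB `_chunkcachesize`:

cachesize = 65536           # hypothetical self._chunkcachesize
offset, length = 70000, 1000
realoffset = offset & ~(cachesize - 1)
reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset
assert realoffset == 65536   # rounded down to a cache boundary
assert reallength == 65536   # window covers the request plus readahead
assert realoffset <= offset
assert realoffset + reallength >= offset + length
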
1667 def _getsegment(self, offset, length, df=None):
1660 def _getsegment(self, offset, length, df=None):
1668 """Obtain a segment of raw data from the revlog.
1661 """Obtain a segment of raw data from the revlog.
1669
1662
1670 Accepts an absolute offset, length of bytes to obtain, and an
1663 Accepts an absolute offset, length of bytes to obtain, and an
1671 optional file handle to the already-opened revlog. If the file
1664 optional file handle to the already-opened revlog. If the file
1672 handle is used, it's original seek position will not be preserved.
1665 handle is used, it's original seek position will not be preserved.
1673
1666
1674 Requests for data may be returned from a cache.
1667 Requests for data may be returned from a cache.
1675
1668
1676 Returns a str or a buffer instance of raw byte data.
1669 Returns a str or a buffer instance of raw byte data.
1677 """
1670 """
1678 o, d = self._chunkcache
1671 o, d = self._chunkcache
1679 l = len(d)
1672 l = len(d)
1680
1673
1681 # is it in the cache?
1674 # is it in the cache?
1682 cachestart = offset - o
1675 cachestart = offset - o
1683 cacheend = cachestart + length
1676 cacheend = cachestart + length
1684 if cachestart >= 0 and cacheend <= l:
1677 if cachestart >= 0 and cacheend <= l:
1685 if cachestart == 0 and cacheend == l:
1678 if cachestart == 0 and cacheend == l:
1686 return d # avoid a copy
1679 return d # avoid a copy
1687 return util.buffer(d, cachestart, cacheend - cachestart)
1680 return util.buffer(d, cachestart, cacheend - cachestart)
1688
1681
1689 return self._readsegment(offset, length, df=df)
1682 return self._readsegment(offset, length, df=df)
1690
1683
1691 def _getsegmentforrevs(self, startrev, endrev, df=None):
1684 def _getsegmentforrevs(self, startrev, endrev, df=None):
1692 """Obtain a segment of raw data corresponding to a range of revisions.
1685 """Obtain a segment of raw data corresponding to a range of revisions.
1693
1686
1694 Accepts the start and end revisions and an optional already-open
1687 Accepts the start and end revisions and an optional already-open
1695 file handle to be used for reading. If the file handle is used, its
1688 file handle to be used for reading. If the file handle is used, its
1696 seek position will not be preserved.
1689 seek position will not be preserved.
1697
1690
1698 Requests for data may be satisfied by a cache.
1691 Requests for data may be satisfied by a cache.
1699
1692
1700 Returns a 2-tuple of (offset, data) for the requested range of
1693 Returns a 2-tuple of (offset, data) for the requested range of
1701 revisions. Offset is the integer offset from the beginning of the
1694 revisions. Offset is the integer offset from the beginning of the
1702 revlog and data is a str or buffer of the raw byte data.
1695 revlog and data is a str or buffer of the raw byte data.
1703
1696
1704 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1697 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1705 to determine where each revision's data begins and ends.
1698 to determine where each revision's data begins and ends.
1706 """
1699 """
1707 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1700 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1708 # (functions are expensive).
1701 # (functions are expensive).
1709 index = self.index
1702 index = self.index
1710 istart = index[startrev]
1703 istart = index[startrev]
1711 start = int(istart[0] >> 16)
1704 start = int(istart[0] >> 16)
1712 if startrev == endrev:
1705 if startrev == endrev:
1713 end = start + istart[1]
1706 end = start + istart[1]
1714 else:
1707 else:
1715 iend = index[endrev]
1708 iend = index[endrev]
1716 end = int(iend[0] >> 16) + iend[1]
1709 end = int(iend[0] >> 16) + iend[1]
1717
1710
1718 if self._inline:
1711 if self._inline:
1719 start += (startrev + 1) * self._io.size
1712 start += (startrev + 1) * self.index.entry_size
1720 end += (endrev + 1) * self._io.size
1713 end += (endrev + 1) * self.index.entry_size
1721 length = end - start
1714 length = end - start
1722
1715
1723 return start, self._getsegment(start, length, df=df)
1716 return start, self._getsegment(start, length, df=df)
1724
1717
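This is where the changeset's rename lands: for inline revlogs, index entries and revision data are interleaved in a single file, so rev r's physical offset is its logical data offset plus the r + 1 index entries written before it. The per-entry size is a property of the index format, hence `self.index.entry_size` rather than the old `self._io.size`. Worked arithmetic (revlogv1 entries are 64 bytes; the other figures are hypothetical):

entry_size = 64        # revlogv1 index entry size
rev = 2
logical_start = 500    # hypothetical: sum of data lengths of revs 0..1
physical_start = logical_start + (rev + 1) * entry_size
assert physical_start == 692
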
1725 def _chunk(self, rev, df=None):
1718 def _chunk(self, rev, df=None):
1726 """Obtain a single decompressed chunk for a revision.
1719 """Obtain a single decompressed chunk for a revision.
1727
1720
1728 Accepts an integer revision and an optional already-open file handle
1721 Accepts an integer revision and an optional already-open file handle
1729 to be used for reading. If used, the seek position of the file will not
1722 to be used for reading. If used, the seek position of the file will not
1730 be preserved.
1723 be preserved.
1731
1724
1732 Returns a str holding uncompressed data for the requested revision.
1725 Returns a str holding uncompressed data for the requested revision.
1733 """
1726 """
1734 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1727 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1735
1728
1736 def _chunks(self, revs, df=None, targetsize=None):
1729 def _chunks(self, revs, df=None, targetsize=None):
1737 """Obtain decompressed chunks for the specified revisions.
1730 """Obtain decompressed chunks for the specified revisions.
1738
1731
1739 Accepts an iterable of numeric revisions that are assumed to be in
1732 Accepts an iterable of numeric revisions that are assumed to be in
1740 ascending order. Also accepts an optional already-open file handle
1733 ascending order. Also accepts an optional already-open file handle
1741 to be used for reading. If used, the seek position of the file will
1734 to be used for reading. If used, the seek position of the file will
1742 not be preserved.
1735 not be preserved.
1743
1736
1744 This function is similar to calling ``self._chunk()`` multiple times,
1737 This function is similar to calling ``self._chunk()`` multiple times,
1745 but is faster.
1738 but is faster.
1746
1739
1747 Returns a list with decompressed data for each requested revision.
1740 Returns a list with decompressed data for each requested revision.
1748 """
1741 """
1749 if not revs:
1742 if not revs:
1750 return []
1743 return []
1751 start = self.start
1744 start = self.start
1752 length = self.length
1745 length = self.length
1753 inline = self._inline
1746 inline = self._inline
1754 iosize = self._io.size
1747 iosize = self.index.entry_size
1755 buffer = util.buffer
1748 buffer = util.buffer
1756
1749
1757 l = []
1750 l = []
1758 ladd = l.append
1751 ladd = l.append
1759
1752
1760 if not self._withsparseread:
1753 if not self._withsparseread:
1761 slicedchunks = (revs,)
1754 slicedchunks = (revs,)
1762 else:
1755 else:
1763 slicedchunks = deltautil.slicechunk(
1756 slicedchunks = deltautil.slicechunk(
1764 self, revs, targetsize=targetsize
1757 self, revs, targetsize=targetsize
1765 )
1758 )
1766
1759
1767 for revschunk in slicedchunks:
1760 for revschunk in slicedchunks:
1768 firstrev = revschunk[0]
1761 firstrev = revschunk[0]
1769 # Skip trailing revisions with empty diff
1762 # Skip trailing revisions with empty diff
1770 for lastrev in revschunk[::-1]:
1763 for lastrev in revschunk[::-1]:
1771 if length(lastrev) != 0:
1764 if length(lastrev) != 0:
1772 break
1765 break
1773
1766
1774 try:
1767 try:
1775 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1768 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1776 except OverflowError:
1769 except OverflowError:
1777 # issue4215 - we can't cache a run of chunks greater than
1770 # issue4215 - we can't cache a run of chunks greater than
1778 # 2G on Windows
1771 # 2G on Windows
1779 return [self._chunk(rev, df=df) for rev in revschunk]
1772 return [self._chunk(rev, df=df) for rev in revschunk]
1780
1773
1781 decomp = self.decompress
1774 decomp = self.decompress
1782 for rev in revschunk:
1775 for rev in revschunk:
1783 chunkstart = start(rev)
1776 chunkstart = start(rev)
1784 if inline:
1777 if inline:
1785 chunkstart += (rev + 1) * iosize
1778 chunkstart += (rev + 1) * iosize
1786 chunklength = length(rev)
1779 chunklength = length(rev)
1787 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1780 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1788
1781
1789 return l
1782 return l
1790
1783
1791 def _chunkclear(self):
1784 def _chunkclear(self):
1792 """Clear the raw chunk cache."""
1785 """Clear the raw chunk cache."""
1793 self._chunkcache = (0, b'')
1786 self._chunkcache = (0, b'')
1794
1787
1795 def deltaparent(self, rev):
1788 def deltaparent(self, rev):
1796 """return deltaparent of the given revision"""
1789 """return deltaparent of the given revision"""
1797 base = self.index[rev][3]
1790 base = self.index[rev][3]
1798 if base == rev:
1791 if base == rev:
1799 return nullrev
1792 return nullrev
1800 elif self._generaldelta:
1793 elif self._generaldelta:
1801 return base
1794 return base
1802 else:
1795 else:
1803 return rev - 1
1796 return rev - 1
1804
1797
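The two delta regimes in miniature (toy values): under generaldelta the stored base rev is the delta parent, otherwise deltas always chain against the immediately preceding rev, and a self-referencing base means a full snapshot:

def toy_deltaparent(rev, base, generaldelta):
    if base == rev:
        return -1                      # nullrev: rev is a full snapshot
    return base if generaldelta else rev - 1

assert toy_deltaparent(5, 5, True) == -1
assert toy_deltaparent(5, 2, True) == 2    # generaldelta: base is the parent
assert toy_deltaparent(5, 2, False) == 4   # otherwise: previous rev
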
1805 def issnapshot(self, rev):
1798 def issnapshot(self, rev):
1806 """tells whether rev is a snapshot"""
1799 """tells whether rev is a snapshot"""
1807 if not self._sparserevlog:
1800 if not self._sparserevlog:
1808 return self.deltaparent(rev) == nullrev
1801 return self.deltaparent(rev) == nullrev
1809 elif util.safehasattr(self.index, b'issnapshot'):
1802 elif util.safehasattr(self.index, b'issnapshot'):
1810 # directly assign the method to cache the testing and access
1803 # directly assign the method to cache the testing and access
1811 self.issnapshot = self.index.issnapshot
1804 self.issnapshot = self.index.issnapshot
1812 return self.issnapshot(rev)
1805 return self.issnapshot(rev)
1813 if rev == nullrev:
1806 if rev == nullrev:
1814 return True
1807 return True
1815 entry = self.index[rev]
1808 entry = self.index[rev]
1816 base = entry[3]
1809 base = entry[3]
1817 if base == rev:
1810 if base == rev:
1818 return True
1811 return True
1819 if base == nullrev:
1812 if base == nullrev:
1820 return True
1813 return True
1821 p1 = entry[5]
1814 p1 = entry[5]
1822 p2 = entry[6]
1815 p2 = entry[6]
1823 if base == p1 or base == p2:
1816 if base == p1 or base == p2:
1824 return False
1817 return False
1825 return self.issnapshot(base)
1818 return self.issnapshot(base)
1826
1819
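A condensed sketch of the sparse-revlog fallback above (hypothetical 3-slot entries mirroring the index slots used: delta base plus the two parent revs): a rev whose base is itself or nullrev is a snapshot, a rev deltaing against a parent is not, and anything else inherits its base's status recursively:

entries = {
    0: (0, -1, -1),  # base == rev: full snapshot
    1: (0, 0, -1),   # delta against its own parent: not a snapshot
    2: (0, 1, -1),   # delta against a non-parent snapshot: snapshot
}

def toy_issnapshot(rev):
    if rev == -1:
        return True
    base, p1, p2 = entries[rev]
    if base == rev or base == -1:
        return True
    if base in (p1, p2):
        return False
    return toy_issnapshot(base)

assert [toy_issnapshot(r) for r in (0, 1, 2)] == [True, False, True]
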
1827 def snapshotdepth(self, rev):
1820 def snapshotdepth(self, rev):
1828 """number of snapshot in the chain before this one"""
1821 """number of snapshot in the chain before this one"""
1829 if not self.issnapshot(rev):
1822 if not self.issnapshot(rev):
1830 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1823 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1831 return len(self._deltachain(rev)[0]) - 1
1824 return len(self._deltachain(rev)[0]) - 1
1832
1825
1833 def revdiff(self, rev1, rev2):
1826 def revdiff(self, rev1, rev2):
1834 """return or calculate a delta between two revisions
1827 """return or calculate a delta between two revisions
1835
1828
1836 The delta calculated is in binary form and is intended to be written to
1829 The delta calculated is in binary form and is intended to be written to
1837 revlog data directly. So this function needs raw revision data.
1830 revlog data directly. So this function needs raw revision data.
1838 """
1831 """
1839 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1832 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1840 return bytes(self._chunk(rev2))
1833 return bytes(self._chunk(rev2))
1841
1834
1842 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1835 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1843
1836
    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if self.version & 0xFFFF == REVLOGV2:
            if rev is None:
                rev = self.rev(node)
            sidedata = self._sidedata(rev)
        else:
            sidedata = {}

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext, sidedata
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, sidedata

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

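# Illustrative sketch (editor's addition, not part of revlog.py): the shape
# of the delta-chain reconstruction done by _rawtext() above. The real code
# applies binary bdiff chunks with mdiff.patches(); this toy uses callables
# as stand-ins for patch application, purely to show the fold over the chain.
def toy_apply_chain(basetext, deltas):
    text = basetext
    for apply_delta in deltas:  # each element patches the running text
        text = apply_delta(text)
    return text

chain = [lambda t: t + b" world", lambda t: t.replace(b"hello", b"hi")]
assert toy_apply_chain(b"hello", chain) == b"hi world"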
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
-            sidedata_offset += self._io.size * (1 + rev)
+            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        segment = self._getsegment(sidedata_offset, sidedata_size)
        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

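# Illustrative sketch (editor's addition, not part of revlog.py): the
# inline-offset adjustment above, with concrete numbers. In an inline revlog
# the index entries and revision data are interleaved in one file, so an
# offset recorded "as if" in a standalone .d file must be shifted by the
# (1 + rev) index entries that physically precede that revision's data.
# entry_size below is a made-up example value, not the real entry size.
def inline_sidedata_offset(recorded_offset, rev, entry_size, inline):
    if inline:
        return recorded_offset + entry_size * (1 + rev)
    return recorded_offset

# for rev 2 with 64-byte entries, three entries precede its data segment
assert inline_sidedata_offset(100, 2, 64, inline=True) == 100 + 64 * 3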
    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

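# Illustrative sketch (editor's addition): the classic revlog node hash as
# implemented, to the editor's understanding, by
# storageutil.hashrevisionsha1 -- SHA-1 over the two parent nodes in sorted
# order followed by the text. Sorting makes the node independent of parent
# order.
import hashlib

def toy_hashrevision(text, p1, p2):
    a, b = sorted((p1, p2))
    return hashlib.sha1(a + b + text).digest()

nullid = b"\0" * 20
p = hashlib.sha1(b"x" * 20).digest()
assert toy_hashrevision(b"data", nullid, p) == toy_hashrevision(b"data", p, nullid)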
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (
            not self._inline
            or (self.start(tiprev) + self.length(tiprev)) < _maxinline
        ):
            return

        troffset = tr.findoffset(self.indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self.indexfile
            )
        trindex = 0
        tr.add(self.datafile, 0)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
                if troffset <= self.start(r):
                    trindex = r

        with self._indexfp(b'w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

-        tr.replace(self.indexfile, trindex * self._io.size)
+        tr.replace(self.indexfile, trindex * self.index.entry_size)
        nodemaputil.setup_persistent_nodemap(tr, self)
        self._chunkclear()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.indexfile
            )

        if sidedata is None:
            sidedata = {}
        elif not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that doesn't support it")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.indexfile, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents

        Useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp(b"a+")
        ifh = self._indexfp(b"a+")
        try:
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                ifh,
                dfh,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

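# Illustrative sketch (editor's addition, not part of revlog.py): the chunk
# header convention that compress() relies on. A compressed chunk carries
# its own engine header (zlib output happens to start with b'x'), raw data
# starting with a NUL byte can be stored verbatim because no engine header
# starts with b'\0', and anything else gets an explicit b'u' (uncompressed)
# prefix.
import zlib

def toy_store_chunk(data):
    compressed = zlib.compress(data)
    if len(compressed) < len(data):
        return compressed        # self-describing: starts with b'x'
    if data[0:1] == b'\0':
        return data              # NUL never collides with an engine header
    return b'u' + data           # explicit "uncompressed" marker

assert toy_store_chunk(b"abc" * 100)[0:1] == b'x'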
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )

        return compressor.decompress(data)

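# Illustrative sketch (editor's addition): the first-byte dispatch above,
# exercised end to end with the three common cases.
import zlib

def toy_load_chunk(chunk):
    t = chunk[0:1]
    if t == b'x':
        return zlib.decompress(chunk)  # zlib data starts with b'x' (0x78)
    if t == b'\0' or not chunk:
        return chunk                   # raw data stored verbatim
    if t == b'u':
        return chunk[1:]               # strip the "uncompressed" marker
    raise ValueError('unknown compression type %r' % t)

assert toy_load_chunk(zlib.compress(b"abc" * 100)) == b"abc" * 100
assert toy_load_chunk(b"u" + b"plain") == b"plain"
assert toy_load_chunk(b"\0raw") == b"\0raw"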
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        ifh,
        dfh,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.indexfile
            )
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.indexfile
            )

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
-                    ifh, self.indexfile, offset + curr * self._io.size
+                    ifh, self.indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
-                    ifh, self.indexfile, curr * self._io.size
+                    ifh, self.indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self.datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if sidedata:
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = offset + deltainfo.deltalen
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
        )

        if self.version & 0xFFFF != REVLOGV2:
            e = e[:8]

        self.index.append(e)
        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(
            transaction,
            ifh,
            dfh,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

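# Illustrative sketch (editor's addition, not part of revlog.py): the first
# field of the index tuple built above. To the editor's understanding,
# offset_type() packs the data-file offset and the revision flags into one
# integer, with the flags in the low 16 bits, which is why flag values must
# fit in 0..0xFFFF.
def toy_offset_type(offset, flags):
    assert 0 <= flags <= 0xFFFF
    return (offset << 16) | flags

def toy_getoffset(offset_flags):
    return offset_flags >> 16

def toy_gettype(offset_flags):
    return offset_flags & 0xFFFF

packed = toy_offset_type(4096, 0x1)
assert toy_getoffset(packed) == 4096 and toy_gettype(packed) == 0x1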
    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self.version & 0xFFFF != REVLOGV2:
            return self.end(prev)

        offset = 0
        for rev, entry in enumerate(self.index):
            sidedata_end = entry[8] + entry[9]
            # Sidedata for a previous rev has potentially been written after
            # this rev's end, so take the max.
            offset = max(self.end(rev), offset, sidedata_end)
        return offset

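# Illustrative sketch (editor's addition): the revlog-v2 scan above, on
# plain numbers. Because sidedata can be rewritten after later revisions'
# data, the next free offset is the max over every revision's data end and
# sidedata end (entry[8] is the sidedata offset, entry[9] its size).
def toy_data_offset(data_ends, sidedata_spans):
    offset = 0
    for end, (sd_offset, sd_size) in zip(data_ends, sidedata_spans):
        offset = max(offset, end, sd_offset + sd_size)
    return offset

# rev 0's sidedata was rewritten past rev 1's data, so it wins
assert toy_data_offset([10, 20], [(25, 5), (0, 0)]) == 30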
    def _writeentry(
        self, transaction, ifh, dfh, entry, data, link, offset, sidedata
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                dfh.write(sidedata)
            ifh.write(entry)
        else:
-            offset += curr * self._io.size
+            offset += curr * self.index.entry_size
            transaction.add(self.indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            if sidedata:
                ifh.write(sidedata)
            self._enforceinlinesize(transaction, ifh)
        nodemaputil.setup_persistent_nodemap(transaction, self)

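# Illustrative sketch (editor's addition, and an interpretation rather than
# a statement of the on-disk format): the inline branch above. Inline
# revlogs interleave fixed-size index entries with variable-size revision
# data in the .i file, so the write position for revision `curr` is the
# total size of all earlier revision data (`offset`) plus one index entry
# per earlier revision.
def toy_inline_write_position(prev_data_total, curr, entry_size):
    return prev_data_total + curr * entry_size

# three earlier revisions, 64-byte entries, 500 bytes of earlier data
assert toy_inline_write_position(500, 3, 64) == 692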
    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp(b"a+")
-        isize = r * self._io.size
+        isize = r * self.index.entry_size
        if self._inline:
            transaction.add(self.indexfile, end + isize)
            dfh = None
        else:
            transaction.add(self.indexfile, isize)
            transaction.add(self.datafile, end)
            dfh = self._datafp(b"a+")

        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)
        empty = True

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                rev = self.index.get_rev(node)
                if rev is not None:
                    # this can happen if two branches make the same change
                    self._nodeduplicatecallback(transaction, rev)
                    if duplicaterevisioncb:
                        duplicaterevisioncb(self, rev)
                    empty = False
                    continue

                for p in (p1, p2):
                    if not self.index.has_node(p):
                        raise error.LookupError(
                            p, self.indexfile, _(b'unknown parent')
                        )

                if not self.index.has_node(deltabase):
                    raise error.LookupError(
                        deltabase, self.indexfile, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(b">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(
                            self.indexfile, self.node(baserev)
                        )

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                rev = self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta),
                    ifh,
                    dfh,
                    alwayscache=alwayscache,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

                if addrevisioncb:
                    addrevisioncb(self, rev)
                empty = False

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp(b"a+")
                    ifh = self._indexfp(b"a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()
        return not empty

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
-            end = rev * self._io.size
+            end = rev * self.index.entry_size
        else:
-            end += rev * self._io.size
+            end += rev * self.index.entry_size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._chunkclear()

        del self.index[rev:-1]

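# Illustrative sketch (editor's addition, not part of revlog.py): the two
# truncation sizes computed above. For a split revlog the index is truncated
# to one fixed-size entry per surviving revision; for an inline revlog the
# surviving entries sit interleaved with the surviving data, so both
# contribute to the new file size.
def toy_strip_index_size(rev, entry_size, inline, surviving_data_bytes):
    if not inline:
        return rev * entry_size
    return surviving_data_bytes + rev * entry_size

assert toy_strip_index_size(10, 64, False, 0) == 640
assert toy_strip_index_size(10, 64, True, 1000) == 1640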
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
-            s = self._io.size
+            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

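# Illustrative sketch (editor's addition): the (dd, di) arithmetic above for
# a split revlog. `dd` is actual minus expected data-file size; `di` is
# whatever trails the last whole fixed-size entry in the index file.
def toy_checksize(data_actual, data_expected, index_actual, entry_size):
    dd = data_actual - data_expected
    whole_entries = max(0, index_actual // entry_size)
    di = index_actual - whole_entries * entry_size
    return dd, di

# a healthy revlog: data file matches, index is a whole number of entries
assert toy_checksize(1000, 1000, 640, 64) == (0, 0)
# 5 stray bytes at the end of the index
assert toy_checksize(1000, 1000, 645, 64) == (0, 5)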
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedatacompanion=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta
        encoding differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy
        can significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if
        the delta could choose a better revision, it will do so. This means
        if you are converting a non-generaldelta revlog to a generaldelta
        revlog, deltas will be recomputed if the delta's parent isn't a
        parent of the revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both
        parents for merges. If None, the destination revlog's current
        setting is kept.

        If not None, ``sidedatacompanion`` is a callable that accepts two
        arguments:

            (srcrevlog, rev)

        and returns a quintet that controls changes to sidedata content from
        the old revision to the new clone result:

            (dropall, filterout, update, new_flags, dropped_flags)

        * if `dropall` is True, all sidedata should be dropped
        * `filterout` is a set of sidedata keys that should be dropped
        * `update` is a mapping of additional/new key -> value
        * `new_flags` is a bitfield of new flags that the revision should get
        * `dropped_flags` is a bitfield of flags that the revision should no
          longer have
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached
        # delta, if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedatacompanion,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

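The quintet contract above is easiest to see in a concrete companion. A minimal sketch, assuming a hypothetical sidedata key constant ``EXAMPLE_KEY`` (a real caller would use one of Mercurial's sidedata key constants):

EXAMPLE_KEY = 42  # hypothetical stand-in for a real sidedata key

def drop_example_sidedata(srcrevlog, rev):
    """Companion that removes one sidedata key and leaves flags alone."""
    dropall = False            # keep sidedata in general
    filterout = {EXAMPLE_KEY}  # ...but drop this one key
    update = {}                # no new key -> value pairs
    new_flags = 0              # no flags to add
    dropped_flags = 0          # no flags to remove
    return (dropall, filterout, update, new_flags, dropped_flags)

# e.g.: src.clone(tr, dest, deltareuse=src.DELTAREUSEFULLADD,
#                 sidedatacompanion=drop_example_sidedata)
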
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedatacompanion,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            sidedataactions = (False, [], {}, 0, 0)
            if sidedatacompanion is not None:
                sidedataactions = sidedatacompanion(self, rev)

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
                dropall = sidedataactions[0]
                filterout = sidedataactions[1]
                update = sidedataactions[2]
                new_flags = sidedataactions[3]
                dropped_flags = sidedataactions[4]
                text, sidedata = self._revisiondata(rev)
                if dropall:
                    sidedata = {}
                for key in filterout:
                    sidedata.pop(key, None)
                sidedata.update(update)
                if not sidedata:
                    sidedata = None

                flags |= new_flags
                flags &= ~dropped_flags

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.rawdata(rev)

                ifh = destrevlog.opener(
                    destrevlog.indexfile, b'a+', checkambig=False
                )
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, b'a+')
                try:
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        ifh,
                        dfh,
                        deltacomputer=deltacomputer,
                    )
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

            if addrevisioncb:
                addrevisioncb(self, rev, node)

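``addrevisioncb`` is invoked once per copied revision with ``(self, rev, node)``, as visible at the end of the loop above; a small recording callback illustrates the hook (sketch only):

copied = []

def record_copied(srcrevlog, rev, node):
    # invoked after each revision lands in the destination revlog
    copied.append(node)

# e.g.: src.clone(tr, dest, addrevisioncb=record_copied)
# afterwards len(copied) == len(src)
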
    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs') % self.version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self.indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self.datafile, location=b'store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()

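How a caller drives this copy-then-rename censoring is not shown in the hunk. Roughly, and with locking and error handling simplified (the repo/transaction plumbing here is an assumption for illustration, not part of this change):

# Hypothetical driver: censor one file revision inside a transaction.
with repo.wlock(), repo.lock(), repo.transaction(b'censor') as tr:
    flog = repo.file(b'secret.txt')   # filelog backed by a revlog
    fnode = flog.node(0)              # the revision to censor
    flog.censorrevision(tr, fnode, tombstone=b'redacted')
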
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.indexfile, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                    state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

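Since ``verifyintegrity`` is a generator of ``revlogproblem`` instances, the verifier just iterates it. A minimal consumer sketch, assuming ``revlogproblem`` exposes ``error`` and ``warning`` attributes that default to None, and a ``state`` dict prepared as above:

state = {b'expectedversion': 1, b'erroroncensored': True}
for problem in rl.verifyintegrity(state):
    if problem.error is not None:
        print(b'error: %s' % problem.error)
    elif problem.warning is not None:
        print(b'warning: %s' % problem.warning)
# revisions that could not be read are collected for later skipping:
unreadable = state[b'skipread']
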
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

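The returned dictionary only carries the keys that were requested, so callers pay for exactly what they ask. For example:

# Ask only for the revision count and the tracked size of `rl`.
info = rl.storageinfo(revisionscount=True, trackedsize=True)
print(info[b'revisionscount'])  # number of revisions in this revlog
print(info[b'trackedsize'])     # sum of rawsize(rev) over all revisions
assert b'storedsize' not in info  # not requested, so not computed
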
    def rewrite_sidedata(self, helpers, startrev, endrev):
        if self.version & 0xFFFF != REVLOGV2:
            return
        # inline revlogs are not yet supported because they suffer from an
        # issue when rewriting them (since it's not an append-only
        # operation). See issue6485.
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._datafp(b'a+') as fp:
            # Maybe this bug still exists, see revlog._writeentry
            fp.seek(0, os.SEEK_END)
            current_offset = fp.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata = storageutil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"Rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)
                entry = entry[:8]
                entry += (current_offset, len(serialized_sidedata))

                fp.write(serialized_sidedata)
                new_entries.append(entry)
                current_offset += len(serialized_sidedata)

        # rewrite the new index entries
        with self._indexfp(b'w+') as fp:
-            fp.seek(startrev * self._io.size)
+            fp.seek(startrev * self.index.entry_size)
            for i, entry in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, entry[8], entry[9])
                packed = self._io.packentry(entry, self.node, self.version, rev)
                fp.write(packed)
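
The ``-``/``+`` pair above is the point of this changeset: the width of an index entry now comes from ``self.index.entry_size`` instead of the struct size of the ``revlog._io`` parser, while the seek arithmetic stays the same. A worked sketch with hypothetical numbers:

# Index entries are fixed width, so the byte offset of revision
# `startrev` in the index file is a simple product. The value 96 is
# only an illustration of a revlogv2-style entry width.
entry_size = 96                 # would be rl.index.entry_size
startrev = 10
offset = startrev * entry_size  # = 960, the fp.seek() target above
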
@@ -1,490 +1,494 @@
// revlog.rs
//
// Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.

use crate::{
    cindex,
    utils::{node_from_py_bytes, node_from_py_object},
};
use cpython::{
    buffer::{Element, PyBuffer},
    exc::{IndexError, ValueError},
-    ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyModule, PyObject,
-    PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
+    ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyInt, PyModule,
+    PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
};
use hg::{
    nodemap::{Block, NodeMapError, NodeTree},
    revlog::{nodemap::NodeMap, NodePrefix, RevlogIndex},
    Revision,
};
use std::cell::RefCell;

/// Return a struct implementing the Graph trait
pub(crate) fn pyindex_to_graph(
    py: Python,
    index: PyObject,
) -> PyResult<cindex::Index> {
    match index.extract::<MixedIndex>(py) {
        Ok(midx) => Ok(midx.clone_cindex(py)),
        Err(_) => cindex::Index::new(py, index),
    }
}

py_class!(pub class MixedIndex |py| {
    data cindex: RefCell<cindex::Index>;
    data nt: RefCell<Option<NodeTree>>;
    data docket: RefCell<Option<PyObject>>;
    // Holds a reference to the mmap'ed persistent nodemap data
    data mmap: RefCell<Option<PyBuffer>>;

    def __new__(_cls, cindex: PyObject) -> PyResult<MixedIndex> {
        Self::new(py, cindex)
    }

    /// Compatibility layer used for Python consumers needing access to the C index
    ///
    /// Only use case so far is `scmutil.shortesthexnodeidprefix`,
    /// that may need to build a custom `nodetree`, based on a specified revset.
    /// With a Rust implementation of the nodemap, we will be able to get rid of
    /// this, by exposing our own standalone nodemap class,
    /// ready to accept `MixedIndex`.
    def get_cindex(&self) -> PyResult<PyObject> {
        Ok(self.cindex(py).borrow().inner().clone_ref(py))
    }

    // Index API involving nodemap, as defined in mercurial/pure/parsers.py

    /// Return Revision if found, raises a bare `error.RevlogError`
    /// in case of ambiguity, same as C version does
    def get_rev(&self, node: PyBytes) -> PyResult<Option<Revision>> {
        let opt = self.get_nodetree(py)?.borrow();
        let nt = opt.as_ref().unwrap();
        let idx = &*self.cindex(py).borrow();
        let node = node_from_py_bytes(py, &node)?;
        nt.find_bin(idx, node.into()).map_err(|e| nodemap_error(py, e))
    }

    /// same as `get_rev()` but raises a bare `error.RevlogError` if node
    /// is not found.
    ///
    /// No need to repeat `node` in the exception, `mercurial/revlog.py`
    /// will catch and rewrap with it
    def rev(&self, node: PyBytes) -> PyResult<Revision> {
        self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
    }

    /// return True if the node exists in the index
    def has_node(&self, node: PyBytes) -> PyResult<bool> {
        self.get_rev(py, node).map(|opt| opt.is_some())
    }

    /// find length of shortest hex nodeid of a binary ID
    def shortest(&self, node: PyBytes) -> PyResult<usize> {
        let opt = self.get_nodetree(py)?.borrow();
        let nt = opt.as_ref().unwrap();
        let idx = &*self.cindex(py).borrow();
        match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
        {
            Ok(Some(l)) => Ok(l),
            Ok(None) => Err(revlog_error(py)),
            Err(e) => Err(nodemap_error(py, e)),
        }
    }

    def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
        let opt = self.get_nodetree(py)?.borrow();
        let nt = opt.as_ref().unwrap();
        let idx = &*self.cindex(py).borrow();

        let node_as_string = if cfg!(feature = "python3-sys") {
            node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
        }
        else {
            let node = node.extract::<PyBytes>(py)?;
            String::from_utf8_lossy(node.data(py)).to_string()
        };

        let prefix = NodePrefix::from_hex(&node_as_string).map_err(|_| PyErr::new::<ValueError, _>(py, "Invalid node or prefix"))?;

        nt.find_bin(idx, prefix)
            // TODO make an inner API returning the node directly
            .map(|opt| opt.map(
                |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
            .map_err(|e| nodemap_error(py, e))

    }

    /// append an index entry
    def append(&self, tup: PyTuple) -> PyResult<PyObject> {
        if tup.len(py) < 8 {
            // this is better than the panic promised by tup.get_item()
            return Err(
                PyErr::new::<IndexError, _>(py, "tuple index out of range"))
        }
        let node_bytes = tup.get_item(py, 7).extract(py)?;
        let node = node_from_py_object(py, &node_bytes)?;

        let mut idx = self.cindex(py).borrow_mut();
        let rev = idx.len() as Revision;

        idx.append(py, tup)?;
        self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
            .insert(&*idx, &node, rev)
            .map_err(|e| nodemap_error(py, e))?;
        Ok(py.None())
    }

    def __delitem__(&self, key: PyObject) -> PyResult<()> {
        // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
        self.cindex(py).borrow().inner().del_item(py, key)?;
        let mut opt = self.get_nodetree(py)?.borrow_mut();
        let mut nt = opt.as_mut().unwrap();
        nt.invalidate_all();
        self.fill_nodemap(py, &mut nt)?;
        Ok(())
    }

    //
    // Reforwarded C index API
    //

    // index_methods (tp_methods). Same ordering as in revlog.c

    /// return the gca set of the given revs
    def ancestors(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "ancestors", args, kw)
    }

    /// return the heads of the common ancestors of the given revs
    def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "commonancestorsheads", args, kw)
    }

    /// Clear the index caches and inner py_class data.
    /// It is Python's responsibility to call `update_nodemap_data` again.
    def clearcaches(&self, *args, **kw) -> PyResult<PyObject> {
        self.nt(py).borrow_mut().take();
        self.docket(py).borrow_mut().take();
        self.mmap(py).borrow_mut().take();
        self.call_cindex(py, "clearcaches", args, kw)
    }

    /// get an index entry
    def get(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "get", args, kw)
    }

    /// compute phases
    def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "computephasesmapsets", args, kw)
    }

    /// reachableroots
    def reachableroots2(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "reachableroots2", args, kw)
    }

    /// get head revisions
    def headrevs(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "headrevs", args, kw)
    }

    /// get filtered head revisions
    def headrevsfiltered(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "headrevsfiltered", args, kw)
    }

    /// True if the object is a snapshot
    def issnapshot(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "issnapshot", args, kw)
    }

    /// Gather snapshot data in a cache dict
    def findsnapshots(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "findsnapshots", args, kw)
    }

    /// determine revisions with deltas to reconstruct fulltext
    def deltachain(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "deltachain", args, kw)
    }

    /// slice planned chunk read to reach a density threshold
    def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "slicechunktodensity", args, kw)
    }

    /// stats for the index
    def stats(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "stats", args, kw)
    }

    // index_sequence_methods and index_mapping_methods.
    //
    // Since we call back through the high level Python API,
    // there's no point making a distinction between index_get
    // and index_getitem.

    def __len__(&self) -> PyResult<usize> {
        self.cindex(py).borrow().inner().len(py)
    }

    def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
        // this conversion seems needless, but that's actually because
        // `index_getitem` does not handle conversion from PyLong,
        // which expressions such as [e for e in index] internally use.
        // Note that we don't seem to have a direct way to call
        // PySequence_GetItem (does the job), which would possibly be better
        // for performance
        let key = match key.extract::<Revision>(py) {
            Ok(rev) => rev.to_py_object(py).into_object(),
            Err(_) => key,
        };
        self.cindex(py).borrow().inner().get_item(py, key)
    }

    def __setitem__(&self, key: PyObject, value: PyObject) -> PyResult<()> {
        self.cindex(py).borrow().inner().set_item(py, key, value)
    }

    def __contains__(&self, item: PyObject) -> PyResult<bool> {
        // ObjectProtocol does not seem to provide contains(), so
        // this is an equivalent implementation of the index_contains()
        // defined in revlog.c
        let cindex = self.cindex(py).borrow();
        match item.extract::<Revision>(py) {
            Ok(rev) => {
                Ok(rev >= -1 && rev < cindex.inner().len(py)? as Revision)
            }
            Err(_) => {
                cindex.inner().call_method(
                    py,
                    "has_node",
                    PyTuple::new(py, &[item]),
                    None)?
                .extract(py)
            }
        }
    }

    def nodemap_data_all(&self) -> PyResult<PyBytes> {
        self.inner_nodemap_data_all(py)
    }

    def nodemap_data_incremental(&self) -> PyResult<PyObject> {
        self.inner_nodemap_data_incremental(py)
    }
    def update_nodemap_data(
        &self,
        docket: PyObject,
        nm_data: PyObject
    ) -> PyResult<PyObject> {
        self.inner_update_nodemap_data(py, docket, nm_data)
    }

+    @property
+    def entry_size(&self) -> PyResult<PyInt> {
+        self.cindex(py).borrow().inner().getattr(py, "entry_size")?.extract::<PyInt>(py)
+    }

});

impl MixedIndex {
    fn new(py: Python, cindex: PyObject) -> PyResult<MixedIndex> {
        Self::create_instance(
            py,
            RefCell::new(cindex::Index::new(py, cindex)?),
            RefCell::new(None),
            RefCell::new(None),
            RefCell::new(None),
        )
    }

    /// This is scaffolding at this point, but it could also become
    /// a way to start a persistent nodemap or perform a
    /// vacuum / repack operation
    fn fill_nodemap(
        &self,
        py: Python,
        nt: &mut NodeTree,
    ) -> PyResult<PyObject> {
        let index = self.cindex(py).borrow();
        for r in 0..index.len() {
            let rev = r as Revision;
            // in this case node() won't ever return None
            nt.insert(&*index, index.node(rev).unwrap(), rev)
                .map_err(|e| nodemap_error(py, e))?
        }
        Ok(py.None())
    }

    fn get_nodetree<'a>(
        &'a self,
        py: Python<'a>,
    ) -> PyResult<&'a RefCell<Option<NodeTree>>> {
        if self.nt(py).borrow().is_none() {
            let readonly = Box::new(Vec::new());
            let mut nt = NodeTree::load_bytes(readonly, 0);
            self.fill_nodemap(py, &mut nt)?;
            self.nt(py).borrow_mut().replace(nt);
        }
        Ok(self.nt(py))
    }

    /// forward a method call to the underlying C index
    fn call_cindex(
        &self,
        py: Python,
        name: &str,
        args: &PyTuple,
        kwargs: Option<&PyDict>,
    ) -> PyResult<PyObject> {
        self.cindex(py)
            .borrow()
            .inner()
            .call_method(py, name, args, kwargs)
    }

    pub fn clone_cindex(&self, py: Python) -> cindex::Index {
        self.cindex(py).borrow().clone_ref(py)
    }

    /// Returns the full nodemap bytes to be written as-is to disk
    fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
        let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
        let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();

        // If there's anything readonly, we need to build the data again from
        // scratch
        let bytes = if readonly.len() > 0 {
            let mut nt = NodeTree::load_bytes(Box::new(vec![]), 0);
            self.fill_nodemap(py, &mut nt)?;

            let (readonly, bytes) = nt.into_readonly_and_added_bytes();
            assert_eq!(readonly.len(), 0);

            bytes
        } else {
            bytes
        };

        let bytes = PyBytes::new(py, &bytes);
        Ok(bytes)
    }

    /// Returns the last saved docket along with the size of any changed data
    /// (in number of blocks), and said data as bytes.
    fn inner_nodemap_data_incremental(
        &self,
        py: Python,
    ) -> PyResult<PyObject> {
        let docket = self.docket(py).borrow();
        let docket = match docket.as_ref() {
            Some(d) => d,
            None => return Ok(py.None()),
        };

        let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
        let masked_blocks = node_tree.masked_readonly_blocks();
        let (_, data) = node_tree.into_readonly_and_added_bytes();
        let changed = masked_blocks * std::mem::size_of::<Block>();

        Ok((docket, changed, PyBytes::new(py, &data))
            .to_py_object(py)
            .into_object())
    }

    /// Update the nodemap from the new (mmap'ed) data.
    /// The docket is kept as a reference for later incremental calls.
    fn inner_update_nodemap_data(
        &self,
        py: Python,
        docket: PyObject,
        nm_data: PyObject,
    ) -> PyResult<PyObject> {
        let buf = PyBuffer::get(py, &nm_data)?;
        let len = buf.item_count();

        // Build a slice from the mmap'ed buffer data
        let cbuf = buf.buf_ptr();
        let bytes = if std::mem::size_of::<u8>() == buf.item_size()
            && buf.is_c_contiguous()
            && u8::is_compatible_format(buf.format())
        {
            unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
        } else {
            return Err(PyErr::new::<ValueError, _>(
                py,
                "Nodemap data buffer has an invalid memory representation"
                    .to_string(),
            ));
        };

        // Keep a reference to the mmap'ed buffer, otherwise we get a dangling
        // pointer.
        self.mmap(py).borrow_mut().replace(buf);

        let mut nt = NodeTree::load_bytes(Box::new(bytes), len);

        let data_tip =
            docket.getattr(py, "tip_rev")?.extract::<Revision>(py)?;
        self.docket(py).borrow_mut().replace(docket.clone_ref(py));
        let idx = self.cindex(py).borrow();
        let current_tip = idx.len();

        for r in (data_tip + 1)..current_tip as Revision {
            let rev = r as Revision;
            // in this case node() won't ever return None
            nt.insert(&*idx, idx.node(rev).unwrap(), rev)
                .map_err(|e| nodemap_error(py, e))?
        }

        *self.nt(py).borrow_mut() = Some(nt);

        Ok(py.None())
    }
}

fn revlog_error(py: Python) -> PyErr {
    match py
        .import("mercurial.error")
        .and_then(|m| m.get(py, "RevlogError"))
    {
        Err(e) => e,
        Ok(cls) => PyErr::from_instance(py, cls),
    }
}

fn rev_not_in_index(py: Python, rev: Revision) -> PyErr {
    PyErr::new::<ValueError, _>(
        py,
        format!(
            "Inconsistency: Revision {} found in nodemap \
             is not in revlog index",
            rev
        ),
    )
}

/// Standard treatment of NodeMapError
fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
    match err {
        NodeMapError::MultipleResults => revlog_error(py),
        NodeMapError::RevisionNotInIndex(r) => rev_not_in_index(py, r),
    }
}

/// Create the module, with __package__ given from parent
pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
    let dotted_name = &format!("{}.revlog", package);
    let m = PyModule::new(py, dotted_name)?;
    m.add(py, "__package__", package)?;
    m.add(py, "__doc__", "RevLog - Rust implementations")?;

    m.add_class::<MixedIndex>(py)?;

    let sys = PyModule::import(py, "sys")?;
    let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
    sys_modules.set_item(py, dotted_name, &m)?;

    Ok(m)
}
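
With the new ``entry_size`` property forwarded through the Rust ``MixedIndex`` (and the C index behind it), Python callers no longer need to reach into ``revlog._io`` for the entry width. A sketch of the call-site pattern this series moves toward, with illustrative values (``rl`` is any revlog whose index exposes the property):

entry_size = rl.index.entry_size   # forwarded to the C index by MixedIndex
assert entry_size > 0
# e.g. locating revision `startrev` in a non-inline index file:
startrev = 10
index_offset = startrev * entry_size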