revlog: add the option to track the expected compression upper bound...
marmoute
r42662:bc4373ba default
@@ -1,3059 +1,3063 @@
# perf.py - performance test routines
'''helper extension to measure performance

Configurations
==============

``perf``
--------

``all-timing``
  When set, additional statistics will be reported for each benchmark: best,
  worst, median, average. If not set, only the best timing is reported
  (default: off).

``presleep``
  number of seconds to wait before any group of runs (default: 1)

``pre-run``
  number of runs to perform before starting measurement.

``profile-benchmark``
  Enable profiling for the benchmarked section.
  (Only the first iteration is profiled.)

``run-limits``
  Control the number of runs each benchmark will perform. The option value
  should be a list of `<time>-<numberofrun>` pairs. After each run the
  conditions are considered in order with the following logic:

      If the benchmark has been running for <time> seconds and we have
      performed <numberofrun> iterations, stop the benchmark.

  The default value is: `3.0-100, 10.0-3`
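
  For example (the values here are purely illustrative)::

    [perf]
    run-limits = 5.0-50, 30.0-5

  would stop a benchmark once 5 seconds have elapsed and at least 50 runs
  were made, or once 30 seconds have elapsed and at least 5 runs were made.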

``stub``
  When set, benchmarks will only be run once, useful for testing
  (default: off)
'''

# "historical portability" policy of perf.py:
#
# We have to do:
# - make perf.py "loadable" with as wide a range of Mercurial versions as
#   possible
#   This doesn't mean that perf commands work correctly with that Mercurial.
#   BTW, perf.py itself has been available since 1.1 (or eb240755386d).
# - make historical perf commands work correctly with as wide a range of
#   Mercurial versions as possible
#
# We have to do, if possible with reasonable cost:
# - make recent perf commands for historical features work correctly
#   with early Mercurial
#
# We don't have to do:
# - make perf commands for recent features work correctly with early
#   Mercurial

from __future__ import absolute_import
import contextlib
import functools
import gc
import os
import random
import shutil
import struct
import sys
import tempfile
import threading
import time
from mercurial import (
    changegroup,
    cmdutil,
    commands,
    copies,
    error,
    extensions,
    hg,
    mdiff,
    merge,
    revlog,
    util,
)

# for "historical portability":
# try to import modules separately (in dict order), and ignore
# failure, because these aren't available with early Mercurial
try:
    from mercurial import branchmap # since 2.5 (or bcee63733aad)
except ImportError:
    pass
try:
    from mercurial import obsolete # since 2.3 (or ad0d6c2b3279)
except ImportError:
    pass
try:
    from mercurial import registrar # since 3.7 (or 37d50250b696)
    dir(registrar) # forcibly load it
except ImportError:
    registrar = None
try:
    from mercurial import repoview # since 2.5 (or 3a6ddacb7198)
except ImportError:
    pass
try:
    from mercurial.utils import repoviewutil # since 5.0
except ImportError:
    repoviewutil = None
try:
    from mercurial import scmutil # since 1.9 (or 8b252e826c68)
except ImportError:
    pass
try:
    from mercurial import setdiscovery # since 1.9 (or cb98fed52495)
except ImportError:
    pass

try:
    from mercurial import profiling
except ImportError:
    profiling = None

def identity(a):
    return a

try:
    from mercurial import pycompat
    getargspec = pycompat.getargspec # added to module after 4.5
    _byteskwargs = pycompat.byteskwargs # since 4.1 (or fbc3f73dc802)
    _sysstr = pycompat.sysstr # since 4.0 (or 2219f4f82ede)
    _xrange = pycompat.xrange # since 4.8 (or 7eba8f83129b)
    fsencode = pycompat.fsencode # since 3.9 (or f4a5e0e86a7e)
    if pycompat.ispy3:
        _maxint = sys.maxsize # per py3 docs for replacing maxint
    else:
        _maxint = sys.maxint
except (ImportError, AttributeError):
    import inspect
    getargspec = inspect.getargspec
    _byteskwargs = identity
    fsencode = identity # no py3 support
    _maxint = sys.maxint # no py3 support
    _sysstr = lambda x: x # no py3 support
    _xrange = xrange

try:
    # 4.7+
    queue = pycompat.queue.Queue
except (AttributeError, ImportError):
    # <4.7.
    try:
        queue = pycompat.queue
    except (AttributeError, ImportError):
        queue = util.queue

try:
    from mercurial import logcmdutil
    makelogtemplater = logcmdutil.maketemplater
except (AttributeError, ImportError):
    try:
        makelogtemplater = cmdutil.makelogtemplater
    except (AttributeError, ImportError):
        makelogtemplater = None

# for "historical portability":
# define util.safehasattr forcibly, because util.safehasattr has been
# available since 1.9.3 (or 94b200a11cf7)
_undefined = object()
def safehasattr(thing, attr):
    return getattr(thing, _sysstr(attr), _undefined) is not _undefined
setattr(util, 'safehasattr', safehasattr)

# for "historical portability":
# define util.timer forcibly, because util.timer has been available
# since ae5d60bb70c9
if safehasattr(time, 'perf_counter'):
    util.timer = time.perf_counter
elif os.name == b'nt':
    util.timer = time.clock
else:
    util.timer = time.time

# for "historical portability":
# use locally defined empty option list, if formatteropts isn't
# available, because commands.formatteropts has been available since
# 3.2 (or 7a7eed5176a4), even though formatting itself has been
# available since 2.2 (or ae5f92e154d3)
formatteropts = getattr(cmdutil, "formatteropts",
                        getattr(commands, "formatteropts", []))

# for "historical portability":
# use locally defined option list, if debugrevlogopts isn't available,
# because commands.debugrevlogopts has been available since 3.7 (or
# 5606f7d0d063), even though cmdutil.openrevlog() has been available
# since 1.9 (or a79fea6b3e77).
revlogopts = getattr(cmdutil, "debugrevlogopts",
                     getattr(commands, "debugrevlogopts", [
    (b'c', b'changelog', False, (b'open changelog')),
    (b'm', b'manifest', False, (b'open manifest')),
    (b'', b'dir', False, (b'open directory manifest')),
]))

cmdtable = {}

# for "historical portability":
# define parsealiases locally, because cmdutil.parsealiases has been
# available since 1.5 (or 6252852b4332)
def parsealiases(cmd):
    return cmd.split(b"|")

if safehasattr(registrar, 'command'):
    command = registrar.command(cmdtable)
elif safehasattr(cmdutil, 'command'):
    command = cmdutil.command(cmdtable)
    if b'norepo' not in getargspec(command).args:
        # for "historical portability":
        # wrap original cmdutil.command, because "norepo" option has
        # been available since 3.1 (or 75a96326cecb)
        _command = command
        def command(name, options=(), synopsis=None, norepo=False):
            if norepo:
                commands.norepo += b' %s' % b' '.join(parsealiases(name))
            return _command(name, list(options), synopsis)
else:
    # for "historical portability":
    # define "@command" annotation locally, because cmdutil.command
    # has been available since 1.9 (or 2daa5179e73f)
    def command(name, options=(), synopsis=None, norepo=False):
        def decorator(func):
            if synopsis:
                cmdtable[name] = func, list(options), synopsis
            else:
                cmdtable[name] = func, list(options)
            if norepo:
                commands.norepo += b' %s' % b' '.join(parsealiases(name))
            return func
        return decorator
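
# A minimal sketch (not part of the original file) of how the perf commands
# further below use the `command` shim defined above; `perfexample` is a
# hypothetical name:
#
#   @command(b'perfexample', formatteropts)
#   def perfexample(ui, repo, **opts):
#       ...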

try:
    import mercurial.registrar
    import mercurial.configitems
    configtable = {}
    configitem = mercurial.registrar.configitem(configtable)
    configitem(b'perf', b'presleep',
               default=mercurial.configitems.dynamicdefault,
    )
    configitem(b'perf', b'stub',
               default=mercurial.configitems.dynamicdefault,
    )
    configitem(b'perf', b'parentscount',
               default=mercurial.configitems.dynamicdefault,
    )
    configitem(b'perf', b'all-timing',
               default=mercurial.configitems.dynamicdefault,
    )
    configitem(b'perf', b'pre-run',
               default=mercurial.configitems.dynamicdefault,
    )
    configitem(b'perf', b'profile-benchmark',
               default=mercurial.configitems.dynamicdefault,
    )
    configitem(b'perf', b'run-limits',
               default=mercurial.configitems.dynamicdefault,
    )
except (ImportError, AttributeError):
    pass

def getlen(ui):
    if ui.configbool(b"perf", b"stub", False):
        return lambda x: 1
    return len

class noop(object):
    """dummy context manager"""
    def __enter__(self):
        pass
    def __exit__(self, *args):
        pass

NOOPCTX = noop()

def gettimer(ui, opts=None):
    """return a timer function and formatter: (timer, formatter)

    This function exists to gather the creation of formatter in a single
    place instead of duplicating it in all performance commands."""

    # enforce an idle period before execution to counteract power management
    # experimental config: perf.presleep
    time.sleep(getint(ui, b"perf", b"presleep", 1))

    if opts is None:
        opts = {}
    # redirect all to stderr unless buffer api is in use
    if not ui._buffers:
        ui = ui.copy()
        uifout = safeattrsetter(ui, b'fout', ignoremissing=True)
        if uifout:
            # for "historical portability":
            # ui.fout/ferr have been available since 1.9 (or 4e1ccd4c2b6d)
            uifout.set(ui.ferr)

    # get a formatter
    uiformatter = getattr(ui, 'formatter', None)
    if uiformatter:
        fm = uiformatter(b'perf', opts)
    else:
        # for "historical portability":
        # define formatter locally, because ui.formatter has been
        # available since 2.2 (or ae5f92e154d3)
        from mercurial import node
        class defaultformatter(object):
            """Minimized composition of baseformatter and plainformatter
            """
            def __init__(self, ui, topic, opts):
                self._ui = ui
                if ui.debugflag:
                    self.hexfunc = node.hex
                else:
                    self.hexfunc = node.short
            def __nonzero__(self):
                return False
            __bool__ = __nonzero__
            def startitem(self):
                pass
            def data(self, **data):
                pass
            def write(self, fields, deftext, *fielddata, **opts):
                self._ui.write(deftext % fielddata, **opts)
            def condwrite(self, cond, fields, deftext, *fielddata, **opts):
                if cond:
                    self._ui.write(deftext % fielddata, **opts)
            def plain(self, text, **opts):
                self._ui.write(text, **opts)
            def end(self):
                pass
        fm = defaultformatter(ui, b'perf', opts)

    # stub function, runs code only once instead of in a loop
    # experimental config: perf.stub
    if ui.configbool(b"perf", b"stub", False):
        return functools.partial(stub_timer, fm), fm

    # experimental config: perf.all-timing
    displayall = ui.configbool(b"perf", b"all-timing", False)

    # experimental config: perf.run-limits
    limitspec = ui.configlist(b"perf", b"run-limits", [])
    limits = []
    for item in limitspec:
        parts = item.split(b'-', 1)
        if len(parts) < 2:
            ui.warn((b'malformatted run limit entry, missing "-": %s\n'
                     % item))
            continue
        try:
            time_limit = float(pycompat.sysstr(parts[0]))
        except ValueError as e:
            ui.warn((b'malformatted run limit entry, %s: %s\n'
                     % (pycompat.bytestr(e), item)))
            continue
        try:
            run_limit = int(pycompat.sysstr(parts[1]))
        except ValueError as e:
            ui.warn((b'malformatted run limit entry, %s: %s\n'
                     % (pycompat.bytestr(e), item)))
            continue
        limits.append((time_limit, run_limit))
    if not limits:
        limits = DEFAULTLIMITS

    profiler = None
    if profiling is not None:
        if ui.configbool(b"perf", b"profile-benchmark", False):
            profiler = profiling.profile(ui)

    prerun = getint(ui, b"perf", b"pre-run", 0)
    t = functools.partial(_timer, fm, displayall=displayall, limits=limits,
                          prerun=prerun, profiler=profiler)
    return t, fm
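
# Illustrative sketch (not part of the original file): every perf command
# follows the same protocol with the pair returned above, roughly:
#
#   timer, fm = gettimer(ui, opts)
#   timer(lambda: expensive_operation(), setup=optional_setup)
#   fm.end()
#
# where `expensive_operation` and `optional_setup` are hypothetical
# stand-ins for the code under measurement.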

def stub_timer(fm, func, setup=None, title=None):
    if setup is not None:
        setup()
    func()

@contextlib.contextmanager
def timeone():
    r = []
    ostart = os.times()
    cstart = util.timer()
    yield r
    cstop = util.timer()
    ostop = os.times()
    a, b = ostart, ostop
    r.append((cstop - cstart, b[0] - a[0], b[1] - a[1]))
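
# A small usage sketch (not part of the original file): timeone() yields a
# list that receives one (wall, user, sys) tuple once the block exits:
#
#   with timeone() as res:
#       func()
#   wall, user, sys_times = res[0]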


# list of stop conditions (elapsed time, minimal run count)
DEFAULTLIMITS = (
    (3.0, 100),
    (10.0, 3),
)

def _timer(fm, func, setup=None, title=None, displayall=False,
           limits=DEFAULTLIMITS, prerun=0, profiler=None):
    gc.collect()
    results = []
    begin = util.timer()
    count = 0
    if profiler is None:
        profiler = NOOPCTX
    for i in range(prerun):
        if setup is not None:
            setup()
        func()
    keepgoing = True
    while keepgoing:
        if setup is not None:
            setup()
        with profiler:
            with timeone() as item:
                r = func()
        profiler = NOOPCTX
        count += 1
        results.append(item[0])
        cstop = util.timer()
        # Look for a stop condition.
        elapsed = cstop - begin
        for t, mincount in limits:
            if elapsed >= t and count >= mincount:
                keepgoing = False
                break

    formatone(fm, results, title=title, result=r,
              displayall=displayall)

def formatone(fm, timings, title=None, result=None, displayall=False):

    count = len(timings)

    fm.startitem()

    if title:
        fm.write(b'title', b'! %s\n', title)
    if result:
        fm.write(b'result', b'! result: %s\n', result)
    def display(role, entry):
        prefix = b''
        if role != b'best':
            prefix = b'%s.' % role
        fm.plain(b'!')
        fm.write(prefix + b'wall', b' wall %f', entry[0])
        fm.write(prefix + b'comb', b' comb %f', entry[1] + entry[2])
        fm.write(prefix + b'user', b' user %f', entry[1])
        fm.write(prefix + b'sys', b' sys %f', entry[2])
        fm.write(prefix + b'count', b' (%s of %%d)' % role, count)
        fm.plain(b'\n')
    timings.sort()
    min_val = timings[0]
    display(b'best', min_val)
    if displayall:
        max_val = timings[-1]
        display(b'max', max_val)
        avg = tuple([sum(x) / count for x in zip(*timings)])
        display(b'avg', avg)
        median = timings[len(timings) // 2]
        display(b'median', median)

# utilities for historical portability

def getint(ui, section, name, default):
    # for "historical portability":
    # ui.configint has been available since 1.9 (or fa2b596db182)
    v = ui.config(section, name, None)
    if v is None:
        return default
    try:
        return int(v)
    except ValueError:
        raise error.ConfigError((b"%s.%s is not an integer ('%s')")
                                % (section, name, v))

def safeattrsetter(obj, name, ignoremissing=False):
    """Ensure that 'obj' has the 'name' attribute before subsequent setattr

    This function aborts if 'obj' doesn't have the 'name' attribute
    at runtime. This avoids overlooking future removal of an attribute,
    which would break the assumptions of the performance measurement.

    This function returns an object to (1) assign a new value to, and
    (2) restore the original value of, the attribute.

    If 'ignoremissing' is true, a missing 'name' attribute doesn't cause
    an abort, and this function returns None. This is useful to
    examine an attribute which isn't guaranteed to exist in all Mercurial
    versions.
    """
    if not util.safehasattr(obj, name):
        if ignoremissing:
            return None
        raise error.Abort((b"missing attribute %s of %s might break assumption"
                           b" of performance measurement") % (name, obj))

    origvalue = getattr(obj, _sysstr(name))
    class attrutil(object):
        def set(self, newvalue):
            setattr(obj, _sysstr(name), newvalue)
        def restore(self):
            setattr(obj, _sysstr(name), origvalue)

    return attrutil()
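
# Illustrative usage (not part of the original file), mirroring gettimer()
# above:
#
#   uifout = safeattrsetter(ui, b'fout', ignoremissing=True)
#   if uifout:
#       uifout.set(ui.ferr)   # redirect output to stderr
#       ...
#       uifout.restore()      # put the original stream back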

# utilities to examine each internal API change

def getbranchmapsubsettable():
    # for "historical portability":
    # subsettable is defined in:
    # - branchmap since 2.9 (or 175c6fd8cacc)
    # - repoview since 2.5 (or 59a9f18d4587)
    # - repoviewutil since 5.0
    for mod in (branchmap, repoview, repoviewutil):
        subsettable = getattr(mod, 'subsettable', None)
        if subsettable:
            return subsettable

    # bisecting in bcee63733aad::59a9f18d4587 can reach here (both
    # branchmap and repoview modules exist, but subsettable attribute
    # doesn't)
    raise error.Abort((b"perfbranchmap not available with this Mercurial"),
                      hint=b"use 2.5 or later")

def getsvfs(repo):
    """Return appropriate object to access files under .hg/store
    """
    # for "historical portability":
    # repo.svfs has been available since 2.3 (or 7034365089bf)
    svfs = getattr(repo, 'svfs', None)
    if svfs:
        return svfs
    else:
        return getattr(repo, 'sopener')

def getvfs(repo):
    """Return appropriate object to access files under .hg
    """
    # for "historical portability":
    # repo.vfs has been available since 2.3 (or 7034365089bf)
    vfs = getattr(repo, 'vfs', None)
    if vfs:
        return vfs
    else:
        return getattr(repo, 'opener')

def repocleartagscachefunc(repo):
    """Return the function to clear tags cache according to repo internal API
    """
    if util.safehasattr(repo, b'_tagscache'): # since 2.0 (or 9dca7653b525)
        # in this case, setattr(repo, '_tagscache', None) or so isn't
        # the correct way to clear tags cache, because existing code paths
        # expect _tagscache to be a structured object.
        def clearcache():
            # _tagscache has been filteredpropertycache since 2.5 (or
            # 98c867ac1330), and delattr() can't work in such case
            if b'_tagscache' in vars(repo):
                del repo.__dict__[b'_tagscache']
        return clearcache

    repotags = safeattrsetter(repo, b'_tags', ignoremissing=True)
    if repotags: # since 1.4 (or 5614a628d173)
        return lambda: repotags.set(None)

    repotagscache = safeattrsetter(repo, b'tagscache', ignoremissing=True)
    if repotagscache: # since 0.6 (or d7df759d0e97)
        return lambda: repotagscache.set(None)

    # Mercurial earlier than 0.6 (or d7df759d0e97) logically reaches
    # this point, but it isn't so problematic, because:
    # - repo.tags of such Mercurial isn't "callable", and repo.tags()
    #   in perftags() causes failure soon
    # - perf.py itself has been available since 1.1 (or eb240755386d)
    raise error.Abort((b"tags API of this hg command is unknown"))

# utilities to clear cache

def clearfilecache(obj, attrname):
    unfiltered = getattr(obj, 'unfiltered', None)
    if unfiltered is not None:
        obj = obj.unfiltered()
    if attrname in vars(obj):
        delattr(obj, attrname)
    obj._filecache.pop(attrname, None)

def clearchangelog(repo):
    if repo is not repo.unfiltered():
        object.__setattr__(repo, r'_clcachekey', None)
        object.__setattr__(repo, r'_clcache', None)
    clearfilecache(repo.unfiltered(), 'changelog')

# perf commands

@command(b'perfwalk', formatteropts)
def perfwalk(ui, repo, *pats, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    m = scmutil.match(repo[None], pats, {})
    timer(lambda: len(list(repo.dirstate.walk(m, subrepos=[], unknown=True,
                                              ignored=False))))
    fm.end()

@command(b'perfannotate', formatteropts)
def perfannotate(ui, repo, f, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    fc = repo[b'.'][f]
    timer(lambda: len(fc.annotate(True)))
    fm.end()

@command(b'perfstatus',
         [(b'u', b'unknown', False,
           b'ask status to look for unknown files')] + formatteropts)
def perfstatus(ui, repo, **opts):
    opts = _byteskwargs(opts)
    #m = match.always(repo.root, repo.getcwd())
    #timer(lambda: sum(map(len, repo.dirstate.status(m, [], False, False,
    #                                                False))))
    timer, fm = gettimer(ui, opts)
    timer(lambda: sum(map(len, repo.status(unknown=opts[b'unknown']))))
    fm.end()

@command(b'perfaddremove', formatteropts)
def perfaddremove(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    try:
        oldquiet = repo.ui.quiet
        repo.ui.quiet = True
        matcher = scmutil.match(repo[None])
        opts[b'dry_run'] = True
        if b'uipathfn' in getargspec(scmutil.addremove).args:
            uipathfn = scmutil.getuipathfn(repo)
            timer(lambda: scmutil.addremove(repo, matcher, b"", uipathfn, opts))
        else:
            timer(lambda: scmutil.addremove(repo, matcher, b"", opts))
    finally:
        repo.ui.quiet = oldquiet
    fm.end()

def clearcaches(cl):
    # behave somewhat consistently across internal API changes
    if util.safehasattr(cl, b'clearcaches'):
        cl.clearcaches()
    elif util.safehasattr(cl, b'_nodecache'):
        from mercurial.node import nullid, nullrev
        cl._nodecache = {nullid: nullrev}
        cl._nodepos = None

@command(b'perfheads', formatteropts)
def perfheads(ui, repo, **opts):
    """benchmark the computation of the changelog heads"""
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    cl = repo.changelog
    def s():
        clearcaches(cl)
    def d():
        len(cl.headrevs())
    timer(d, setup=s)
    fm.end()

@command(b'perftags', formatteropts +
         [
             (b'', b'clear-revlogs', False, b'refresh changelog and manifest'),
         ])
def perftags(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    repocleartagscache = repocleartagscachefunc(repo)
    clearrevlogs = opts[b'clear_revlogs']
    def s():
        if clearrevlogs:
            clearchangelog(repo)
            clearfilecache(repo.unfiltered(), 'manifest')
        repocleartagscache()
    def t():
        return len(repo.tags())
    timer(t, setup=s)
    fm.end()

@command(b'perfancestors', formatteropts)
def perfancestors(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    heads = repo.changelog.headrevs()
    def d():
        for a in repo.changelog.ancestors(heads):
            pass
    timer(d)
    fm.end()

@command(b'perfancestorset', formatteropts)
def perfancestorset(ui, repo, revset, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    revs = repo.revs(revset)
    heads = repo.changelog.headrevs()
    def d():
        s = repo.changelog.ancestors(heads)
        for rev in revs:
            rev in s
    timer(d)
    fm.end()

@command(b'perfdiscovery', formatteropts, b'PATH')
def perfdiscovery(ui, repo, path, **opts):
    """benchmark discovery between local repo and the peer at given path
    """
    repos = [repo, None]
    timer, fm = gettimer(ui, opts)
    path = ui.expandpath(path)

    def s():
        repos[1] = hg.peer(ui, opts, path)
    def d():
        setdiscovery.findcommonheads(ui, *repos)
    timer(d, setup=s)
    fm.end()

@command(b'perfbookmarks', formatteropts +
         [
             (b'', b'clear-revlogs', False, b'refresh changelog and manifest'),
         ])
def perfbookmarks(ui, repo, **opts):
    """benchmark parsing bookmarks from disk to memory"""
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)

    clearrevlogs = opts[b'clear_revlogs']
    def s():
        if clearrevlogs:
            clearchangelog(repo)
        clearfilecache(repo, b'_bookmarks')
    def d():
        repo._bookmarks
    timer(d, setup=s)
    fm.end()

@command(b'perfbundleread', formatteropts, b'BUNDLE')
def perfbundleread(ui, repo, bundlepath, **opts):
    """Benchmark reading of bundle files.

    This command is meant to isolate the I/O part of bundle reading as
    much as possible.
    """
    from mercurial import (
        bundle2,
        exchange,
        streamclone,
    )

    opts = _byteskwargs(opts)

    def makebench(fn):
        def run():
            with open(bundlepath, b'rb') as fh:
                bundle = exchange.readbundle(ui, fh, bundlepath)
                fn(bundle)

        return run

    def makereadnbytes(size):
        def run():
            with open(bundlepath, b'rb') as fh:
                bundle = exchange.readbundle(ui, fh, bundlepath)
                while bundle.read(size):
                    pass

        return run

    def makestdioread(size):
        def run():
            with open(bundlepath, b'rb') as fh:
                while fh.read(size):
                    pass

        return run

    # bundle1

    def deltaiter(bundle):
        for delta in bundle.deltaiter():
            pass

    def iterchunks(bundle):
        for chunk in bundle.getchunks():
            pass

    # bundle2

    def forwardchunks(bundle):
        for chunk in bundle._forwardchunks():
            pass

    def iterparts(bundle):
        for part in bundle.iterparts():
            pass

    def iterpartsseekable(bundle):
        for part in bundle.iterparts(seekable=True):
            pass

    def seek(bundle):
        for part in bundle.iterparts(seekable=True):
            part.seek(0, os.SEEK_END)

    def makepartreadnbytes(size):
        def run():
            with open(bundlepath, b'rb') as fh:
                bundle = exchange.readbundle(ui, fh, bundlepath)
                for part in bundle.iterparts():
                    while part.read(size):
                        pass

        return run

    benches = [
        (makestdioread(8192), b'read(8k)'),
        (makestdioread(16384), b'read(16k)'),
        (makestdioread(32768), b'read(32k)'),
        (makestdioread(131072), b'read(128k)'),
    ]

    with open(bundlepath, b'rb') as fh:
        bundle = exchange.readbundle(ui, fh, bundlepath)

        if isinstance(bundle, changegroup.cg1unpacker):
            benches.extend([
                (makebench(deltaiter), b'cg1 deltaiter()'),
                (makebench(iterchunks), b'cg1 getchunks()'),
                (makereadnbytes(8192), b'cg1 read(8k)'),
                (makereadnbytes(16384), b'cg1 read(16k)'),
                (makereadnbytes(32768), b'cg1 read(32k)'),
                (makereadnbytes(131072), b'cg1 read(128k)'),
            ])
        elif isinstance(bundle, bundle2.unbundle20):
            benches.extend([
                (makebench(forwardchunks), b'bundle2 forwardchunks()'),
                (makebench(iterparts), b'bundle2 iterparts()'),
                (makebench(iterpartsseekable), b'bundle2 iterparts() seekable'),
                (makebench(seek), b'bundle2 part seek()'),
                (makepartreadnbytes(8192), b'bundle2 part read(8k)'),
                (makepartreadnbytes(16384), b'bundle2 part read(16k)'),
                (makepartreadnbytes(32768), b'bundle2 part read(32k)'),
                (makepartreadnbytes(131072), b'bundle2 part read(128k)'),
            ])
        elif isinstance(bundle, streamclone.streamcloneapplier):
            raise error.Abort(b'stream clone bundles not supported')
        else:
            raise error.Abort(b'unhandled bundle type: %s' % type(bundle))

    for fn, title in benches:
        timer, fm = gettimer(ui, opts)
        timer(fn, title=title)
        fm.end()

@command(b'perfchangegroupchangelog', formatteropts +
         [(b'', b'cgversion', b'02', b'changegroup version'),
          (b'r', b'rev', b'', b'revisions to add to changegroup')])
def perfchangegroupchangelog(ui, repo, cgversion=b'02', rev=None, **opts):
    """Benchmark producing a changelog group for a changegroup.

    This measures the time spent processing the changelog during a
    bundle operation. This occurs during `hg bundle` and on a server
    processing a `getbundle` wire protocol request (handles clones
    and pull requests).

    By default, all revisions are added to the changegroup.
    """
    opts = _byteskwargs(opts)
    cl = repo.changelog
    nodes = [cl.lookup(r) for r in repo.revs(rev or b'all()')]
    bundler = changegroup.getbundler(cgversion, repo)

    def d():
        state, chunks = bundler._generatechangelog(cl, nodes)
        for chunk in chunks:
            pass

    timer, fm = gettimer(ui, opts)

    # Terminal printing can interfere with timing. So disable it.
    with ui.configoverride({(b'progress', b'disable'): True}):
        timer(d)

    fm.end()
894
894
895 @command(b'perfdirs', formatteropts)
895 @command(b'perfdirs', formatteropts)
896 def perfdirs(ui, repo, **opts):
896 def perfdirs(ui, repo, **opts):
897 opts = _byteskwargs(opts)
897 opts = _byteskwargs(opts)
898 timer, fm = gettimer(ui, opts)
898 timer, fm = gettimer(ui, opts)
899 dirstate = repo.dirstate
899 dirstate = repo.dirstate
900 b'a' in dirstate
900 b'a' in dirstate
901 def d():
901 def d():
902 dirstate.hasdir(b'a')
902 dirstate.hasdir(b'a')
903 del dirstate._map._dirs
903 del dirstate._map._dirs
904 timer(d)
904 timer(d)
905 fm.end()
905 fm.end()
906
906
907 @command(b'perfdirstate', formatteropts)
907 @command(b'perfdirstate', formatteropts)
908 def perfdirstate(ui, repo, **opts):
908 def perfdirstate(ui, repo, **opts):
909 opts = _byteskwargs(opts)
909 opts = _byteskwargs(opts)
910 timer, fm = gettimer(ui, opts)
910 timer, fm = gettimer(ui, opts)
911 b"a" in repo.dirstate
911 b"a" in repo.dirstate
912 def d():
912 def d():
913 repo.dirstate.invalidate()
913 repo.dirstate.invalidate()
914 b"a" in repo.dirstate
914 b"a" in repo.dirstate
915 timer(d)
915 timer(d)
916 fm.end()
916 fm.end()
917
917
918 @command(b'perfdirstatedirs', formatteropts)
918 @command(b'perfdirstatedirs', formatteropts)
919 def perfdirstatedirs(ui, repo, **opts):
919 def perfdirstatedirs(ui, repo, **opts):
920 opts = _byteskwargs(opts)
920 opts = _byteskwargs(opts)
921 timer, fm = gettimer(ui, opts)
921 timer, fm = gettimer(ui, opts)
922 b"a" in repo.dirstate
922 b"a" in repo.dirstate
923 def d():
923 def d():
924 repo.dirstate.hasdir(b"a")
924 repo.dirstate.hasdir(b"a")
925 del repo.dirstate._map._dirs
925 del repo.dirstate._map._dirs
926 timer(d)
926 timer(d)
927 fm.end()
927 fm.end()
928
928
@command(b'perfdirstatefoldmap', formatteropts)
def perfdirstatefoldmap(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    dirstate = repo.dirstate
    b'a' in dirstate
    def d():
        dirstate._map.filefoldmap.get(b'a')
        del dirstate._map.filefoldmap
    timer(d)
    fm.end()

@command(b'perfdirfoldmap', formatteropts)
def perfdirfoldmap(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    dirstate = repo.dirstate
    b'a' in dirstate
    def d():
        dirstate._map.dirfoldmap.get(b'a')
        del dirstate._map.dirfoldmap
        del dirstate._map._dirs
    timer(d)
    fm.end()

@command(b'perfdirstatewrite', formatteropts)
def perfdirstatewrite(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    ds = repo.dirstate
    b"a" in ds
    def d():
        ds._dirty = True
        ds.write(repo.currenttransaction())
    timer(d)
    fm.end()

def _getmergerevs(repo, opts):
    """parse command arguments to return the revisions involved in a merge

    input: options dictionary with `rev`, `from` and `base`
    output: (localctx, otherctx, basectx)
    """
    if opts[b'from']:
        fromrev = scmutil.revsingle(repo, opts[b'from'])
        wctx = repo[fromrev]
    else:
        wctx = repo[None]
        # we don't want working dir files to be stat'd in the benchmark, so
        # prime that cache
        wctx.dirty()
    rctx = scmutil.revsingle(repo, opts[b'rev'], opts[b'rev'])
    if opts[b'base']:
        fromrev = scmutil.revsingle(repo, opts[b'base'])
        ancestor = repo[fromrev]
    else:
        ancestor = wctx.ancestor(rctx)
    return (wctx, rctx, ancestor)

@command(b'perfmergecalculate',
    [
        (b'r', b'rev', b'.', b'rev to merge against'),
        (b'', b'from', b'', b'rev to merge from'),
        (b'', b'base', b'', b'the revision to use as base'),
    ] + formatteropts)
def perfmergecalculate(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)

    wctx, rctx, ancestor = _getmergerevs(repo, opts)
    def d():
        # acceptremote is True because we don't want prompts in the middle of
        # our benchmark
        merge.calculateupdates(repo, wctx, rctx, [ancestor], False, False,
                               acceptremote=True, followcopies=True)
    timer(d)
    fm.end()

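# Example invocation (hypothetical revisions; the flags are the ones
# declared in the decorator above):
#
#   $ hg perfmergecalculate --from 1.0 --rev tip --base 'ancestor(1.0, tip)'
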
@command(b'perfmergecopies',
    [
        (b'r', b'rev', b'.', b'rev to merge against'),
        (b'', b'from', b'', b'rev to merge from'),
        (b'', b'base', b'', b'the revision to use as base'),
    ] + formatteropts)
def perfmergecopies(ui, repo, **opts):
    """measure runtime of `copies.mergecopies`"""
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    wctx, rctx, ancestor = _getmergerevs(repo, opts)
    def d():
        # acceptremote is True because we don't want prompts in the middle of
        # our benchmark
        copies.mergecopies(repo, wctx, rctx, ancestor)
    timer(d)
    fm.end()

@command(b'perfpathcopies', [], b"REV REV")
def perfpathcopies(ui, repo, rev1, rev2, **opts):
    """benchmark the copy tracing logic"""
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    ctx1 = scmutil.revsingle(repo, rev1, rev1)
    ctx2 = scmutil.revsingle(repo, rev2, rev2)
    def d():
        copies.pathcopies(ctx1, ctx2)
    timer(d)
    fm.end()

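# Example invocation (hypothetical revisions, matching the `REV REV` usage
# string above):
#
#   $ hg perfpathcopies 1.0 tip
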
@command(b'perfphases',
    [(b'', b'full', False, b'include file reading time too'),
    ], b"")
def perfphases(ui, repo, **opts):
    """benchmark phasesets computation"""
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    _phases = repo._phasecache
    full = opts.get(b'full')
    def d():
        phases = _phases
        if full:
            clearfilecache(repo, b'_phasecache')
            phases = repo._phasecache
        phases.invalidate()
        phases.loadphaserevs(repo)
    timer(d)
    fm.end()

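# Example invocations:
#
#   $ hg perfphases           # in-memory phaseset computation only
#   $ hg perfphases --full    # also re-read the phase data from disk
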
@command(b'perfphasesremote',
    [], b"[DEST]")
def perfphasesremote(ui, repo, dest=None, **opts):
    """benchmark time needed to analyse phases of the remote server"""
    from mercurial.node import (
        bin,
    )
    from mercurial import (
        exchange,
        hg,
        phases,
    )
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)

    path = ui.paths.getpath(dest, default=(b'default-push', b'default'))
    if not path:
        raise error.Abort((b'default repository not configured!'),
                          hint=(b"see 'hg help config.paths'"))
    dest = path.pushloc or path.loc
    ui.status((b'analysing phase of %s\n') % util.hidepassword(dest))
    other = hg.peer(repo, opts, dest)

    # easier to perform discovery through the operation
    op = exchange.pushoperation(repo, other)
    exchange._pushdiscoverychangeset(op)

    remotesubset = op.fallbackheads

    with other.commandexecutor() as e:
        remotephases = e.callcommand(b'listkeys',
                                     {b'namespace': b'phases'}).result()
    del other
    publishing = remotephases.get(b'publishing', False)
    if publishing:
        ui.status((b'publishing: yes\n'))
    else:
        ui.status((b'publishing: no\n'))

    nodemap = repo.changelog.nodemap
    nonpublishroots = 0
    for nhex, phase in remotephases.iteritems():
        if nhex == b'publishing':  # ignore data related to publish option
            continue
        node = bin(nhex)
        if node in nodemap and int(phase):
            nonpublishroots += 1
    ui.status((b'number of roots: %d\n') % len(remotephases))
    ui.status((b'number of known non public roots: %d\n') % nonpublishroots)
    def d():
        phases.remotephasessummary(repo,
                                   remotesubset,
                                   remotephases)
    timer(d)
    fm.end()

@command(b'perfmanifest', [
    (b'm', b'manifest-rev', False, b'Look up a manifest node revision'),
    (b'', b'clear-disk', False, b'clear on-disk caches too'),
    ] + formatteropts, b'REV|NODE')
def perfmanifest(ui, repo, rev, manifest_rev=False, clear_disk=False, **opts):
    """benchmark the time to read a manifest from disk and return a usable
    dict-like object

    Manifest caches are cleared before retrieval."""
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    if not manifest_rev:
        ctx = scmutil.revsingle(repo, rev, rev)
        t = ctx.manifestnode()
    else:
        from mercurial.node import bin

        if len(rev) == 40:
            t = bin(rev)
        else:
            try:
                rev = int(rev)

                if util.safehasattr(repo.manifestlog, b'getstorage'):
                    t = repo.manifestlog.getstorage(b'').node(rev)
                else:
                    t = repo.manifestlog._revlog.lookup(rev)
            except ValueError:
                raise error.Abort(b'manifest revision must be integer or full '
                                  b'node')
    def d():
        repo.manifestlog.clearcaches(clear_persisted_data=clear_disk)
        repo.manifestlog[t].read()
    timer(d)
    fm.end()

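# Example invocations (hypothetical revisions, matching the `REV|NODE`
# usage string above):
#
#   $ hg perfmanifest tip
#   $ hg perfmanifest --clear-disk tip
#   $ hg perfmanifest -m 0        # look up manifest revision 0 directly
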
@command(b'perfchangeset', formatteropts)
def perfchangeset(ui, repo, rev, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    n = scmutil.revsingle(repo, rev).node()
    def d():
        repo.changelog.read(n)
        #repo.changelog._cache = None
    timer(d)
    fm.end()

@command(b'perfignore', formatteropts)
def perfignore(ui, repo, **opts):
    """benchmark operations related to computing ignore"""
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    dirstate = repo.dirstate

    def setupone():
        dirstate.invalidate()
        clearfilecache(dirstate, b'_ignore')

    def runone():
        dirstate._ignore

    timer(runone, setup=setupone, title=b"load")
    fm.end()

@command(b'perfindex', [
    (b'', b'rev', [], b'revision to be looked up (default tip)'),
    (b'', b'no-lookup', None, b'do not look up any revision after creation'),
    ] + formatteropts)
def perfindex(ui, repo, **opts):
    """benchmark index creation time followed by a lookup

    The default is to look `tip` up. Depending on the index implementation,
    the revision looked up can matter. For example, an implementation
    scanning the index will have a faster lookup time for `--rev tip` than
    for `--rev 0`. The number of looked up revisions and their order can
    also matter.

    Example of useful sets to test:
    * tip
    * 0
    * -10:
    * :10
    * -10: + :10
    * :10: + -10:
    * -10000:
    * -10000: + 0

    It is not currently possible to check for lookup of a missing node. For
    deeper lookup benchmarking, check out the `perfnodemap` command."""
    import mercurial.revlog
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    mercurial.revlog._prereadsize = 2**24  # disable lazy parser in old hg
    if opts[b'no_lookup']:
        if opts['rev']:
            raise error.Abort('--no-lookup and --rev are mutually exclusive')
        nodes = []
    elif not opts[b'rev']:
        nodes = [repo[b"tip"].node()]
    else:
        revs = scmutil.revrange(repo, opts[b'rev'])
        cl = repo.changelog
        nodes = [cl.node(r) for r in revs]

    unfi = repo.unfiltered()
    # find the filecache func directly
    # This avoids polluting the benchmark with the filecache logic
    makecl = unfi.__class__.changelog.func
    def setup():
        # probably not necessary, but for good measure
        clearchangelog(unfi)
    def d():
        cl = makecl(unfi)
        for n in nodes:
            cl.rev(n)
    timer(d, setup=setup)
    fm.end()

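# Example invocations, using the revision sets suggested in the docstring
# above:
#
#   $ hg perfindex --rev tip
#   $ hg perfindex --rev '-10:' --rev ':10'
#   $ hg perfindex --no-lookup        # index creation time only
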
@command(b'perfnodemap', [
    (b'', b'rev', [], b'revision to be looked up (default tip)'),
    (b'', b'clear-caches', True, b'clear revlog cache between calls'),
    ] + formatteropts)
def perfnodemap(ui, repo, **opts):
    """benchmark the time necessary to look up revisions from a cold nodemap

    Depending on the implementation, the number and order of revisions we
    look up can vary. Example of useful sets to test:
    * tip
    * 0
    * -10:
    * :10
    * -10: + :10
    * :10: + -10:
    * -10000:
    * -10000: + 0

    The command currently focuses on valid binary lookups. Benchmarking for
    hexlookup, prefix lookup and missing lookup would also be valuable.
    """
    import mercurial.revlog
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    mercurial.revlog._prereadsize = 2**24  # disable lazy parser in old hg

    unfi = repo.unfiltered()
    clearcaches = opts['clear_caches']
    # find the filecache func directly
    # This avoids polluting the benchmark with the filecache logic
    makecl = unfi.__class__.changelog.func
    if not opts[b'rev']:
        raise error.Abort('use --rev to specify revisions to look up')
    revs = scmutil.revrange(repo, opts[b'rev'])
    cl = repo.changelog
    nodes = [cl.node(r) for r in revs]

    # use a list to pass reference to a nodemap from one closure to the next
    nodeget = [None]
    def setnodeget():
        # probably not necessary, but for good measure
        clearchangelog(unfi)
        nodeget[0] = makecl(unfi).nodemap.get

    def d():
        get = nodeget[0]
        for n in nodes:
            get(n)

    setup = None
    if clearcaches:
        def setup():
            setnodeget()
    else:
        setnodeget()
        d()  # prewarm the data structure
    timer(d, setup=setup)
    fm.end()

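# Example invocation (`--rev` is mandatory here, unlike for perfindex):
#
#   $ hg perfnodemap --rev tip --rev 0
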
@command(b'perfstartup', formatteropts)
def perfstartup(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    def d():
        if os.name != r'nt':
            os.system(b"HGRCPATH= %s version -q > /dev/null" %
                      fsencode(sys.argv[0]))
        else:
            os.environ[r'HGRCPATH'] = r' '
            os.system(r"%s version -q > NUL" % sys.argv[0])
    timer(d)
    fm.end()

@command(b'perfparents', formatteropts)
def perfparents(ui, repo, **opts):
    """benchmark the time necessary to fetch one changeset's parents.

    The fetch is done using the `node identifier`, traversing all object
    layers from the repository object. The first N revisions will be used
    for this benchmark. N is controlled by the ``perf.parentscount`` config
    option (default: 1000).
    """
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    # control the number of commits perfparents iterates over
    # experimental config: perf.parentscount
    count = getint(ui, b"perf", b"parentscount", 1000)
    if len(repo.changelog) < count:
        raise error.Abort(b"repo needs %d commits for this test" % count)
    repo = repo.unfiltered()
    nl = [repo.changelog.node(i) for i in _xrange(count)]
    def d():
        for n in nl:
            repo.changelog.parents(n)
    timer(d)
    fm.end()

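# Example invocation, overriding the experimental knob mentioned in the
# docstring for a small repository:
#
#   $ hg perfparents --config perf.parentscount=100
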
@command(b'perfctxfiles', formatteropts)
def perfctxfiles(ui, repo, x, **opts):
    opts = _byteskwargs(opts)
    x = int(x)
    timer, fm = gettimer(ui, opts)
    def d():
        len(repo[x].files())
    timer(d)
    fm.end()

@command(b'perfrawfiles', formatteropts)
def perfrawfiles(ui, repo, x, **opts):
    opts = _byteskwargs(opts)
    x = int(x)
    timer, fm = gettimer(ui, opts)
    cl = repo.changelog
    def d():
        len(cl.read(x)[3])
    timer(d)
    fm.end()

@command(b'perflookup', formatteropts)
def perflookup(ui, repo, rev, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    timer(lambda: len(repo.lookup(rev)))
    fm.end()

@command(b'perflinelogedits',
    [(b'n', b'edits', 10000, b'number of edits'),
     (b'', b'max-hunk-lines', 10, b'max lines in a hunk'),
    ], norepo=True)
def perflinelogedits(ui, **opts):
    from mercurial import linelog

    opts = _byteskwargs(opts)

    edits = opts[b'edits']
    maxhunklines = opts[b'max_hunk_lines']

    maxb1 = 100000
    random.seed(0)
    randint = random.randint
    currentlines = 0
    arglist = []
    for rev in _xrange(edits):
        a1 = randint(0, currentlines)
        a2 = randint(a1, min(currentlines, a1 + maxhunklines))
        b1 = randint(0, maxb1)
        b2 = randint(b1, b1 + maxhunklines)
        currentlines += (b2 - b1) - (a2 - a1)
        arglist.append((rev, a1, a2, b1, b2))

    def d():
        ll = linelog.linelog()
        for args in arglist:
            ll.replacelines(*args)

    timer, fm = gettimer(ui, opts)
    timer(d)
    fm.end()

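# Example invocation (no repository needed, per `norepo=True` above):
#
#   $ hg perflinelogedits -n 5000 --max-hunk-lines 20
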
@command(b'perfrevrange', formatteropts)
def perfrevrange(ui, repo, *specs, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    revrange = scmutil.revrange
    timer(lambda: len(revrange(repo, specs)))
    fm.end()

@command(b'perfnodelookup', formatteropts)
def perfnodelookup(ui, repo, rev, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    import mercurial.revlog
    mercurial.revlog._prereadsize = 2**24  # disable lazy parser in old hg
    n = scmutil.revsingle(repo, rev).node()
    cl = mercurial.revlog.revlog(getsvfs(repo), b"00changelog.i")
    def d():
        cl.rev(n)
        clearcaches(cl)
    timer(d)
    fm.end()

@command(b'perflog',
    [(b'', b'rename', False, b'ask log to follow renames')
    ] + formatteropts)
def perflog(ui, repo, rev=None, **opts):
    opts = _byteskwargs(opts)
    if rev is None:
        rev = []
    timer, fm = gettimer(ui, opts)
    ui.pushbuffer()
    timer(lambda: commands.log(ui, repo, rev=rev, date=b'', user=b'',
                               copies=opts.get(b'rename')))
    ui.popbuffer()
    fm.end()

@command(b'perfmoonwalk', formatteropts)
def perfmoonwalk(ui, repo, **opts):
    """benchmark walking the changelog backwards

    This also loads the changelog data for each revision in the changelog.
    """
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    def moonwalk():
        for i in repo.changelog.revs(start=(len(repo) - 1), stop=-1):
            ctx = repo[i]
            ctx.branch()  # read changelog data (in addition to the index)
    timer(moonwalk)
    fm.end()

@command(b'perftemplating',
    [(b'r', b'rev', [], b'revisions to run the template on'),
    ] + formatteropts)
def perftemplating(ui, repo, testedtemplate=None, **opts):
    """test the rendering time of a given template"""
    if makelogtemplater is None:
        raise error.Abort((b"perftemplating not available with this Mercurial"),
                          hint=b"use 4.3 or later")

    opts = _byteskwargs(opts)

    nullui = ui.copy()
    nullui.fout = open(os.devnull, r'wb')
    nullui.disablepager()
    revs = opts.get(b'rev')
    if not revs:
        revs = [b'all()']
    revs = list(scmutil.revrange(repo, revs))

    defaulttemplate = (b'{date|shortdate} [{rev}:{node|short}]'
                       b' {author|person}: {desc|firstline}\n')
    if testedtemplate is None:
        testedtemplate = defaulttemplate
    displayer = makelogtemplater(nullui, repo, testedtemplate)
    def format():
        for r in revs:
            ctx = repo[r]
            displayer.show(ctx)
            displayer.flush(ctx)

    timer, fm = gettimer(ui, opts)
    timer(format)
    fm.end()

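# Example invocations (the positional template argument is optional and
# falls back to the default template defined above):
#
#   $ hg perftemplating -r 'last(all(), 1000)'
#   $ hg perftemplating -r tip '{rev}:{node|short}\n'
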
@command(b'perfhelper-mergecopies', formatteropts +
    [
        (b'r', b'revs', [], b'restrict search to these revisions'),
        (b'', b'timing', False, b'provides extra data (costly)'),
    ])
def perfhelpermergecopies(ui, repo, revs=[], **opts):
    """find statistics about potential parameters for `perfmergecopies`

    This command finds (base, p1, p2) triplets relevant for copytracing
    benchmarking in the context of a merge. It reports values for some of
    the parameters that impact merge copy tracing time during merge.

    If `--timing` is set, rename detection is run and the associated timing
    will be reported. The extra details come at the cost of slower command
    execution.

    Since rename detection is only run once, other factors might easily
    affect the precision of the timing. However it should give a good
    approximation of which revision triplets are very costly.
    """
    opts = _byteskwargs(opts)
    fm = ui.formatter(b'perf', opts)
    dotiming = opts[b'timing']

    output_template = [
        ("base", "%(base)12s"),
        ("p1", "%(p1.node)12s"),
        ("p2", "%(p2.node)12s"),
        ("p1.nb-revs", "%(p1.nbrevs)12d"),
        ("p1.nb-files", "%(p1.nbmissingfiles)12d"),
        ("p1.renames", "%(p1.renamedfiles)12d"),
        ("p1.time", "%(p1.time)12.3f"),
        ("p2.nb-revs", "%(p2.nbrevs)12d"),
        ("p2.nb-files", "%(p2.nbmissingfiles)12d"),
        ("p2.renames", "%(p2.renamedfiles)12d"),
        ("p2.time", "%(p2.time)12.3f"),
        ("renames", "%(nbrenamedfiles)12d"),
        ("total.time", "%(time)12.3f"),
    ]
    if not dotiming:
        output_template = [i for i in output_template
                           if not ('time' in i[0] or 'renames' in i[0])]
    header_names = [h for (h, v) in output_template]
    output = ' '.join([v for (h, v) in output_template]) + '\n'
    header = ' '.join(['%12s'] * len(header_names)) + '\n'
    fm.plain(header % tuple(header_names))

    if not revs:
        revs = ['all()']
    revs = scmutil.revrange(repo, revs)

    roi = repo.revs('merge() and %ld', revs)
    for r in roi:
        ctx = repo[r]
        p1 = ctx.p1()
        p2 = ctx.p2()
        bases = repo.changelog._commonancestorsheads(p1.rev(), p2.rev())
        for b in bases:
            b = repo[b]
            p1missing = copies._computeforwardmissing(b, p1)
            p2missing = copies._computeforwardmissing(b, p2)
            data = {
                b'base': b.hex(),
                b'p1.node': p1.hex(),
                b'p1.nbrevs': len(repo.revs('%d::%d', b.rev(), p1.rev())),
                b'p1.nbmissingfiles': len(p1missing),
                b'p2.node': p2.hex(),
                b'p2.nbrevs': len(repo.revs('%d::%d', b.rev(), p2.rev())),
                b'p2.nbmissingfiles': len(p2missing),
            }
            if dotiming:
                begin = util.timer()
                mergedata = copies.mergecopies(repo, p1, p2, b)
                end = util.timer()
                # not very stable timing since we did only one run
                data['time'] = end - begin
                # mergedata contains five dicts: "copy", "movewithdir",
                # "diverge", "renamedelete" and "dirmove".
                # The first four are about renamed files, so let's count
                # them.
                renames = len(mergedata[0])
                renames += len(mergedata[1])
                renames += len(mergedata[2])
                renames += len(mergedata[3])
                data['nbrenamedfiles'] = renames
                begin = util.timer()
                p1renames = copies.pathcopies(b, p1)
                end = util.timer()
                data['p1.time'] = end - begin
                begin = util.timer()
                p2renames = copies.pathcopies(b, p2)
                end = util.timer()
                data['p2.time'] = end - begin
                data['p1.renamedfiles'] = len(p1renames)
                data['p2.renamedfiles'] = len(p2renames)
            fm.startitem()
            fm.data(**data)
            # make node pretty for the human output
            out = data.copy()
            out['base'] = fm.hexfunc(b.node())
            out['p1.node'] = fm.hexfunc(p1.node())
            out['p2.node'] = fm.hexfunc(p2.node())
            fm.plain(output % out)

    fm.end()

@command(b'perfhelper-pathcopies', formatteropts +
    [
        (b'r', b'revs', [], b'restrict search to these revisions'),
        (b'', b'timing', False, b'provides extra data (costly)'),
    ])
def perfhelperpathcopies(ui, repo, revs=[], **opts):
    """find statistics about potential parameters for `perfpathcopies`

    This command finds source-destination pairs relevant for copytracing
    testing. It reports values for some of the parameters that impact copy
    tracing time.

    If `--timing` is set, rename detection is run and the associated timing
    will be reported. The extra details come at the cost of a slower command
    execution.

    Since the rename detection is only run once, other factors might easily
    affect the precision of the timing. However it should give a good
    approximation of which revision pairs are very costly.
    """
    opts = _byteskwargs(opts)
    fm = ui.formatter(b'perf', opts)
    dotiming = opts[b'timing']

    if dotiming:
        header = '%12s %12s %12s %12s %12s %12s\n'
        output = ("%(source)12s %(destination)12s "
                  "%(nbrevs)12d %(nbmissingfiles)12d "
                  "%(nbrenamedfiles)12d %(time)18.5f\n")
        header_names = ("source", "destination", "nb-revs", "nb-files",
                        "nb-renames", "time")
        fm.plain(header % header_names)
    else:
        header = '%12s %12s %12s %12s\n'
        output = ("%(source)12s %(destination)12s "
                  "%(nbrevs)12d %(nbmissingfiles)12d\n")
        fm.plain(header % ("source", "destination", "nb-revs", "nb-files"))

    if not revs:
        revs = ['all()']
    revs = scmutil.revrange(repo, revs)

    roi = repo.revs('merge() and %ld', revs)
    for r in roi:
        ctx = repo[r]
        p1 = ctx.p1().rev()
        p2 = ctx.p2().rev()
        bases = repo.changelog._commonancestorsheads(p1, p2)
        for p in (p1, p2):
            for b in bases:
                base = repo[b]
                parent = repo[p]
                missing = copies._computeforwardmissing(base, parent)
                if not missing:
                    continue
                data = {
                    b'source': base.hex(),
                    b'destination': parent.hex(),
                    b'nbrevs': len(repo.revs('%d::%d', b, p)),
                    b'nbmissingfiles': len(missing),
                }
                if dotiming:
                    begin = util.timer()
                    renames = copies.pathcopies(base, parent)
                    end = util.timer()
                    # not very stable timing since we did only one run
                    data['time'] = end - begin
                    data['nbrenamedfiles'] = len(renames)
                fm.startitem()
                fm.data(**data)
                out = data.copy()
                out['source'] = fm.hexfunc(base.node())
                out['destination'] = fm.hexfunc(parent.node())
                fm.plain(output % out)

    fm.end()

@command(b'perfcca', formatteropts)
def perfcca(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    timer(lambda: scmutil.casecollisionauditor(ui, False, repo.dirstate))
    fm.end()

@command(b'perffncacheload', formatteropts)
def perffncacheload(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    s = repo.store
    def d():
        s.fncache._load()
    timer(d)
    fm.end()

@command(b'perffncachewrite', formatteropts)
def perffncachewrite(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    s = repo.store
    lock = repo.lock()
    s.fncache._load()
    tr = repo.transaction(b'perffncachewrite')
    tr.addbackup(b'fncache')
    def d():
        s.fncache._dirty = True
        s.fncache.write(tr)
    timer(d)
    tr.close()
    lock.release()
    fm.end()

@command(b'perffncacheencode', formatteropts)
def perffncacheencode(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    s = repo.store
    s.fncache._load()
    def d():
        for p in s.fncache.entries:
            s.encode(p)
    timer(d)
    fm.end()

def _bdiffworker(q, blocks, xdiff, ready, done):
    while not done.is_set():
        pair = q.get()
        while pair is not None:
            if xdiff:
                mdiff.bdiff.xdiffblocks(*pair)
            elif blocks:
                mdiff.bdiff.blocks(*pair)
            else:
                mdiff.textdiff(*pair)
            q.task_done()
            pair = q.get()
        q.task_done()  # for the None one
        with ready:
            ready.wait()

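# Protocol notes for _bdiffworker, as used by perfbdiff below: each worker
# blocks on the queue and treats a None item as an end-of-batch sentinel;
# q.task_done() is called for every item, including the sentinel, so the
# producer can use q.join() to wait for a whole timed batch. Between
# batches, and again at shutdown after `done` is set, workers park on the
# `ready` condition until the main thread calls notify_all().
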
def _manifestrevision(repo, mnode):
    ml = repo.manifestlog

    if util.safehasattr(ml, b'getstorage'):
        store = ml.getstorage(b'')
    else:
        store = ml._revlog

    return store.revision(mnode)

1727 @command(b'perfbdiff', revlogopts + formatteropts + [
1727 @command(b'perfbdiff', revlogopts + formatteropts + [
1728 (b'', b'count', 1, b'number of revisions to test (when using --startrev)'),
1728 (b'', b'count', 1, b'number of revisions to test (when using --startrev)'),
1729 (b'', b'alldata', False, b'test bdiffs for all associated revisions'),
1729 (b'', b'alldata', False, b'test bdiffs for all associated revisions'),
1730 (b'', b'threads', 0, b'number of thread to use (disable with 0)'),
1730 (b'', b'threads', 0, b'number of thread to use (disable with 0)'),
1731 (b'', b'blocks', False, b'test computing diffs into blocks'),
1731 (b'', b'blocks', False, b'test computing diffs into blocks'),
1732 (b'', b'xdiff', False, b'use xdiff algorithm'),
1732 (b'', b'xdiff', False, b'use xdiff algorithm'),
1733 ],
1733 ],
1734
1734
1735 b'-c|-m|FILE REV')
1735 b'-c|-m|FILE REV')
1736 def perfbdiff(ui, repo, file_, rev=None, count=None, threads=0, **opts):
1736 def perfbdiff(ui, repo, file_, rev=None, count=None, threads=0, **opts):
1737 """benchmark a bdiff between revisions
1737 """benchmark a bdiff between revisions
1738
1738
1739 By default, benchmark a bdiff between its delta parent and itself.
1739 By default, benchmark a bdiff between its delta parent and itself.
1740
1740
1741 With ``--count``, benchmark bdiffs between delta parents and self for N
1741 With ``--count``, benchmark bdiffs between delta parents and self for N
1742 revisions starting at the specified revision.
1742 revisions starting at the specified revision.
1743
1743
1744 With ``--alldata``, assume the requested revision is a changeset and
1744 With ``--alldata``, assume the requested revision is a changeset and
1745 measure bdiffs for all changes related to that changeset (manifest
1745 measure bdiffs for all changes related to that changeset (manifest
1746 and filelogs).
1746 and filelogs).
1747 """
1747 """
1748 opts = _byteskwargs(opts)
1748 opts = _byteskwargs(opts)
1749
1749
1750 if opts[b'xdiff'] and not opts[b'blocks']:
1750 if opts[b'xdiff'] and not opts[b'blocks']:
1751 raise error.CommandError(b'perfbdiff', b'--xdiff requires --blocks')
1751 raise error.CommandError(b'perfbdiff', b'--xdiff requires --blocks')
1752
1752
1753 if opts[b'alldata']:
1753 if opts[b'alldata']:
1754 opts[b'changelog'] = True
1754 opts[b'changelog'] = True
1755
1755
1756 if opts.get(b'changelog') or opts.get(b'manifest'):
1756 if opts.get(b'changelog') or opts.get(b'manifest'):
1757 file_, rev = None, file_
1757 file_, rev = None, file_
1758 elif rev is None:
1758 elif rev is None:
1759 raise error.CommandError(b'perfbdiff', b'invalid arguments')
        raise error.CommandError(b'perfbdiff', b'invalid arguments')

    blocks = opts[b'blocks']
    xdiff = opts[b'xdiff']
    textpairs = []

    r = cmdutil.openrevlog(repo, b'perfbdiff', file_, opts)

    startrev = r.rev(r.lookup(rev))
    for rev in range(startrev, min(startrev + count, len(r) - 1)):
        if opts[b'alldata']:
            # Load revisions associated with changeset.
            ctx = repo[rev]
            mtext = _manifestrevision(repo, ctx.manifestnode())
            for pctx in ctx.parents():
                pman = _manifestrevision(repo, pctx.manifestnode())
                textpairs.append((pman, mtext))

            # Load filelog revisions by iterating manifest delta.
            man = ctx.manifest()
            pman = ctx.p1().manifest()
            for filename, change in pman.diff(man).items():
                fctx = repo.file(filename)
                f1 = fctx.revision(change[0][0] or -1)
                f2 = fctx.revision(change[1][0] or -1)
                textpairs.append((f1, f2))
        else:
            dp = r.deltaparent(rev)
            textpairs.append((r.revision(dp), r.revision(rev)))

    withthreads = threads > 0
    if not withthreads:
        def d():
            for pair in textpairs:
                if xdiff:
                    mdiff.bdiff.xdiffblocks(*pair)
                elif blocks:
                    mdiff.bdiff.blocks(*pair)
                else:
                    mdiff.textdiff(*pair)
    else:
        q = queue()
        for i in _xrange(threads):
            q.put(None)
        ready = threading.Condition()
        done = threading.Event()
        for i in _xrange(threads):
            threading.Thread(target=_bdiffworker,
                             args=(q, blocks, xdiff, ready, done)).start()
        q.join()
        def d():
            for pair in textpairs:
                q.put(pair)
            for i in _xrange(threads):
                q.put(None)
            with ready:
                ready.notify_all()
            q.join()
    timer, fm = gettimer(ui, opts)
    timer(d)
    fm.end()

    if withthreads:
        done.set()
        for i in _xrange(threads):
            q.put(None)
        with ready:
            ready.notify_all()

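# Hedged usage sketch for perfbdiff (not from the source; the revision
# number is illustrative and the --threads flag is assumed from the
# `threads` parameter used above):
#
#   $ hg perfbdiff -m 1000 --count 10 --threads 4
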
@command(b'perfunidiff', revlogopts + formatteropts + [
    (b'', b'count', 1, b'number of revisions to test (when using --startrev)'),
    (b'', b'alldata', False, b'test unidiffs for all associated revisions'),
    ], b'-c|-m|FILE REV')
def perfunidiff(ui, repo, file_, rev=None, count=None, **opts):
    """benchmark a unified diff between revisions

    This doesn't include any copy tracing - it's just a unified diff
    of the texts.

    By default, benchmark the diff between the requested revision and its
    delta parent.

    With ``--count``, benchmark diffs between delta parents and self for N
    revisions starting at the specified revision.

    With ``--alldata``, assume the requested revision is a changeset and
    measure diffs for all changes related to that changeset (manifest
    and filelogs).
    """
    opts = _byteskwargs(opts)
    if opts[b'alldata']:
        opts[b'changelog'] = True

    if opts.get(b'changelog') or opts.get(b'manifest'):
        file_, rev = None, file_
    elif rev is None:
        raise error.CommandError(b'perfunidiff', b'invalid arguments')

    textpairs = []

    r = cmdutil.openrevlog(repo, b'perfunidiff', file_, opts)

    startrev = r.rev(r.lookup(rev))
    for rev in range(startrev, min(startrev + count, len(r) - 1)):
        if opts[b'alldata']:
            # Load revisions associated with changeset.
            ctx = repo[rev]
            mtext = _manifestrevision(repo, ctx.manifestnode())
            for pctx in ctx.parents():
                pman = _manifestrevision(repo, pctx.manifestnode())
                textpairs.append((pman, mtext))

            # Load filelog revisions by iterating manifest delta.
            man = ctx.manifest()
            pman = ctx.p1().manifest()
            for filename, change in pman.diff(man).items():
                fctx = repo.file(filename)
                f1 = fctx.revision(change[0][0] or -1)
                f2 = fctx.revision(change[1][0] or -1)
                textpairs.append((f1, f2))
        else:
            dp = r.deltaparent(rev)
            textpairs.append((r.revision(dp), r.revision(rev)))

    def d():
        for left, right in textpairs:
            # The date strings don't matter, so we pass empty strings.
            headerlines, hunks = mdiff.unidiff(
                left, b'', right, b'', b'left', b'right', binary=False)
            # consume iterators in roughly the way patch.py does
            b'\n'.join(headerlines)
            b''.join(sum((list(hlines) for hrange, hlines in hunks), []))
    timer, fm = gettimer(ui, opts)
    timer(d)
    fm.end()

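# Hedged usage sketch for perfunidiff (not from the source; the file name
# and revision are illustrative):
#
#   $ hg perfunidiff path/to/file.c 2000 --count 10
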
@command(b'perfdiffwd', formatteropts)
def perfdiffwd(ui, repo, **opts):
    """Profile diff of working directory changes"""
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    options = {
        'w': 'ignore_all_space',
        'b': 'ignore_space_change',
        'B': 'ignore_blank_lines',
        }

    for diffopt in ('', 'w', 'b', 'B', 'wB'):
        opts = dict((options[c], b'1') for c in diffopt)
        def d():
            ui.pushbuffer()
            commands.diff(ui, repo, **opts)
            ui.popbuffer()
        diffopt = diffopt.encode('ascii')
        title = b'diffopts: %s' % (diffopt and (b'-' + diffopt) or b'none')
        timer(d, title=title)
    fm.end()

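# Hedged usage sketch: perfdiffwd takes no positional arguments and times
# `hg diff` over each whitespace option combination listed above:
#
#   $ hg perfdiffwd
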
@command(b'perfrevlogindex', revlogopts + formatteropts,
         b'-c|-m|FILE')
def perfrevlogindex(ui, repo, file_=None, **opts):
    """Benchmark operations against a revlog index.

    This tests constructing a revlog instance, reading index data,
    parsing index data, and performing various operations related to
    index data.
    """

    opts = _byteskwargs(opts)

    rl = cmdutil.openrevlog(repo, b'perfrevlogindex', file_, opts)

    opener = getattr(rl, 'opener')  # trick linter
    indexfile = rl.indexfile
    data = opener.read(indexfile)

    header = struct.unpack(b'>I', data[0:4])[0]
    version = header & 0xFFFF
    if version == 1:
        revlogio = revlog.revlogio()
        inline = header & (1 << 16)
    else:
        raise error.Abort(b'unsupported revlog version: %d' % version)

    rllen = len(rl)

    node0 = rl.node(0)
    node25 = rl.node(rllen // 4)
    node50 = rl.node(rllen // 2)
    node75 = rl.node(rllen // 4 * 3)
    node100 = rl.node(rllen - 1)

    allrevs = range(rllen)
    allrevsrev = list(reversed(allrevs))
    allnodes = [rl.node(rev) for rev in range(rllen)]
    allnodesrev = list(reversed(allnodes))

    def constructor():
        revlog.revlog(opener, indexfile)

    def read():
        with opener(indexfile) as fh:
            fh.read()

    def parseindex():
        revlogio.parseindex(data, inline)

    def getentry(revornode):
        index = revlogio.parseindex(data, inline)[0]
        index[revornode]

    def getentries(revs, count=1):
        index = revlogio.parseindex(data, inline)[0]

        for i in range(count):
            for rev in revs:
                index[rev]

    def resolvenode(node):
        nodemap = revlogio.parseindex(data, inline)[1]
        # This only works for the C code.
        if nodemap is None:
            return

        try:
            nodemap[node]
        except error.RevlogError:
            pass

    def resolvenodes(nodes, count=1):
        nodemap = revlogio.parseindex(data, inline)[1]
        if nodemap is None:
            return

        for i in range(count):
            for node in nodes:
                try:
                    nodemap[node]
                except error.RevlogError:
                    pass

    benches = [
        (constructor, b'revlog constructor'),
        (read, b'read'),
        (parseindex, b'create index object'),
        (lambda: getentry(0), b'retrieve index entry for rev 0'),
        (lambda: resolvenode(b'a' * 20), b'look up missing node'),
        (lambda: resolvenode(node0), b'look up node at rev 0'),
        (lambda: resolvenode(node25), b'look up node at 1/4 len'),
        (lambda: resolvenode(node50), b'look up node at 1/2 len'),
        (lambda: resolvenode(node75), b'look up node at 3/4 len'),
        (lambda: resolvenode(node100), b'look up node at tip'),
        # 2x variation is to measure caching impact.
        (lambda: resolvenodes(allnodes),
         b'look up all nodes (forward)'),
        (lambda: resolvenodes(allnodes, 2),
         b'look up all nodes 2x (forward)'),
        (lambda: resolvenodes(allnodesrev),
         b'look up all nodes (reverse)'),
        (lambda: resolvenodes(allnodesrev, 2),
         b'look up all nodes 2x (reverse)'),
        (lambda: getentries(allrevs),
         b'retrieve all index entries (forward)'),
        (lambda: getentries(allrevs, 2),
         b'retrieve all index entries 2x (forward)'),
        (lambda: getentries(allrevsrev),
         b'retrieve all index entries (reverse)'),
        (lambda: getentries(allrevsrev, 2),
         b'retrieve all index entries 2x (reverse)'),
    ]

    for fn, title in benches:
        timer, fm = gettimer(ui, opts)
        timer(fn, title=title)
        fm.end()

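# Hedged usage sketch (benchmarks the changelog index here; -m would select
# the manifest instead):
#
#   $ hg perfrevlogindex -c
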
@command(b'perfrevlogrevisions', revlogopts + formatteropts +
         [(b'd', b'dist', 100, b'distance between the revisions'),
          (b's', b'startrev', 0, b'revision to start reading at'),
          (b'', b'reverse', False, b'read in reverse')],
         b'-c|-m|FILE')
def perfrevlogrevisions(ui, repo, file_=None, startrev=0, reverse=False,
                        **opts):
    """Benchmark reading a series of revisions from a revlog.

    By default, we read every ``-d/--dist`` revision from 0 to tip of
    the specified revlog.

    The start revision can be defined via ``-s/--startrev``.
    """
    opts = _byteskwargs(opts)

    rl = cmdutil.openrevlog(repo, b'perfrevlogrevisions', file_, opts)
    rllen = getlen(ui)(rl)

    if startrev < 0:
        startrev = rllen + startrev

    def d():
        rl.clearcaches()

        beginrev = startrev
        endrev = rllen
        dist = opts[b'dist']

        if reverse:
            beginrev, endrev = endrev - 1, beginrev - 1
            dist = -1 * dist

        for x in _xrange(beginrev, endrev, dist):
            # Old revisions don't support passing int.
            n = rl.node(x)
            rl.revision(n)

    timer, fm = gettimer(ui, opts)
    timer(d)
    fm.end()

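# Hedged usage sketch (reads every 100th manifest revision starting at
# rev 0; values are illustrative):
#
#   $ hg perfrevlogrevisions -m --dist 100
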
@command(b'perfrevlogwrite', revlogopts + formatteropts +
         [(b's', b'startrev', 1000, b'revision to start writing at'),
          (b'', b'stoprev', -1, b'last revision to write'),
          (b'', b'count', 3, b'number of passes to perform'),
          (b'', b'details', False, b'print timing for every revision tested'),
          (b'', b'source', b'full', b'the kind of data fed into the revlog'),
          (b'', b'lazydeltabase', True, b'try the provided delta first'),
          (b'', b'clear-caches', True, b'clear revlog cache between calls'),
         ],
         b'-c|-m|FILE')
def perfrevlogwrite(ui, repo, file_=None, startrev=1000, stoprev=-1, **opts):
    """Benchmark writing a series of revisions to a revlog.

    Possible source values are:
    * `full`: add from a full text (default).
    * `parent-1`: add from a delta to the first parent.
    * `parent-2`: add from a delta to the second parent if it exists
      (use a delta from the first parent otherwise).
    * `parent-smallest`: add from the smallest delta (either p1 or p2).
    * `storage`: add from the existing precomputed deltas.

    Note: This performance command measures performance in a custom way. As a
    result some of the global configuration of the 'perf' command does not
    apply to it:

    * ``pre-run``: disabled

    * ``profile-benchmark``: disabled

    * ``run-limits``: disabled, use --count instead
    """
    opts = _byteskwargs(opts)

    rl = cmdutil.openrevlog(repo, b'perfrevlogwrite', file_, opts)
    rllen = getlen(ui)(rl)
    if startrev < 0:
        startrev = rllen + startrev
    if stoprev < 0:
        stoprev = rllen + stoprev

    lazydeltabase = opts['lazydeltabase']
    source = opts['source']
    clearcaches = opts['clear_caches']
    validsource = (b'full', b'parent-1', b'parent-2', b'parent-smallest',
                   b'storage')
    if source not in validsource:
        raise error.Abort('invalid source type: %s' % source)

    ### actually gather results
    count = opts['count']
    if count <= 0:
        raise error.Abort('invalid run count: %d' % count)
    allresults = []
    for c in range(count):
        timing = _timeonewrite(ui, rl, source, startrev, stoprev, c + 1,
                               lazydeltabase=lazydeltabase,
                               clearcaches=clearcaches)
        allresults.append(timing)

    ### consolidate the results in a single list
    results = []
    for idx, (rev, t) in enumerate(allresults[0]):
        ts = [t]
        for other in allresults[1:]:
            orev, ot = other[idx]
            assert orev == rev
            ts.append(ot)
        results.append((rev, ts))
    resultcount = len(results)

    ### Compute and display relevant statistics

    # get a formatter
    fm = ui.formatter(b'perf', opts)
    displayall = ui.configbool(b"perf", b"all-timing", False)

    # print individual details if requested
    if opts['details']:
        for idx, item in enumerate(results, 1):
            rev, data = item
            title = 'revisions #%d of %d, rev %d' % (idx, resultcount, rev)
            formatone(fm, data, title=title, displayall=displayall)

    # sort results by median time
    results.sort(key=lambda x: sorted(x[1])[len(x[1]) // 2])
    # list of (name, index) to display
    relevants = [
        ("min", 0),
        ("10%", resultcount * 10 // 100),
        ("25%", resultcount * 25 // 100),
        ("50%", resultcount * 50 // 100),
        ("75%", resultcount * 75 // 100),
        ("90%", resultcount * 90 // 100),
        ("95%", resultcount * 95 // 100),
        ("99%", resultcount * 99 // 100),
        ("99.9%", resultcount * 999 // 1000),
        ("99.99%", resultcount * 9999 // 10000),
        ("99.999%", resultcount * 99999 // 100000),
        ("max", -1),
    ]
    if not ui.quiet:
        for name, idx in relevants:
            data = results[idx]
            title = '%s of %d, rev %d' % (name, resultcount, data[0])
            formatone(fm, data[1], title=title, displayall=displayall)

    # XXX summing that many floats will not be very precise, we ignore this
    # fact for now
    totaltime = []
    for item in allresults:
        totaltime.append((sum(x[1][0] for x in item),
                          sum(x[1][1] for x in item),
                          sum(x[1][2] for x in item),)
                         )
    formatone(fm, totaltime, title="total time (%d revs)" % resultcount,
              displayall=displayall)
    fm.end()

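# Hedged usage sketch (rewrites manifest revisions from rev 1000 to tip,
# seeding each one with the smallest parent delta; values are illustrative):
#
#   $ hg perfrevlogwrite -m --source parent-smallest --count 3
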
class _faketr(object):
    # Minimal transaction stub: addrawrevision only needs an object with an
    # `add` method, so accept and ignore the journal entries.
    def add(s, x, y, z=None):
        return None

def _timeonewrite(ui, orig, source, startrev, stoprev, runidx=None,
                  lazydeltabase=True, clearcaches=True):
    timings = []
    tr = _faketr()
    with _temprevlog(ui, orig, startrev) as dest:
        dest._lazydeltabase = lazydeltabase
        revs = list(orig.revs(startrev, stoprev))
        total = len(revs)
        topic = 'adding'
        if runidx is not None:
            topic += ' (run #%d)' % runidx
        # Support both old and new progress API
        if util.safehasattr(ui, 'makeprogress'):
            progress = ui.makeprogress(topic, unit='revs', total=total)
            def updateprogress(pos):
                progress.update(pos)
            def completeprogress():
                progress.complete()
        else:
            def updateprogress(pos):
                ui.progress(topic, pos, unit='revs', total=total)
            def completeprogress():
                ui.progress(topic, None, unit='revs', total=total)

        for idx, rev in enumerate(revs):
            updateprogress(idx)
            addargs, addkwargs = _getrevisionseed(orig, rev, tr, source)
            if clearcaches:
                dest.index.clearcaches()
                dest.clearcaches()
            with timeone() as r:
                dest.addrawrevision(*addargs, **addkwargs)
            timings.append((rev, r[0]))
        updateprogress(total)
        completeprogress()
    return timings

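# Sketch of the data shape returned above (assumed from how the totals are
# summed in perfrevlogwrite; each triple comes from one timeone()
# measurement):
#
#   [(rev, (wall, cpu_user, cpu_sys)), ...]
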
def _getrevisionseed(orig, rev, tr, source):
    from mercurial.node import nullid

    linkrev = orig.linkrev(rev)
    node = orig.node(rev)
    p1, p2 = orig.parents(node)
    flags = orig.flags(rev)
    cachedelta = None
    text = None

    if source == b'full':
        text = orig.revision(rev)
    elif source == b'parent-1':
        baserev = orig.rev(p1)
        cachedelta = (baserev, orig.revdiff(p1, rev))
    elif source == b'parent-2':
        parent = p2
        if p2 == nullid:
            parent = p1
        baserev = orig.rev(parent)
        cachedelta = (baserev, orig.revdiff(parent, rev))
    elif source == b'parent-smallest':
        p1diff = orig.revdiff(p1, rev)
        parent = p1
        diff = p1diff
        if p2 != nullid:
            p2diff = orig.revdiff(p2, rev)
            if len(p1diff) > len(p2diff):
                parent = p2
                diff = p2diff
        baserev = orig.rev(parent)
        cachedelta = (baserev, diff)
    elif source == b'storage':
        baserev = orig.deltaparent(rev)
        cachedelta = (baserev, orig.revdiff(orig.node(baserev), rev))

    return ((text, tr, linkrev, p1, p2),
            {'node': node, 'flags': flags, 'cachedelta': cachedelta})

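# The (args, kwargs) pair built above is consumed by _timeonewrite() as:
#
#   addargs, addkwargs = _getrevisionseed(orig, rev, tr, source)
#   dest.addrawrevision(*addargs, **addkwargs)
#
# Only the fulltext/cachedelta seed varies with the requested source mode.
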
@contextlib.contextmanager
def _temprevlog(ui, orig, truncaterev):
    from mercurial import vfs as vfsmod

    if orig._inline:
        raise error.Abort('not supporting inline revlog (yet)')
    revlogkwargs = {}
    k = 'upperboundcomp'
    if util.safehasattr(orig, k):
        revlogkwargs[k] = getattr(orig, k)

    origindexpath = orig.opener.join(orig.indexfile)
    origdatapath = orig.opener.join(orig.datafile)
    indexname = 'revlog.i'
    dataname = 'revlog.d'

    tmpdir = tempfile.mkdtemp(prefix='tmp-hgperf-')
    try:
        # copy the data file in a temporary directory
        ui.debug('copying data in %s\n' % tmpdir)
        destindexpath = os.path.join(tmpdir, 'revlog.i')
        destdatapath = os.path.join(tmpdir, 'revlog.d')
        shutil.copyfile(origindexpath, destindexpath)
        shutil.copyfile(origdatapath, destdatapath)

        # remove the data we want to add again
        ui.debug('truncating data to be rewritten\n')
        with open(destindexpath, 'ab') as index:
            index.seek(0)
            index.truncate(truncaterev * orig._io.size)
        with open(destdatapath, 'ab') as data:
            data.seek(0)
            data.truncate(orig.start(truncaterev))

        # instantiate a new revlog from the temporary copy
        ui.debug('instantiating revlog from the truncated copy\n')
        vfs = vfsmod.vfs(tmpdir)
        vfs.options = getattr(orig.opener, 'options', None)

        dest = revlog.revlog(vfs,
                             indexfile=indexname,
                             datafile=dataname, **revlogkwargs)
        if dest._inline:
            raise error.Abort('not supporting inline revlog (yet)')
        # make sure internals are initialized
        dest.revision(len(dest) - 1)
        yield dest
        del dest, vfs
    finally:
        shutil.rmtree(tmpdir, True)

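# Typical use of the context manager above (mirrors _timeonewrite; the
# temporary copy is always discarded on exit):
#
#   with _temprevlog(ui, orig, startrev) as dest:
#       ...  # re-add revisions >= startrev into `dest`
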
@command(b'perfrevlogchunks', revlogopts + formatteropts +
         [(b'e', b'engines', b'', b'compression engines to use'),
          (b's', b'startrev', 0, b'revision to start at')],
         b'-c|-m|FILE')
def perfrevlogchunks(ui, repo, file_=None, engines=None, startrev=0, **opts):
    """Benchmark operations on revlog chunks.

    Logically, each revlog is a collection of fulltext revisions. However,
    stored within each revlog are "chunks" of possibly compressed data. This
    data needs to be read and decompressed or compressed and written.

    This command measures the time it takes to read+decompress and recompress
    chunks in a revlog. It effectively isolates I/O and compression
    performance. For measurements of higher-level operations like resolving
    revisions, see ``perfrevlogrevisions`` and ``perfrevlogrevision``.
    """
    opts = _byteskwargs(opts)

    rl = cmdutil.openrevlog(repo, b'perfrevlogchunks', file_, opts)

    # _chunkraw was renamed to _getsegmentforrevs.
    try:
        segmentforrevs = rl._getsegmentforrevs
    except AttributeError:
        segmentforrevs = rl._chunkraw

    # Verify engines argument.
    if engines:
        engines = set(e.strip() for e in engines.split(b','))
        for engine in engines:
            try:
                util.compressionengines[engine]
            except KeyError:
                raise error.Abort(b'unknown compression engine: %s' % engine)
    else:
        engines = []
        for e in util.compengines:
            engine = util.compengines[e]
            try:
                if engine.available():
                    engine.revlogcompressor().compress(b'dummy')
                    engines.append(e)
            except NotImplementedError:
                pass

    revs = list(rl.revs(startrev, len(rl) - 1))

    def rlfh(rl):
        if rl._inline:
            return getsvfs(repo)(rl.indexfile)
        else:
            return getsvfs(repo)(rl.datafile)

    def doread():
        rl.clearcaches()
        for rev in revs:
            segmentforrevs(rev, rev)

    def doreadcachedfh():
        rl.clearcaches()
        fh = rlfh(rl)
        for rev in revs:
            segmentforrevs(rev, rev, df=fh)

    def doreadbatch():
        rl.clearcaches()
        segmentforrevs(revs[0], revs[-1])

    def doreadbatchcachedfh():
        rl.clearcaches()
        fh = rlfh(rl)
        segmentforrevs(revs[0], revs[-1], df=fh)

    def dochunk():
        rl.clearcaches()
        fh = rlfh(rl)
        for rev in revs:
            rl._chunk(rev, df=fh)

    chunks = [None]

    def dochunkbatch():
        rl.clearcaches()
        fh = rlfh(rl)
        # Save chunks as a side-effect.
        chunks[0] = rl._chunks(revs, df=fh)

    def docompress(compressor):
        rl.clearcaches()

        try:
            # Swap in the requested compression engine.
            oldcompressor = rl._compressor
            rl._compressor = compressor
            for chunk in chunks[0]:
                rl.compress(chunk)
        finally:
            rl._compressor = oldcompressor

    benches = [
        (lambda: doread(), b'read'),
        (lambda: doreadcachedfh(), b'read w/ reused fd'),
        (lambda: doreadbatch(), b'read batch'),
        (lambda: doreadbatchcachedfh(), b'read batch w/ reused fd'),
        (lambda: dochunk(), b'chunk'),
        (lambda: dochunkbatch(), b'chunk batch'),
    ]

    for engine in sorted(engines):
        compressor = util.compengines[engine].revlogcompressor()
        benches.append((functools.partial(docompress, compressor),
                        b'compress w/ %s' % engine))

    for fn, title in benches:
        timer, fm = gettimer(ui, opts)
        timer(fn, title=title)
        fm.end()

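# Hedged usage sketch (engine availability depends on the build; zstd may
# not be present everywhere):
#
#   $ hg perfrevlogchunks -c --engines 'zlib,zstd'
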
@command(b'perfrevlogrevision', revlogopts + formatteropts +
         [(b'', b'cache', False, b'use caches instead of clearing')],
         b'-c|-m|FILE REV')
def perfrevlogrevision(ui, repo, file_, rev=None, cache=None, **opts):
    """Benchmark obtaining a revlog revision.

    Obtaining a revlog revision consists of roughly the following steps:

    1. Compute the delta chain
    2. Slice the delta chain if applicable
    3. Obtain the raw chunks for that delta chain
    4. Decompress each raw chunk
    5. Apply binary patches to obtain fulltext
    6. Verify hash of fulltext

    This command measures the time spent in each of these phases.
    """
    opts = _byteskwargs(opts)

    if opts.get(b'changelog') or opts.get(b'manifest'):
        file_, rev = None, file_
    elif rev is None:
        raise error.CommandError(b'perfrevlogrevision', b'invalid arguments')

    r = cmdutil.openrevlog(repo, b'perfrevlogrevision', file_, opts)

    # _chunkraw was renamed to _getsegmentforrevs.
    try:
        segmentforrevs = r._getsegmentforrevs
    except AttributeError:
        segmentforrevs = r._chunkraw

    node = r.lookup(rev)
    rev = r.rev(node)

    def getrawchunks(data, chain):
        start = r.start
        length = r.length
        inline = r._inline
        iosize = r._io.size
        buffer = util.buffer

        chunks = []
        ladd = chunks.append
        for idx, item in enumerate(chain):
            offset = start(item[0])
            bits = data[idx]
            for rev in item:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(buffer(bits, chunkstart - offset, chunklength))

        return chunks

    def dodeltachain(rev):
        if not cache:
            r.clearcaches()
        r._deltachain(rev)

    def doread(chain):
        if not cache:
            r.clearcaches()
        for item in slicedchain:
            segmentforrevs(item[0], item[-1])

    def doslice(r, chain, size):
        for s in slicechunk(r, chain, targetsize=size):
            pass

    def dorawchunks(data, chain):
        if not cache:
            r.clearcaches()
        getrawchunks(data, chain)

    def dodecompress(chunks):
        decomp = r.decompress
        for chunk in chunks:
            decomp(chunk)

    def dopatch(text, bins):
        if not cache:
            r.clearcaches()
        mdiff.patches(text, bins)

    def dohash(text):
        if not cache:
            r.clearcaches()
        r.checkhash(text, node, rev=rev)

    def dorevision():
        if not cache:
            r.clearcaches()
        r.revision(node)

    try:
        from mercurial.revlogutils.deltas import slicechunk
    except ImportError:
        slicechunk = getattr(revlog, '_slicechunk', None)

    size = r.length(rev)
    chain = r._deltachain(rev)[0]
    if not getattr(r, '_withsparseread', False):
        slicedchain = (chain,)
    else:
        slicedchain = tuple(slicechunk(r, chain, targetsize=size))
    data = [segmentforrevs(seg[0], seg[-1])[1] for seg in slicedchain]
    rawchunks = getrawchunks(data, slicedchain)
    bins = r._chunks(chain)
    text = bytes(bins[0])
    bins = bins[1:]
    text = mdiff.patches(text, bins)

    benches = [
        (lambda: dorevision(), b'full'),
        (lambda: dodeltachain(rev), b'deltachain'),
        (lambda: doread(chain), b'read'),
    ]

    if getattr(r, '_withsparseread', False):
        slicing = (lambda: doslice(r, chain, size), b'slice-sparse-chain')
        benches.append(slicing)

    benches.extend([
        (lambda: dorawchunks(data, slicedchain), b'rawchunks'),
        (lambda: dodecompress(rawchunks), b'decompress'),
        (lambda: dopatch(text, bins), b'patch'),
        (lambda: dohash(text), b'hash'),
    ])

    timer, fm = gettimer(ui, opts)
    for fn, title in benches:
        timer(fn, title=title)
    fm.end()

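# Hedged usage sketch (times each phase listed in the docstring for a
# single manifest revision; the revision number is illustrative):
#
#   $ hg perfrevlogrevision -m 10000
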
@command(b'perfrevset',
         [(b'C', b'clear', False, b'clear volatile cache between each call.'),
          (b'', b'contexts', False, b'obtain changectx for each revision')]
         + formatteropts, b"REVSET")
def perfrevset(ui, repo, expr, clear=False, contexts=False, **opts):
    """benchmark the execution time of a revset

    Use the --clear option if you need to evaluate the impact of building
    the volatile revision set caches on revset execution. The volatile
    caches hold filtering and obsolescence related data."""
    opts = _byteskwargs(opts)

    timer, fm = gettimer(ui, opts)
    def d():
        if clear:
            repo.invalidatevolatilesets()
        if contexts:
            for ctx in repo.set(expr):
                pass
        else:
            for r in repo.revs(expr):
                pass
    timer(d)
    fm.end()

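# Hedged usage sketch (the revset expression is illustrative):
#
#   $ hg perfrevset 'draft() and head()' --contexts --clear
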
@command(b'perfvolatilesets',
         [(b'', b'clear-obsstore', False, b'drop obsstore between each call.'),
          ] + formatteropts)
def perfvolatilesets(ui, repo, *names, **opts):
    """benchmark the computation of various volatile sets

    Volatile sets compute elements related to filtering and obsolescence."""
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    repo = repo.unfiltered()

    def getobs(name):
        def d():
            repo.invalidatevolatilesets()
            if opts[b'clear_obsstore']:
                clearfilecache(repo, b'obsstore')
            obsolete.getrevs(repo, name)
        return d

    allobs = sorted(obsolete.cachefuncs)
    if names:
        allobs = [n for n in allobs if n in names]

    for name in allobs:
        timer(getobs(name), title=name)

    def getfiltered(name):
        def d():
            repo.invalidatevolatilesets()
            if opts[b'clear_obsstore']:
                clearfilecache(repo, b'obsstore')
            repoview.filterrevs(repo, name)
        return d

    allfilter = sorted(repoview.filtertable)
    if names:
        allfilter = [n for n in allfilter if n in names]

    for name in allfilter:
        timer(getfiltered(name), title=name)
    fm.end()

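# Hedged usage sketch (set names come from obsolete.cachefuncs and
# repoview.filtertable; 'obsolete' and 'visible' are assumed examples):
#
#   $ hg perfvolatilesets obsolete visible --clear-obsstore
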
@command(b'perfbranchmap',
         [(b'f', b'full', False,
           b'Includes build time of subset'),
          (b'', b'clear-revbranch', False,
           b'purge the revbranch cache between computation'),
          ] + formatteropts)
def perfbranchmap(ui, repo, *filternames, **opts):
    """benchmark the update of a branchmap

    This benchmarks the full repo.branchmap() call with read and write
    disabled.
    """
    opts = _byteskwargs(opts)
    full = opts.get(b"full", False)
    clear_revbranch = opts.get(b"clear_revbranch", False)
    timer, fm = gettimer(ui, opts)
    def getbranchmap(filtername):
        """generate a benchmark function for the filtername"""
        if filtername is None:
            view = repo
        else:
            view = repo.filtered(filtername)
        if util.safehasattr(view._branchcaches, '_per_filter'):
            filtered = view._branchcaches._per_filter
        else:
            # older versions
            filtered = view._branchcaches
        def d():
            if clear_revbranch:
                repo.revbranchcache()._clear()
            if full:
                view._branchcaches.clear()
            else:
                filtered.pop(filtername, None)
            view.branchmap()
        return d
    # add filter in smaller subset to bigger subset
    possiblefilters = set(repoview.filtertable)
    if filternames:
        possiblefilters &= set(filternames)
    subsettable = getbranchmapsubsettable()
    allfilters = []
    while possiblefilters:
        for name in possiblefilters:
            subset = subsettable.get(name)
            if subset not in possiblefilters:
                break
        else:
            assert False, b'subset cycle %s!' % possiblefilters
        allfilters.append(name)
        possiblefilters.remove(name)

    # warm the cache
    if not full:
        for name in allfilters:
            repo.filtered(name).branchmap()
    if not filternames or b'unfiltered' in filternames:
        # add unfiltered
        allfilters.append(None)

    if util.safehasattr(branchmap.branchcache, 'fromfile'):
        branchcacheread = safeattrsetter(branchmap.branchcache, b'fromfile')
        branchcacheread.set(classmethod(lambda *args: None))
    else:
        # older versions
        branchcacheread = safeattrsetter(branchmap, b'read')
        branchcacheread.set(lambda *args: None)
    branchcachewrite = safeattrsetter(branchmap.branchcache, b'write')
    branchcachewrite.set(lambda *args: None)
    try:
        for name in allfilters:
            printname = name
            if name is None:
                printname = b'unfiltered'
            timer(getbranchmap(name), title=str(printname))
    finally:
        branchcacheread.restore()
        branchcachewrite.restore()
    fm.end()

2719 @command(b'perfbranchmapupdate', [
2723 @command(b'perfbranchmapupdate', [
2720 (b'', b'base', [], b'subset of revisions to start from'),
2724 (b'', b'base', [], b'subset of revisions to start from'),
2721 (b'', b'target', [], b'subset of revisions to end with'),
2725 (b'', b'target', [], b'subset of revisions to end with'),
2722 (b'', b'clear-caches', False, b'clear caches between each run')
2726 (b'', b'clear-caches', False, b'clear caches between each run')
2723 ] + formatteropts)
2727 ] + formatteropts)
2724 def perfbranchmapupdate(ui, repo, base=(), target=(), **opts):
2728 def perfbranchmapupdate(ui, repo, base=(), target=(), **opts):
2725 """benchmark branchmap update from for <base> revs to <target> revs
2729 """benchmark branchmap update from for <base> revs to <target> revs
2726
2730
2727 If `--clear-caches` is passed, the following items will be reset before
2731 If `--clear-caches` is passed, the following items will be reset before
2728 each update:
2732 each update:
2729 * the changelog instance and associated indexes
2733 * the changelog instance and associated indexes
2730 * the rev-branch-cache instance
2734 * the rev-branch-cache instance
2731
2735
2732 Examples:
2736 Examples:
2733
2737
2734 # update for just the last revision
2738 # update for just the last revision
2735 $ hg perfbranchmapupdate --base 'not tip' --target 'tip'
2739 $ hg perfbranchmapupdate --base 'not tip' --target 'tip'
2736
2740
2737 # update for a change coming with a new branch
2741 # update for a change coming with a new branch
2738 $ hg perfbranchmapupdate --base 'stable' --target 'default'
2742 $ hg perfbranchmapupdate --base 'stable' --target 'default'
2739 """
2743 """
2740 from mercurial import branchmap
2744 from mercurial import branchmap
2741 from mercurial import repoview
2745 from mercurial import repoview
2742 opts = _byteskwargs(opts)
2746 opts = _byteskwargs(opts)
2743 timer, fm = gettimer(ui, opts)
2747 timer, fm = gettimer(ui, opts)
2744 clearcaches = opts[b'clear_caches']
2748 clearcaches = opts[b'clear_caches']
2745 unfi = repo.unfiltered()
2749 unfi = repo.unfiltered()
2746 x = [None] # used to pass data between closures
2750 x = [None] # used to pass data between closures
2747
2751
2748 # we use a `list` here to avoid possible side effect from smartset
2752 # we use a `list` here to avoid possible side effect from smartset
2749 baserevs = list(scmutil.revrange(repo, base))
2753 baserevs = list(scmutil.revrange(repo, base))
2750 targetrevs = list(scmutil.revrange(repo, target))
2754 targetrevs = list(scmutil.revrange(repo, target))
2751 if not baserevs:
2755 if not baserevs:
2752 raise error.Abort(b'no revisions selected for --base')
2756 raise error.Abort(b'no revisions selected for --base')
2753 if not targetrevs:
2757 if not targetrevs:
2754 raise error.Abort(b'no revisions selected for --target')
2758 raise error.Abort(b'no revisions selected for --target')
2755
2759
2756 # make sure the target branchmap also contains the one in the base
2760 # make sure the target branchmap also contains the one in the base
2757 targetrevs = list(set(baserevs) | set(targetrevs))
2761 targetrevs = list(set(baserevs) | set(targetrevs))
2758 targetrevs.sort()
2762 targetrevs.sort()
2759
2763
2760 cl = repo.changelog
2764 cl = repo.changelog
2761 allbaserevs = list(cl.ancestors(baserevs, inclusive=True))
2765 allbaserevs = list(cl.ancestors(baserevs, inclusive=True))
2762 allbaserevs.sort()
2766 allbaserevs.sort()
2763 alltargetrevs = frozenset(cl.ancestors(targetrevs, inclusive=True))
2767 alltargetrevs = frozenset(cl.ancestors(targetrevs, inclusive=True))
2764
2768
2765 newrevs = list(alltargetrevs.difference(allbaserevs))
2769 newrevs = list(alltargetrevs.difference(allbaserevs))
2766 newrevs.sort()
2770 newrevs.sort()
2767
2771
2768 allrevs = frozenset(unfi.changelog.revs())
2772 allrevs = frozenset(unfi.changelog.revs())
2769 basefilterrevs = frozenset(allrevs.difference(allbaserevs))
2773 basefilterrevs = frozenset(allrevs.difference(allbaserevs))
2770 targetfilterrevs = frozenset(allrevs.difference(alltargetrevs))
2774 targetfilterrevs = frozenset(allrevs.difference(alltargetrevs))
2771
2775
2772 def basefilter(repo, visibilityexceptions=None):
2776 def basefilter(repo, visibilityexceptions=None):
2773 return basefilterrevs
2777 return basefilterrevs
2774
2778
2775 def targetfilter(repo, visibilityexceptions=None):
2779 def targetfilter(repo, visibilityexceptions=None):
2776 return targetfilterrevs
2780 return targetfilterrevs
2777
2781
2778 msg = b'benchmark of branchmap with %d revisions with %d new ones\n'
2782 msg = b'benchmark of branchmap with %d revisions with %d new ones\n'
2779 ui.status(msg % (len(allbaserevs), len(newrevs)))
2783 ui.status(msg % (len(allbaserevs), len(newrevs)))
2780 if targetfilterrevs:
2784 if targetfilterrevs:
2781 msg = b'(%d revisions still filtered)\n'
2785 msg = b'(%d revisions still filtered)\n'
2782 ui.status(msg % len(targetfilterrevs))
2786 ui.status(msg % len(targetfilterrevs))
2783
2787
2784 try:
2788 try:
2785 repoview.filtertable[b'__perf_branchmap_update_base'] = basefilter
2789 repoview.filtertable[b'__perf_branchmap_update_base'] = basefilter
2786 repoview.filtertable[b'__perf_branchmap_update_target'] = targetfilter
2790 repoview.filtertable[b'__perf_branchmap_update_target'] = targetfilter
2787
2791
2788 baserepo = repo.filtered(b'__perf_branchmap_update_base')
2792 baserepo = repo.filtered(b'__perf_branchmap_update_base')
2789 targetrepo = repo.filtered(b'__perf_branchmap_update_target')
2793 targetrepo = repo.filtered(b'__perf_branchmap_update_target')
2790
2794
2791 # try to find an existing branchmap to reuse
2795 # try to find an existing branchmap to reuse
2792 subsettable = getbranchmapsubsettable()
2796 subsettable = getbranchmapsubsettable()
2793 candidatefilter = subsettable.get(None)
2797 candidatefilter = subsettable.get(None)
2794 while candidatefilter is not None:
2798 while candidatefilter is not None:
2795 candidatebm = repo.filtered(candidatefilter).branchmap()
2799 candidatebm = repo.filtered(candidatefilter).branchmap()
2796 if candidatebm.validfor(baserepo):
2800 if candidatebm.validfor(baserepo):
2797 filtered = repoview.filterrevs(repo, candidatefilter)
2801 filtered = repoview.filterrevs(repo, candidatefilter)
2798 missing = [r for r in allbaserevs if r in filtered]
2802 missing = [r for r in allbaserevs if r in filtered]
2799 base = candidatebm.copy()
2803 base = candidatebm.copy()
2800 base.update(baserepo, missing)
2804 base.update(baserepo, missing)
2801 break
2805 break
2802 candidatefilter = subsettable.get(candidatefilter)
2806 candidatefilter = subsettable.get(candidatefilter)
2803 else:
2807 else:
2804 # no suitable subset was found
2808 # no suitable subset was found
2805 base = branchmap.branchcache()
2809 base = branchmap.branchcache()
2806 base.update(baserepo, allbaserevs)
2810 base.update(baserepo, allbaserevs)
2807
2811
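# Sketch of the reuse chain walked above (filter names assumed from
# branchmap's subsettable): starting at subsettable.get(None), e.g.
# 'visible' -> 'served' -> 'immutable', the first cached branchmap that
# is validfor() the base repo is copied and topped up with the missing
# revisions; only when no candidate matches is a fresh branchcache
# built from all of the base revs, as done just above.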
2808 def setup():
2812 def setup():
2809 x[0] = base.copy()
2813 x[0] = base.copy()
2810 if clearcaches:
2814 if clearcaches:
2811 unfi._revbranchcache = None
2815 unfi._revbranchcache = None
2812 clearchangelog(repo)
2816 clearchangelog(repo)
2813
2817
2814 def bench():
2818 def bench():
2815 x[0].update(targetrepo, newrevs)
2819 x[0].update(targetrepo, newrevs)
2816
2820
2817 timer(bench, setup=setup)
2821 timer(bench, setup=setup)
2818 fm.end()
2822 fm.end()
2819 finally:
2823 finally:
2820 repoview.filtertable.pop(b'__perf_branchmap_update_base', None)
2824 repoview.filtertable.pop(b'__perf_branchmap_update_base', None)
2821 repoview.filtertable.pop(b'__perf_branchmap_update_target', None)
2825 repoview.filtertable.pop(b'__perf_branchmap_update_target', None)
2822
2826
2823 @command(b'perfbranchmapload', [
2827 @command(b'perfbranchmapload', [
2824 (b'f', b'filter', b'', b'Specify repoview filter'),
2828 (b'f', b'filter', b'', b'Specify repoview filter'),
2825 (b'', b'list', False, b'List branchmap filter caches'),
2829 (b'', b'list', False, b'List branchmap filter caches'),
2826 (b'', b'clear-revlogs', False, b'refresh changelog and manifest'),
2830 (b'', b'clear-revlogs', False, b'refresh changelog and manifest'),
2827
2831
2828 ] + formatteropts)
2832 ] + formatteropts)
2829 def perfbranchmapload(ui, repo, filter=b'', list=False, **opts):
2833 def perfbranchmapload(ui, repo, filter=b'', list=False, **opts):
2830 """benchmark reading the branchmap"""
2834 """benchmark reading the branchmap"""
2831 opts = _byteskwargs(opts)
2835 opts = _byteskwargs(opts)
2832 clearrevlogs = opts[b'clear_revlogs']
2836 clearrevlogs = opts[b'clear_revlogs']
2833
2837
2834 if list:
2838 if list:
2835 for name, kind, st in repo.cachevfs.readdir(stat=True):
2839 for name, kind, st in repo.cachevfs.readdir(stat=True):
2836 if name.startswith(b'branch2'):
2840 if name.startswith(b'branch2'):
2837 filtername = name.partition(b'-')[2] or b'unfiltered'
2841 filtername = name.partition(b'-')[2] or b'unfiltered'
2838 ui.status(b'%s - %s\n'
2842 ui.status(b'%s - %s\n'
2839 % (filtername, util.bytecount(st.st_size)))
2843 % (filtername, util.bytecount(st.st_size)))
2840 return
2844 return
2841 if not filter:
2845 if not filter:
2842 filter = None
2846 filter = None
2843 subsettable = getbranchmapsubsettable()
2847 subsettable = getbranchmapsubsettable()
2844 if filter is None:
2848 if filter is None:
2845 repo = repo.unfiltered()
2849 repo = repo.unfiltered()
2846 else:
2850 else:
2847 repo = repoview.repoview(repo, filter)
2851 repo = repoview.repoview(repo, filter)
2848
2852
2849 repo.branchmap() # make sure we have a relevant, up-to-date branchmap
2853 repo.branchmap() # make sure we have a relevant, up-to-date branchmap
2850
2854
2851 try:
2855 try:
2852 fromfile = branchmap.branchcache.fromfile
2856 fromfile = branchmap.branchcache.fromfile
2853 except AttributeError:
2857 except AttributeError:
2854 # older versions
2858 # older versions
2855 fromfile = branchmap.read
2859 fromfile = branchmap.read
2856
2860
2857 currentfilter = filter
2861 currentfilter = filter
2858 # try once without timer, the filter may not be cached
2862 # try once without timer, the filter may not be cached
2859 while fromfile(repo) is None:
2863 while fromfile(repo) is None:
2860 currentfilter = subsettable.get(currentfilter)
2864 currentfilter = subsettable.get(currentfilter)
2861 if currentfilter is None:
2865 if currentfilter is None:
2862 raise error.Abort(b'No branchmap cached for %s repo'
2866 raise error.Abort(b'No branchmap cached for %s repo'
2863 % (filter or b'unfiltered'))
2867 % (filter or b'unfiltered'))
2864 repo = repo.filtered(currentfilter)
2868 repo = repo.filtered(currentfilter)
2865 timer, fm = gettimer(ui, opts)
2869 timer, fm = gettimer(ui, opts)
2866 def setup():
2870 def setup():
2867 if clearrevlogs:
2871 if clearrevlogs:
2868 clearchangelog(repo)
2872 clearchangelog(repo)
2869 def bench():
2873 def bench():
2870 fromfile(repo)
2874 fromfile(repo)
2871 timer(bench, setup=setup)
2875 timer(bench, setup=setup)
2872 fm.end()
2876 fm.end()
2873
2877
2874 @command(b'perfloadmarkers')
2878 @command(b'perfloadmarkers')
2875 def perfloadmarkers(ui, repo):
2879 def perfloadmarkers(ui, repo):
2876 """benchmark the time to parse the on-disk markers for a repo
2880 """benchmark the time to parse the on-disk markers for a repo
2877
2881
2878 Result is the number of markers in the repo."""
2882 Result is the number of markers in the repo."""
2879 timer, fm = gettimer(ui)
2883 timer, fm = gettimer(ui)
2880 svfs = getsvfs(repo)
2884 svfs = getsvfs(repo)
2881 timer(lambda: len(obsolete.obsstore(svfs)))
2885 timer(lambda: len(obsolete.obsstore(svfs)))
2882 fm.end()
2886 fm.end()
2883
2887
2884 @command(b'perflrucachedict', formatteropts +
2888 @command(b'perflrucachedict', formatteropts +
2885 [(b'', b'costlimit', 0, b'maximum total cost of items in cache'),
2889 [(b'', b'costlimit', 0, b'maximum total cost of items in cache'),
2886 (b'', b'mincost', 0, b'smallest cost of items in cache'),
2890 (b'', b'mincost', 0, b'smallest cost of items in cache'),
2887 (b'', b'maxcost', 100, b'maximum cost of items in cache'),
2891 (b'', b'maxcost', 100, b'maximum cost of items in cache'),
2888 (b'', b'size', 4, b'size of cache'),
2892 (b'', b'size', 4, b'size of cache'),
2889 (b'', b'gets', 10000, b'number of key lookups'),
2893 (b'', b'gets', 10000, b'number of key lookups'),
2890 (b'', b'sets', 10000, b'number of key sets'),
2894 (b'', b'sets', 10000, b'number of key sets'),
2891 (b'', b'mixed', 10000, b'number of mixed mode operations'),
2895 (b'', b'mixed', 10000, b'number of mixed mode operations'),
2892 (b'', b'mixedgetfreq', 50, b'frequency of get vs set ops in mixed mode')],
2896 (b'', b'mixedgetfreq', 50, b'frequency of get vs set ops in mixed mode')],
2893 norepo=True)
2897 norepo=True)
2894 def perflrucache(ui, mincost=0, maxcost=100, costlimit=0, size=4,
2898 def perflrucache(ui, mincost=0, maxcost=100, costlimit=0, size=4,
2895 gets=10000, sets=10000, mixed=10000, mixedgetfreq=50, **opts):
2899 gets=10000, sets=10000, mixed=10000, mixedgetfreq=50, **opts):
2896 opts = _byteskwargs(opts)
2900 opts = _byteskwargs(opts)
2897
2901
2898 def doinit():
2902 def doinit():
2899 for i in _xrange(10000):
2903 for i in _xrange(10000):
2900 util.lrucachedict(size)
2904 util.lrucachedict(size)
2901
2905
2902 costrange = list(range(mincost, maxcost + 1))
2906 costrange = list(range(mincost, maxcost + 1))
2903
2907
2904 values = []
2908 values = []
2905 for i in _xrange(size):
2909 for i in _xrange(size):
2906 values.append(random.randint(0, _maxint))
2910 values.append(random.randint(0, _maxint))
2907
2911
2908 # Get mode fills the cache and tests raw lookup performance with no
2912 # Get mode fills the cache and tests raw lookup performance with no
2909 # eviction.
2913 # eviction.
2910 getseq = []
2914 getseq = []
2911 for i in _xrange(gets):
2915 for i in _xrange(gets):
2912 getseq.append(random.choice(values))
2916 getseq.append(random.choice(values))
2913
2917
2914 def dogets():
2918 def dogets():
2915 d = util.lrucachedict(size)
2919 d = util.lrucachedict(size)
2916 for v in values:
2920 for v in values:
2917 d[v] = v
2921 d[v] = v
2918 for key in getseq:
2922 for key in getseq:
2919 value = d[key]
2923 value = d[key]
2920 value # silence pyflakes warning
2924 value # silence pyflakes warning
2921
2925
2922 def dogetscost():
2926 def dogetscost():
2923 d = util.lrucachedict(size, maxcost=costlimit)
2927 d = util.lrucachedict(size, maxcost=costlimit)
2924 for i, v in enumerate(values):
2928 for i, v in enumerate(values):
2925 d.insert(v, v, cost=costs[i])
2929 d.insert(v, v, cost=costs[i])
2926 for key in getseq:
2930 for key in getseq:
2927 try:
2931 try:
2928 value = d[key]
2932 value = d[key]
2929 value # silence pyflakes warning
2933 value # silence pyflakes warning
2930 except KeyError:
2934 except KeyError:
2931 pass
2935 pass
2932
2936
2933 # Set mode tests insertion speed with cache eviction.
2937 # Set mode tests insertion speed with cache eviction.
2934 setseq = []
2938 setseq = []
2935 costs = []
2939 costs = []
2936 for i in _xrange(sets):
2940 for i in _xrange(sets):
2937 setseq.append(random.randint(0, _maxint))
2941 setseq.append(random.randint(0, _maxint))
2938 costs.append(random.choice(costrange))
2942 costs.append(random.choice(costrange))
2939
2943
2940 def doinserts():
2944 def doinserts():
2941 d = util.lrucachedict(size)
2945 d = util.lrucachedict(size)
2942 for v in setseq:
2946 for v in setseq:
2943 d.insert(v, v)
2947 d.insert(v, v)
2944
2948
2945 def doinsertscost():
2949 def doinsertscost():
2946 d = util.lrucachedict(size, maxcost=costlimit)
2950 d = util.lrucachedict(size, maxcost=costlimit)
2947 for i, v in enumerate(setseq):
2951 for i, v in enumerate(setseq):
2948 d.insert(v, v, cost=costs[i])
2952 d.insert(v, v, cost=costs[i])
2949
2953
2950 def dosets():
2954 def dosets():
2951 d = util.lrucachedict(size)
2955 d = util.lrucachedict(size)
2952 for v in setseq:
2956 for v in setseq:
2953 d[v] = v
2957 d[v] = v
2954
2958
2955 # Mixed mode randomly performs gets and sets with eviction.
2959 # Mixed mode randomly performs gets and sets with eviction.
2956 mixedops = []
2960 mixedops = []
2957 for i in _xrange(mixed):
2961 for i in _xrange(mixed):
2958 r = random.randint(0, 100)
2962 r = random.randint(0, 100)
2959 if r < mixedgetfreq:
2963 if r < mixedgetfreq:
2960 op = 0
2964 op = 0
2961 else:
2965 else:
2962 op = 1
2966 op = 1
2963
2967
2964 mixedops.append((op,
2968 mixedops.append((op,
2965 random.randint(0, size * 2),
2969 random.randint(0, size * 2),
2966 random.choice(costrange)))
2970 random.choice(costrange)))
2967
2971
2968 def domixed():
2972 def domixed():
2969 d = util.lrucachedict(size)
2973 d = util.lrucachedict(size)
2970
2974
2971 for op, v, cost in mixedops:
2975 for op, v, cost in mixedops:
2972 if op == 0:
2976 if op == 0:
2973 try:
2977 try:
2974 d[v]
2978 d[v]
2975 except KeyError:
2979 except KeyError:
2976 pass
2980 pass
2977 else:
2981 else:
2978 d[v] = v
2982 d[v] = v
2979
2983
2980 def domixedcost():
2984 def domixedcost():
2981 d = util.lrucachedict(size, maxcost=costlimit)
2985 d = util.lrucachedict(size, maxcost=costlimit)
2982
2986
2983 for op, v, cost in mixedops:
2987 for op, v, cost in mixedops:
2984 if op == 0:
2988 if op == 0:
2985 try:
2989 try:
2986 d[v]
2990 d[v]
2987 except KeyError:
2991 except KeyError:
2988 pass
2992 pass
2989 else:
2993 else:
2990 d.insert(v, v, cost=cost)
2994 d.insert(v, v, cost=cost)
2991
2995
2992 benches = [
2996 benches = [
2993 (doinit, b'init'),
2997 (doinit, b'init'),
2994 ]
2998 ]
2995
2999
2996 if costlimit:
3000 if costlimit:
2997 benches.extend([
3001 benches.extend([
2998 (dogetscost, b'gets w/ cost limit'),
3002 (dogetscost, b'gets w/ cost limit'),
2999 (doinsertscost, b'inserts w/ cost limit'),
3003 (doinsertscost, b'inserts w/ cost limit'),
3000 (domixedcost, b'mixed w/ cost limit'),
3004 (domixedcost, b'mixed w/ cost limit'),
3001 ])
3005 ])
3002 else:
3006 else:
3003 benches.extend([
3007 benches.extend([
3004 (dogets, b'gets'),
3008 (dogets, b'gets'),
3005 (doinserts, b'inserts'),
3009 (doinserts, b'inserts'),
3006 (dosets, b'sets'),
3010 (dosets, b'sets'),
3007 (domixed, b'mixed')
3011 (domixed, b'mixed')
3008 ])
3012 ])
3009
3013
3010 for fn, title in benches:
3014 for fn, title in benches:
3011 timer, fm = gettimer(ui, opts)
3015 timer, fm = gettimer(ui, opts)
3012 timer(fn, title=title)
3016 timer(fn, title=title)
3013 fm.end()
3017 fm.end()
3014
3018
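# Hypothetical invocations of the benchmark above (option names are the
# ones declared in its @command table):
#
#   $ hg perflrucachedict --size 4 --gets 10000 --sets 10000
#   $ hg perflrucachedict --costlimit 100 --maxcost 100
#
# The first run exercises the plain gets/inserts/sets/mixed benchmarks;
# passing --costlimit switches to the cost-limited variants instead.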
3015 @command(b'perfwrite', formatteropts)
3019 @command(b'perfwrite', formatteropts)
3016 def perfwrite(ui, repo, **opts):
3020 def perfwrite(ui, repo, **opts):
3017 """microbenchmark ui.write
3021 """microbenchmark ui.write
3018 """
3022 """
3019 opts = _byteskwargs(opts)
3023 opts = _byteskwargs(opts)
3020
3024
3021 timer, fm = gettimer(ui, opts)
3025 timer, fm = gettimer(ui, opts)
3022 def write():
3026 def write():
3023 for i in range(100000):
3027 for i in range(100000):
3024 ui.write(b'Testing write performance\n')
3028 ui.write(b'Testing write performance\n')
3025 timer(write)
3029 timer(write)
3026 fm.end()
3030 fm.end()
3027
3031
3028 def uisetup(ui):
3032 def uisetup(ui):
3029 if (util.safehasattr(cmdutil, b'openrevlog') and
3033 if (util.safehasattr(cmdutil, b'openrevlog') and
3030 not util.safehasattr(commands, b'debugrevlogopts')):
3034 not util.safehasattr(commands, b'debugrevlogopts')):
3031 # for "historical portability":
3035 # for "historical portability":
3032 # In this case, Mercurial should be 1.9 (or a79fea6b3e77) -
3036 # In this case, Mercurial should be 1.9 (or a79fea6b3e77) -
3033 # 3.7 (or 5606f7d0d063). Therefore, '--dir' option for
3037 # 3.7 (or 5606f7d0d063). Therefore, '--dir' option for
3034 # openrevlog() should cause failure, because it has been
3038 # openrevlog() should cause failure, because it has been
3035 # available since 3.5 (or 49c583ca48c4).
3039 # available since 3.5 (or 49c583ca48c4).
3036 def openrevlog(orig, repo, cmd, file_, opts):
3040 def openrevlog(orig, repo, cmd, file_, opts):
3037 if opts.get(b'dir') and not util.safehasattr(repo, b'dirlog'):
3041 if opts.get(b'dir') and not util.safehasattr(repo, b'dirlog'):
3038 raise error.Abort(b"This version doesn't support --dir option",
3042 raise error.Abort(b"This version doesn't support --dir option",
3039 hint=b"use 3.5 or later")
3043 hint=b"use 3.5 or later")
3040 return orig(repo, cmd, file_, opts)
3044 return orig(repo, cmd, file_, opts)
3041 extensions.wrapfunction(cmdutil, b'openrevlog', openrevlog)
3045 extensions.wrapfunction(cmdutil, b'openrevlog', openrevlog)
3042
3046
3043 @command(b'perfprogress', formatteropts + [
3047 @command(b'perfprogress', formatteropts + [
3044 (b'', b'topic', b'topic', b'topic for progress messages'),
3048 (b'', b'topic', b'topic', b'topic for progress messages'),
3045 (b'c', b'total', 1000000, b'total value we are progressing to'),
3049 (b'c', b'total', 1000000, b'total value we are progressing to'),
3046 ], norepo=True)
3050 ], norepo=True)
3047 def perfprogress(ui, topic=None, total=None, **opts):
3051 def perfprogress(ui, topic=None, total=None, **opts):
3048 """printing of progress bars"""
3052 """printing of progress bars"""
3049 opts = _byteskwargs(opts)
3053 opts = _byteskwargs(opts)
3050
3054
3051 timer, fm = gettimer(ui, opts)
3055 timer, fm = gettimer(ui, opts)
3052
3056
3053 def doprogress():
3057 def doprogress():
3054 with ui.makeprogress(topic, total=total) as progress:
3058 with ui.makeprogress(topic, total=total) as progress:
3055 for i in pycompat.xrange(total):
3059 for i in pycompat.xrange(total):
3056 progress.increment()
3060 progress.increment()
3057
3061
3058 timer(doprogress)
3062 timer(doprogress)
3059 fm.end()
3063 fm.end()
@@ -1,2095 +1,2100 b''
1 # manifest.py - manifest revision class for mercurial
1 # manifest.py - manifest revision class for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import heapq
10 import heapq
11 import itertools
11 import itertools
12 import struct
12 import struct
13 import weakref
13 import weakref
14
14
15 from .i18n import _
15 from .i18n import _
16 from .node import (
16 from .node import (
17 bin,
17 bin,
18 hex,
18 hex,
19 nullid,
19 nullid,
20 nullrev,
20 nullrev,
21 )
21 )
22 from . import (
22 from . import (
23 error,
23 error,
24 mdiff,
24 mdiff,
25 policy,
25 policy,
26 pycompat,
26 pycompat,
27 repository,
27 repository,
28 revlog,
28 revlog,
29 util,
29 util,
30 )
30 )
31 from .utils import (
31 from .utils import (
32 interfaceutil,
32 interfaceutil,
33 )
33 )
34
34
35 parsers = policy.importmod(r'parsers')
35 parsers = policy.importmod(r'parsers')
36 propertycache = util.propertycache
36 propertycache = util.propertycache
37
37
38 # Allow tests to more easily test the alternate path in manifestdict.fastdelta()
38 # Allow tests to more easily test the alternate path in manifestdict.fastdelta()
39 FASTDELTA_TEXTDIFF_THRESHOLD = 1000
39 FASTDELTA_TEXTDIFF_THRESHOLD = 1000
40
40
41 def _parse(data):
41 def _parse(data):
42 # This method does a little bit of excessive-looking
42 # This method does a little bit of excessive-looking
43 # precondition checking. This is so that the behavior of this
43 # precondition checking. This is so that the behavior of this
44 # class exactly matches its C counterpart to try and help
44 # class exactly matches its C counterpart to try and help
45 # prevent surprise breakage for anyone that develops against
45 # prevent surprise breakage for anyone that develops against
46 # the pure version.
46 # the pure version.
47 if data and data[-1:] != '\n':
47 if data and data[-1:] != '\n':
48 raise ValueError('Manifest did not end in a newline.')
48 raise ValueError('Manifest did not end in a newline.')
49 prev = None
49 prev = None
50 for l in data.splitlines():
50 for l in data.splitlines():
51 if prev is not None and prev > l:
51 if prev is not None and prev > l:
52 raise ValueError('Manifest lines not in sorted order.')
52 raise ValueError('Manifest lines not in sorted order.')
53 prev = l
53 prev = l
54 f, n = l.split('\0')
54 f, n = l.split('\0')
55 if len(n) > 40:
55 if len(n) > 40:
56 yield f, bin(n[:40]), n[40:]
56 yield f, bin(n[:40]), n[40:]
57 else:
57 else:
58 yield f, bin(n), ''
58 yield f, bin(n), ''
59
59
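def _parse_example():
    # Illustrative sketch only (not part of this module): shows the
    # format _parse() consumes -- "<filename>\0<40 hex chars><optional
    # flags>\n", with lines sorted by filename.
    sample = ('a.txt\x00' + '1' * 40 + '\n'
              'b.txt\x00' + '2' * 40 + 'x\n')  # trailing 'x' = exec flag
    # yields ('a.txt', <20-byte node>, '') then ('b.txt', <20-byte node>, 'x')
    return list(_parse(sample))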
60 def _text(it):
60 def _text(it):
61 files = []
61 files = []
62 lines = []
62 lines = []
63 for f, n, fl in it:
63 for f, n, fl in it:
64 files.append(f)
64 files.append(f)
65 # if this is changed to support newlines in filenames,
65 # if this is changed to support newlines in filenames,
66 # be sure to check the templates/ dir again (especially *-raw.tmpl)
66 # be sure to check the templates/ dir again (especially *-raw.tmpl)
67 lines.append("%s\0%s%s\n" % (f, hex(n), fl))
67 lines.append("%s\0%s%s\n" % (f, hex(n), fl))
68
68
69 _checkforbidden(files)
69 _checkforbidden(files)
70 return ''.join(lines)
70 return ''.join(lines)
71
71
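# Round-trip sketch: _text() is the inverse of _parse(), so for any
# well-formed manifest blob `data` (sorted, lowercase hex,
# newline-terminated):
#
#   _text(_parse(data)) == data
#
# with _checkforbidden() additionally rejecting '\n' or '\r' in names.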
72 class lazymanifestiter(object):
72 class lazymanifestiter(object):
73 def __init__(self, lm):
73 def __init__(self, lm):
74 self.pos = 0
74 self.pos = 0
75 self.lm = lm
75 self.lm = lm
76
76
77 def __iter__(self):
77 def __iter__(self):
78 return self
78 return self
79
79
80 def next(self):
80 def next(self):
81 try:
81 try:
82 data, pos = self.lm._get(self.pos)
82 data, pos = self.lm._get(self.pos)
83 except IndexError:
83 except IndexError:
84 raise StopIteration
84 raise StopIteration
85 if pos == -1:
85 if pos == -1:
86 self.pos += 1
86 self.pos += 1
87 return data[0]
87 return data[0]
88 self.pos += 1
88 self.pos += 1
89 zeropos = data.find('\x00', pos)
89 zeropos = data.find('\x00', pos)
90 return data[pos:zeropos]
90 return data[pos:zeropos]
91
91
92 __next__ = next
92 __next__ = next
93
93
94 class lazymanifestiterentries(object):
94 class lazymanifestiterentries(object):
95 def __init__(self, lm):
95 def __init__(self, lm):
96 self.lm = lm
96 self.lm = lm
97 self.pos = 0
97 self.pos = 0
98
98
99 def __iter__(self):
99 def __iter__(self):
100 return self
100 return self
101
101
102 def next(self):
102 def next(self):
103 try:
103 try:
104 data, pos = self.lm._get(self.pos)
104 data, pos = self.lm._get(self.pos)
105 except IndexError:
105 except IndexError:
106 raise StopIteration
106 raise StopIteration
107 if pos == -1:
107 if pos == -1:
108 self.pos += 1
108 self.pos += 1
109 return data
109 return data
110 zeropos = data.find('\x00', pos)
110 zeropos = data.find('\x00', pos)
111 hashval = unhexlify(data, self.lm.extrainfo[self.pos],
111 hashval = unhexlify(data, self.lm.extrainfo[self.pos],
112 zeropos + 1, 40)
112 zeropos + 1, 40)
113 flags = self.lm._getflags(data, self.pos, zeropos)
113 flags = self.lm._getflags(data, self.pos, zeropos)
114 self.pos += 1
114 self.pos += 1
115 return (data[pos:zeropos], hashval, flags)
115 return (data[pos:zeropos], hashval, flags)
116
116
117 __next__ = next
117 __next__ = next
118
118
119 def unhexlify(data, extra, pos, length):
119 def unhexlify(data, extra, pos, length):
120 s = bin(data[pos:pos + length])
120 s = bin(data[pos:pos + length])
121 if extra:
121 if extra:
122 s += chr(extra & 0xff)
122 s += chr(extra & 0xff)
123 return s
123 return s
124
124
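# unhexlify() sketch: the 40 hex digits decode to a 20-byte node, and a
# nonzero `extra` byte (stashed per entry in extrainfo) is appended as
# a 21st byte, e.g.:
#
#   unhexlify('ab' * 20, 0, 0, 40)    -> '\xab' * 20
#   unhexlify('ab' * 20, 0x07, 0, 40) -> '\xab' * 20 + '\x07'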
125 def _cmp(a, b):
125 def _cmp(a, b):
126 return (a > b) - (a < b)
126 return (a > b) - (a < b)
127
127
128 class _lazymanifest(object):
128 class _lazymanifest(object):
129 """A pure python manifest backed by a byte string. It is supplimented with
129 """A pure python manifest backed by a byte string. It is supplimented with
130 internal lists as it is modified, until it is compacted back to a pure byte
130 internal lists as it is modified, until it is compacted back to a pure byte
131 string.
131 string.
132
132
133 ``data`` is the initial manifest data.
133 ``data`` is the initial manifest data.
134
134
135 ``positions`` is a list of offsets, one per manifest entry. Positive
135 ``positions`` is a list of offsets, one per manifest entry. Positive
136 values are offsets into ``data``, negative values are offsets into the
136 values are offsets into ``data``, negative values are offsets into the
137 ``extradata`` list. When an entry is removed, its entry is dropped from
137 ``extradata`` list. When an entry is removed, its entry is dropped from
138 ``positions``. The values are encoded such that when walking the list and
138 ``positions``. The values are encoded such that when walking the list and
139 indexing into ``data`` or ``extradata`` as appropriate, the entries are
139 indexing into ``data`` or ``extradata`` as appropriate, the entries are
140 sorted by filename.
140 sorted by filename.
141
141
142 ``extradata`` is a list of (key, hash, flags) for entries that were added or
142 ``extradata`` is a list of (key, hash, flags) for entries that were added or
143 modified since the manifest was created or compacted.
143 modified since the manifest was created or compacted.
144 """
144 """
145 def __init__(self, data, positions=None, extrainfo=None, extradata=None,
145 def __init__(self, data, positions=None, extrainfo=None, extradata=None,
146 hasremovals=False):
146 hasremovals=False):
147 if positions is None:
147 if positions is None:
148 self.positions = self.findlines(data)
148 self.positions = self.findlines(data)
149 self.extrainfo = [0] * len(self.positions)
149 self.extrainfo = [0] * len(self.positions)
150 self.data = data
150 self.data = data
151 self.extradata = []
151 self.extradata = []
152 self.hasremovals = False
152 self.hasremovals = False
153 else:
153 else:
154 self.positions = positions[:]
154 self.positions = positions[:]
155 self.extrainfo = extrainfo[:]
155 self.extrainfo = extrainfo[:]
156 self.extradata = extradata[:]
156 self.extradata = extradata[:]
157 self.data = data
157 self.data = data
158 self.hasremovals = hasremovals
158 self.hasremovals = hasremovals
159
159
160 def findlines(self, data):
160 def findlines(self, data):
161 if not data:
161 if not data:
162 return []
162 return []
163 pos = data.find("\n")
163 pos = data.find("\n")
164 if pos == -1 or data[-1:] != '\n':
164 if pos == -1 or data[-1:] != '\n':
165 raise ValueError("Manifest did not end in a newline.")
165 raise ValueError("Manifest did not end in a newline.")
166 positions = [0]
166 positions = [0]
167 prev = data[:data.find('\x00')]
167 prev = data[:data.find('\x00')]
168 while pos < len(data) - 1 and pos != -1:
168 while pos < len(data) - 1 and pos != -1:
169 positions.append(pos + 1)
169 positions.append(pos + 1)
170 nexts = data[pos + 1:data.find('\x00', pos + 1)]
170 nexts = data[pos + 1:data.find('\x00', pos + 1)]
171 if nexts < prev:
171 if nexts < prev:
172 raise ValueError("Manifest lines not in sorted order.")
172 raise ValueError("Manifest lines not in sorted order.")
173 prev = nexts
173 prev = nexts
174 pos = data.find("\n", pos + 1)
174 pos = data.find("\n", pos + 1)
175 return positions
175 return positions
176
176
177 def _get(self, index):
177 def _get(self, index):
178 # get the position encoded in pos:
178 # get the position encoded in pos:
179 # positive number is an index in 'data'
179 # positive number is an index in 'data'
180 # negative number is in extrapieces
180 # negative number is in extrapieces
181 pos = self.positions[index]
181 pos = self.positions[index]
182 if pos >= 0:
182 if pos >= 0:
183 return self.data, pos
183 return self.data, pos
184 return self.extradata[-pos - 1], -1
184 return self.extradata[-pos - 1], -1
185
185
186 def _getkey(self, pos):
186 def _getkey(self, pos):
187 if pos >= 0:
187 if pos >= 0:
188 return self.data[pos:self.data.find('\x00', pos + 1)]
188 return self.data[pos:self.data.find('\x00', pos + 1)]
189 return self.extradata[-pos - 1][0]
189 return self.extradata[-pos - 1][0]
190
190
191 def bsearch(self, key):
191 def bsearch(self, key):
192 first = 0
192 first = 0
193 last = len(self.positions) - 1
193 last = len(self.positions) - 1
194
194
195 while first <= last:
195 while first <= last:
196 midpoint = (first + last)//2
196 midpoint = (first + last)//2
197 nextpos = self.positions[midpoint]
197 nextpos = self.positions[midpoint]
198 candidate = self._getkey(nextpos)
198 candidate = self._getkey(nextpos)
199 r = _cmp(key, candidate)
199 r = _cmp(key, candidate)
200 if r == 0:
200 if r == 0:
201 return midpoint
201 return midpoint
202 else:
202 else:
203 if r < 0:
203 if r < 0:
204 last = midpoint - 1
204 last = midpoint - 1
205 else:
205 else:
206 first = midpoint + 1
206 first = midpoint + 1
207 return -1
207 return -1
208
208
209 def bsearch2(self, key):
209 def bsearch2(self, key):
210 # same as the above, but will always return the position
210 # same as the above, but will always return the position
211 # done for performance reasons
211 # done for performance reasons
212 first = 0
212 first = 0
213 last = len(self.positions) - 1
213 last = len(self.positions) - 1
214
214
215 while first <= last:
215 while first <= last:
216 midpoint = (first + last)//2
216 midpoint = (first + last)//2
217 nextpos = self.positions[midpoint]
217 nextpos = self.positions[midpoint]
218 candidate = self._getkey(nextpos)
218 candidate = self._getkey(nextpos)
219 r = _cmp(key, candidate)
219 r = _cmp(key, candidate)
220 if r == 0:
220 if r == 0:
221 return (midpoint, True)
221 return (midpoint, True)
222 else:
222 else:
223 if r < 0:
223 if r < 0:
224 last = midpoint - 1
224 last = midpoint - 1
225 else:
225 else:
226 first = midpoint + 1
226 first = midpoint + 1
227 return (first, False)
227 return (first, False)
228
228
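def _bsearch_example(lm):
    # Illustrative contrast of the two variants on a _lazymanifest `lm`
    # whose only entry is 'b.txt' (hypothetical fixture):
    assert lm.bsearch('b.txt') == 0            # index of the match
    assert lm.bsearch('a.txt') == -1           # plain search: not found
    assert lm.bsearch2('b.txt') == (0, True)   # found at index 0
    assert lm.bsearch2('a.txt') == (0, False)  # insertion point instead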
229 def __contains__(self, key):
229 def __contains__(self, key):
230 return self.bsearch(key) != -1
230 return self.bsearch(key) != -1
231
231
232 def _getflags(self, data, needle, pos):
232 def _getflags(self, data, needle, pos):
233 start = pos + 41
233 start = pos + 41
234 end = data.find("\n", start)
234 end = data.find("\n", start)
235 if end == -1:
235 if end == -1:
236 end = len(data) - 1
236 end = len(data) - 1
237 if start == end:
237 if start == end:
238 return ''
238 return ''
239 return self.data[start:end]
239 return self.data[start:end]
240
240
241 def __getitem__(self, key):
241 def __getitem__(self, key):
242 if not isinstance(key, bytes):
242 if not isinstance(key, bytes):
243 raise TypeError("getitem: manifest keys must be a bytes.")
243 raise TypeError("getitem: manifest keys must be a bytes.")
244 needle = self.bsearch(key)
244 needle = self.bsearch(key)
245 if needle == -1:
245 if needle == -1:
246 raise KeyError
246 raise KeyError
247 data, pos = self._get(needle)
247 data, pos = self._get(needle)
248 if pos == -1:
248 if pos == -1:
249 return (data[1], data[2])
249 return (data[1], data[2])
250 zeropos = data.find('\x00', pos)
250 zeropos = data.find('\x00', pos)
251 assert 0 <= needle <= len(self.positions)
251 assert 0 <= needle <= len(self.positions)
252 assert len(self.extrainfo) == len(self.positions)
252 assert len(self.extrainfo) == len(self.positions)
253 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40)
253 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40)
254 flags = self._getflags(data, needle, zeropos)
254 flags = self._getflags(data, needle, zeropos)
255 return (hashval, flags)
255 return (hashval, flags)
256
256
257 def __delitem__(self, key):
257 def __delitem__(self, key):
258 needle, found = self.bsearch2(key)
258 needle, found = self.bsearch2(key)
259 if not found:
259 if not found:
260 raise KeyError
260 raise KeyError
261 cur = self.positions[needle]
261 cur = self.positions[needle]
262 self.positions = self.positions[:needle] + self.positions[needle + 1:]
262 self.positions = self.positions[:needle] + self.positions[needle + 1:]
263 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:]
263 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:]
264 if cur >= 0:
264 if cur >= 0:
265 # This does NOT unsort the list as far as the search functions are
265 # This does NOT unsort the list as far as the search functions are
266 # concerned, as they only examine lines mapped by self.positions.
266 # concerned, as they only examine lines mapped by self.positions.
267 self.data = self.data[:cur] + '\x00' + self.data[cur + 1:]
267 self.data = self.data[:cur] + '\x00' + self.data[cur + 1:]
268 self.hasremovals = True
268 self.hasremovals = True
269
269
270 def __setitem__(self, key, value):
270 def __setitem__(self, key, value):
271 if not isinstance(key, bytes):
271 if not isinstance(key, bytes):
272 raise TypeError("setitem: manifest keys must be a byte string.")
272 raise TypeError("setitem: manifest keys must be a byte string.")
273 if not isinstance(value, tuple) or len(value) != 2:
273 if not isinstance(value, tuple) or len(value) != 2:
274 raise TypeError("Manifest values must be a tuple of (node, flags).")
274 raise TypeError("Manifest values must be a tuple of (node, flags).")
275 hashval = value[0]
275 hashval = value[0]
276 if not isinstance(hashval, bytes) or not 20 <= len(hashval) <= 22:
276 if not isinstance(hashval, bytes) or not 20 <= len(hashval) <= 22:
277 raise TypeError("node must be a 20-byte byte string")
277 raise TypeError("node must be a 20-byte byte string")
278 flags = value[1]
278 flags = value[1]
279 if len(hashval) == 22:
279 if len(hashval) == 22:
280 hashval = hashval[:-1]
280 hashval = hashval[:-1]
281 if not isinstance(flags, bytes) or len(flags) > 1:
281 if not isinstance(flags, bytes) or len(flags) > 1:
282 raise TypeError("flags must a 0 or 1 byte string, got %r", flags)
282 raise TypeError("flags must a 0 or 1 byte string, got %r", flags)
283 needle, found = self.bsearch2(key)
283 needle, found = self.bsearch2(key)
284 if found:
284 if found:
285 # put the item
285 # put the item
286 pos = self.positions[needle]
286 pos = self.positions[needle]
287 if pos < 0:
287 if pos < 0:
288 self.extradata[-pos - 1] = (key, hashval, value[1])
288 self.extradata[-pos - 1] = (key, hashval, value[1])
289 else:
289 else:
290 # just don't bother
290 # just don't bother
291 self.extradata.append((key, hashval, value[1]))
291 self.extradata.append((key, hashval, value[1]))
292 self.positions[needle] = -len(self.extradata)
292 self.positions[needle] = -len(self.extradata)
293 else:
293 else:
294 # not found, put it in with extra positions
294 # not found, put it in with extra positions
295 self.extradata.append((key, hashval, value[1]))
295 self.extradata.append((key, hashval, value[1]))
296 self.positions = (self.positions[:needle] + [-len(self.extradata)]
296 self.positions = (self.positions[:needle] + [-len(self.extradata)]
297 + self.positions[needle:])
297 + self.positions[needle:])
298 self.extrainfo = (self.extrainfo[:needle] + [0] +
298 self.extrainfo = (self.extrainfo[:needle] + [0] +
299 self.extrainfo[needle:])
299 self.extrainfo[needle:])
300
300
301 def copy(self):
301 def copy(self):
302 # XXX call _compact like in C?
302 # XXX call _compact like in C?
303 return _lazymanifest(self.data, self.positions, self.extrainfo,
303 return _lazymanifest(self.data, self.positions, self.extrainfo,
304 self.extradata, self.hasremovals)
304 self.extradata, self.hasremovals)
305
305
306 def _compact(self):
306 def _compact(self):
307 # hopefully not called TOO often
307 # hopefully not called TOO often
308 if len(self.extradata) == 0 and not self.hasremovals:
308 if len(self.extradata) == 0 and not self.hasremovals:
309 return
309 return
310 l = []
310 l = []
311 i = 0
311 i = 0
312 offset = 0
312 offset = 0
313 self.extrainfo = [0] * len(self.positions)
313 self.extrainfo = [0] * len(self.positions)
314 while i < len(self.positions):
314 while i < len(self.positions):
315 if self.positions[i] >= 0:
315 if self.positions[i] >= 0:
316 cur = self.positions[i]
316 cur = self.positions[i]
317 last_cut = cur
317 last_cut = cur
318
318
319 # Collect all contiguous entries in the buffer at the current
319 # Collect all contiguous entries in the buffer at the current
320 # offset, breaking out only for added/modified items held in
320 # offset, breaking out only for added/modified items held in
321 # extradata, or a deleted line prior to the next position.
321 # extradata, or a deleted line prior to the next position.
322 while True:
322 while True:
323 self.positions[i] = offset
323 self.positions[i] = offset
324 i += 1
324 i += 1
325 if i == len(self.positions) or self.positions[i] < 0:
325 if i == len(self.positions) or self.positions[i] < 0:
326 break
326 break
327
327
328 # A removed file has no positions[] entry, but does have an
328 # A removed file has no positions[] entry, but does have an
329 # overwritten first byte. Break out and find the end of the
329 # overwritten first byte. Break out and find the end of the
330 # current good entry/entries if there is a removed file
330 # current good entry/entries if there is a removed file
331 # before the next position.
331 # before the next position.
332 if (self.hasremovals
332 if (self.hasremovals
333 and self.data.find('\n\x00', cur,
333 and self.data.find('\n\x00', cur,
334 self.positions[i]) != -1):
334 self.positions[i]) != -1):
335 break
335 break
336
336
337 offset += self.positions[i] - cur
337 offset += self.positions[i] - cur
338 cur = self.positions[i]
338 cur = self.positions[i]
339 end_cut = self.data.find('\n', cur)
339 end_cut = self.data.find('\n', cur)
340 if end_cut != -1:
340 if end_cut != -1:
341 end_cut += 1
341 end_cut += 1
342 offset += end_cut - cur
342 offset += end_cut - cur
343 l.append(self.data[last_cut:end_cut])
343 l.append(self.data[last_cut:end_cut])
344 else:
344 else:
345 while i < len(self.positions) and self.positions[i] < 0:
345 while i < len(self.positions) and self.positions[i] < 0:
346 cur = self.positions[i]
346 cur = self.positions[i]
347 t = self.extradata[-cur - 1]
347 t = self.extradata[-cur - 1]
348 l.append(self._pack(t))
348 l.append(self._pack(t))
349 self.positions[i] = offset
349 self.positions[i] = offset
350 if len(t[1]) > 20:
350 if len(t[1]) > 20:
351 self.extrainfo[i] = ord(t[1][21])
351 self.extrainfo[i] = ord(t[1][21])
352 offset += len(l[-1])
352 offset += len(l[-1])
353 i += 1
353 i += 1
354 self.data = ''.join(l)
354 self.data = ''.join(l)
355 self.hasremovals = False
355 self.hasremovals = False
356 self.extradata = []
356 self.extradata = []
357
357
358 def _pack(self, d):
358 def _pack(self, d):
359 return d[0] + '\x00' + hex(d[1][:20]) + d[2] + '\n'
359 return d[0] + '\x00' + hex(d[1][:20]) + d[2] + '\n'
360
360
361 def text(self):
361 def text(self):
362 self._compact()
362 self._compact()
363 return self.data
363 return self.data
364
364
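# Compaction sketch (illustrative): pending edits are folded back into
# a single byte string the next time text() runs:
#
#   lm = _lazymanifest(data)
#   del lm['old.txt']           # blanks the line, sets hasremovals
#   lm['new.txt'] = (node, '')  # parked in extradata
#   lm.text()                   # _compact() splices both into new data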
365 def diff(self, m2, clean=False):
365 def diff(self, m2, clean=False):
366 '''Finds changes between the current manifest and m2.'''
366 '''Finds changes between the current manifest and m2.'''
367 # XXX think whether efficiency matters here
367 # XXX think whether efficiency matters here
368 diff = {}
368 diff = {}
369
369
370 for fn, e1, flags in self.iterentries():
370 for fn, e1, flags in self.iterentries():
371 if fn not in m2:
371 if fn not in m2:
372 diff[fn] = (e1, flags), (None, '')
372 diff[fn] = (e1, flags), (None, '')
373 else:
373 else:
374 e2 = m2[fn]
374 e2 = m2[fn]
375 if (e1, flags) != e2:
375 if (e1, flags) != e2:
376 diff[fn] = (e1, flags), e2
376 diff[fn] = (e1, flags), e2
377 elif clean:
377 elif clean:
378 diff[fn] = None
378 diff[fn] = None
379
379
380 for fn, e2, flags in m2.iterentries():
380 for fn, e2, flags in m2.iterentries():
381 if fn not in self:
381 if fn not in self:
382 diff[fn] = (None, ''), (e2, flags)
382 diff[fn] = (None, ''), (e2, flags)
383
383
384 return diff
384 return diff
385
385
386 def iterentries(self):
386 def iterentries(self):
387 return lazymanifestiterentries(self)
387 return lazymanifestiterentries(self)
388
388
389 def iterkeys(self):
389 def iterkeys(self):
390 return lazymanifestiter(self)
390 return lazymanifestiter(self)
391
391
392 def __iter__(self):
392 def __iter__(self):
393 return lazymanifestiter(self)
393 return lazymanifestiter(self)
394
394
395 def __len__(self):
395 def __len__(self):
396 return len(self.positions)
396 return len(self.positions)
397
397
398 def filtercopy(self, filterfn):
398 def filtercopy(self, filterfn):
399 # XXX should be optimized
399 # XXX should be optimized
400 c = _lazymanifest('')
400 c = _lazymanifest('')
401 for f, n, fl in self.iterentries():
401 for f, n, fl in self.iterentries():
402 if filterfn(f):
402 if filterfn(f):
403 c[f] = n, fl
403 c[f] = n, fl
404 return c
404 return c
405
405
406 try:
406 try:
407 _lazymanifest = parsers.lazymanifest
407 _lazymanifest = parsers.lazymanifest
408 except AttributeError:
408 except AttributeError:
409 pass
409 pass
410
410
411 @interfaceutil.implementer(repository.imanifestdict)
411 @interfaceutil.implementer(repository.imanifestdict)
412 class manifestdict(object):
412 class manifestdict(object):
413 def __init__(self, data=''):
413 def __init__(self, data=''):
414 self._lm = _lazymanifest(data)
414 self._lm = _lazymanifest(data)
415
415
416 def __getitem__(self, key):
416 def __getitem__(self, key):
417 return self._lm[key][0]
417 return self._lm[key][0]
418
418
419 def find(self, key):
419 def find(self, key):
420 return self._lm[key]
420 return self._lm[key]
421
421
422 def __len__(self):
422 def __len__(self):
423 return len(self._lm)
423 return len(self._lm)
424
424
425 def __nonzero__(self):
425 def __nonzero__(self):
426 # nonzero is covered by the __len__ function, but implementing it here
426 # nonzero is covered by the __len__ function, but implementing it here
427 # makes it easier for extensions to override.
427 # makes it easier for extensions to override.
428 return len(self._lm) != 0
428 return len(self._lm) != 0
429
429
430 __bool__ = __nonzero__
430 __bool__ = __nonzero__
431
431
432 def __setitem__(self, key, node):
432 def __setitem__(self, key, node):
433 self._lm[key] = node, self.flags(key, '')
433 self._lm[key] = node, self.flags(key, '')
434
434
435 def __contains__(self, key):
435 def __contains__(self, key):
436 if key is None:
436 if key is None:
437 return False
437 return False
438 return key in self._lm
438 return key in self._lm
439
439
440 def __delitem__(self, key):
440 def __delitem__(self, key):
441 del self._lm[key]
441 del self._lm[key]
442
442
443 def __iter__(self):
443 def __iter__(self):
444 return self._lm.__iter__()
444 return self._lm.__iter__()
445
445
446 def iterkeys(self):
446 def iterkeys(self):
447 return self._lm.iterkeys()
447 return self._lm.iterkeys()
448
448
449 def keys(self):
449 def keys(self):
450 return list(self.iterkeys())
450 return list(self.iterkeys())
451
451
452 def filesnotin(self, m2, match=None):
452 def filesnotin(self, m2, match=None):
453 '''Set of files in this manifest that are not in the other'''
453 '''Set of files in this manifest that are not in the other'''
454 if match:
454 if match:
455 m1 = self.matches(match)
455 m1 = self.matches(match)
456 m2 = m2.matches(match)
456 m2 = m2.matches(match)
457 return m1.filesnotin(m2)
457 return m1.filesnotin(m2)
458 diff = self.diff(m2)
458 diff = self.diff(m2)
459 files = set(filepath
459 files = set(filepath
460 for filepath, hashflags in diff.iteritems()
460 for filepath, hashflags in diff.iteritems()
461 if hashflags[1][0] is None)
461 if hashflags[1][0] is None)
462 return files
462 return files
463
463
464 @propertycache
464 @propertycache
465 def _dirs(self):
465 def _dirs(self):
466 return util.dirs(self)
466 return util.dirs(self)
467
467
468 def dirs(self):
468 def dirs(self):
469 return self._dirs
469 return self._dirs
470
470
471 def hasdir(self, dir):
471 def hasdir(self, dir):
472 return dir in self._dirs
472 return dir in self._dirs
473
473
474 def _filesfastpath(self, match):
474 def _filesfastpath(self, match):
475 '''Checks whether we can correctly and quickly iterate over matcher
475 '''Checks whether we can correctly and quickly iterate over matcher
476 files instead of over manifest files.'''
476 files instead of over manifest files.'''
477 files = match.files()
477 files = match.files()
478 return (len(files) < 100 and (match.isexact() or
478 return (len(files) < 100 and (match.isexact() or
479 (match.prefix() and all(fn in self for fn in files))))
479 (match.prefix() and all(fn in self for fn in files))))
480
480
481 def walk(self, match):
481 def walk(self, match):
482 '''Generates matching file names.
482 '''Generates matching file names.
483
483
484 Equivalent to manifest.matches(match).iterkeys(), but without creating
484 Equivalent to manifest.matches(match).iterkeys(), but without creating
485 an entirely new manifest.
485 an entirely new manifest.
486
486
487 It also reports nonexistent files by marking them bad with match.bad().
487 It also reports nonexistent files by marking them bad with match.bad().
488 '''
488 '''
489 if match.always():
489 if match.always():
490 for f in iter(self):
490 for f in iter(self):
491 yield f
491 yield f
492 return
492 return
493
493
494 fset = set(match.files())
494 fset = set(match.files())
495
495
496 # avoid the entire walk if we're only looking for specific files
496 # avoid the entire walk if we're only looking for specific files
497 if self._filesfastpath(match):
497 if self._filesfastpath(match):
498 for fn in sorted(fset):
498 for fn in sorted(fset):
499 yield fn
499 yield fn
500 return
500 return
501
501
502 for fn in self:
502 for fn in self:
503 if fn in fset:
503 if fn in fset:
504 # specified pattern is the exact name
504 # specified pattern is the exact name
505 fset.remove(fn)
505 fset.remove(fn)
506 if match(fn):
506 if match(fn):
507 yield fn
507 yield fn
508
508
509 # for dirstate.walk, files=[''] means "walk the whole tree".
509 # for dirstate.walk, files=[''] means "walk the whole tree".
510 # follow that here, too
510 # follow that here, too
511 fset.discard('')
511 fset.discard('')
512
512
513 for fn in sorted(fset):
513 for fn in sorted(fset):
514 if not self.hasdir(fn):
514 if not self.hasdir(fn):
515 match.bad(fn, None)
515 match.bad(fn, None)
516
516
517 def matches(self, match):
517 def matches(self, match):
518 '''generate a new manifest filtered by the match argument'''
518 '''generate a new manifest filtered by the match argument'''
519 if match.always():
519 if match.always():
520 return self.copy()
520 return self.copy()
521
521
522 if self._filesfastpath(match):
522 if self._filesfastpath(match):
523 m = manifestdict()
523 m = manifestdict()
524 lm = self._lm
524 lm = self._lm
525 for fn in match.files():
525 for fn in match.files():
526 if fn in lm:
526 if fn in lm:
527 m._lm[fn] = lm[fn]
527 m._lm[fn] = lm[fn]
528 return m
528 return m
529
529
530 m = manifestdict()
530 m = manifestdict()
531 m._lm = self._lm.filtercopy(match)
531 m._lm = self._lm.filtercopy(match)
532 return m
532 return m
533
533
534 def diff(self, m2, match=None, clean=False):
534 def diff(self, m2, match=None, clean=False):
535 '''Finds changes between the current manifest and m2.
535 '''Finds changes between the current manifest and m2.
536
536
537 Args:
537 Args:
538 m2: the manifest to which this manifest should be compared.
538 m2: the manifest to which this manifest should be compared.
539 clean: if true, include files unchanged between these manifests
539 clean: if true, include files unchanged between these manifests
540 with a None value in the returned dictionary.
540 with a None value in the returned dictionary.
541
541
542 The result is returned as a dict with filename as key and
542 The result is returned as a dict with filename as key and
543 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
543 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
544 nodeid in the current/other manifest and fl1/fl2 is the flag
544 nodeid in the current/other manifest and fl1/fl2 is the flag
545 in the current/other manifest. Where the file does not exist,
545 in the current/other manifest. Where the file does not exist,
546 the nodeid will be None and the flags will be the empty
546 the nodeid will be None and the flags will be the empty
547 string.
547 string.
548 '''
548 '''
549 if match:
549 if match:
550 m1 = self.matches(match)
550 m1 = self.matches(match)
551 m2 = m2.matches(match)
551 m2 = m2.matches(match)
552 return m1.diff(m2, clean=clean)
552 return m1.diff(m2, clean=clean)
553 return self._lm.diff(m2._lm, clean)
553 return self._lm.diff(m2._lm, clean)
554
554
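def _diff_example(m1, m2):
    # Illustrative shape of the dict documented above, assuming
    # 'touched.txt' differs between m1 and m2 and 'gone.txt' exists
    # only in m1 (hypothetical filenames):
    d = m1.diff(m2)
    # d['touched.txt'] == ((n1, fl1), (n2, fl2))
    # d['gone.txt']    == ((n1, fl1), (None, ''))
    return d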
555 def setflag(self, key, flag):
555 def setflag(self, key, flag):
556 self._lm[key] = self[key], flag
556 self._lm[key] = self[key], flag
557
557
558 def get(self, key, default=None):
558 def get(self, key, default=None):
559 try:
559 try:
560 return self._lm[key][0]
560 return self._lm[key][0]
561 except KeyError:
561 except KeyError:
562 return default
562 return default
563
563
564 def flags(self, key, default=''):
564 def flags(self, key, default=''):
565 try:
565 try:
566 return self._lm[key][1]
566 return self._lm[key][1]
567 except KeyError:
567 except KeyError:
568 return default
        return default

    def copy(self):
        c = manifestdict()
        c._lm = self._lm.copy()
        return c

    def items(self):
        return (x[:2] for x in self._lm.iterentries())

    def iteritems(self):
        return (x[:2] for x in self._lm.iterentries())

    def iterentries(self):
        return self._lm.iterentries()

    def text(self):
        # most likely uses native version
        return self._lm.text()

    def fastdelta(self, base, changes):
        """Given a base manifest text as a bytearray and a list of changes
        relative to that text, compute a delta that can be used by revlog.
        """
        delta = []
        dstart = None
        dend = None
        dline = [""]
        start = 0
        # zero copy representation of base as a buffer
        addbuf = util.buffer(base)

        changes = list(changes)
        if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
            # start with a readonly loop that finds the offset of
            # each line and creates the deltas
            for f, todelete in changes:
                # start will either be the index of the item or the insert
                # point
                start, end = _msearch(addbuf, f, start)
                if not todelete:
                    h, fl = self._lm[f]
                    l = "%s\0%s%s\n" % (f, hex(h), fl)
                else:
                    if start == end:
                        # item we want to delete was not found, error out
                        raise AssertionError(
                            _("failed to remove %s from manifest") % f)
                    l = ""
                if dstart is not None and dstart <= start and dend >= start:
                    if dend < end:
                        dend = end
                    if l:
                        dline.append(l)
                else:
                    if dstart is not None:
                        delta.append([dstart, dend, "".join(dline)])
                    dstart = start
                    dend = end
                    dline = [l]

            if dstart is not None:
                delta.append([dstart, dend, "".join(dline)])
            # apply the delta to the base, and get a delta for addrevision
            deltatext, arraytext = _addlistdelta(base, delta)
        else:
            # For large changes, it's much cheaper to just build the text and
            # diff it.
            arraytext = bytearray(self.text())
            deltatext = mdiff.textdiff(
                util.buffer(base), util.buffer(arraytext))

        return arraytext, deltatext

def _msearch(m, s, lo=0, hi=None):
    '''return a tuple (start, end) that says where to find s within m.

    If the string is found, m[start:end] is the line containing
    that string. If start == end, the string was not found and
    both indicate the proper sorted insertion point.

    m should be a buffer, a memoryview or a byte string.
    s is a byte string'''
    def advance(i, c):
        while i < lenm and m[i:i + 1] != c:
            i += 1
        return i
    if not s:
        return (lo, lo)
    lenm = len(m)
    if not hi:
        hi = lenm
    while lo < hi:
        mid = (lo + hi) // 2
        start = mid
        while start > 0 and m[start - 1:start] != '\n':
            start -= 1
        end = advance(start, '\0')
        if bytes(m[start:end]) < s:
            # we know that after the null there are 40 bytes of sha1
            # this translates to the bisect lo = mid + 1
            lo = advance(end + 40, '\n') + 1
        else:
            # this translates to the bisect hi = mid
            hi = start
    end = advance(lo, '\0')
    found = m[lo:end]
    if s == found:
        # we know that after the null there are 40 bytes of sha1
        end = advance(end + 40, '\n')
        return (lo, end + 1)
    else:
        return (lo, lo)

def _checkforbidden(l):
    """Check filenames for illegal characters."""
    for f in l:
        if '\n' in f or '\r' in f:
            raise error.StorageError(
                _("'\\n' and '\\r' disallowed in filenames: %r")
                % pycompat.bytestr(f))


# apply the changes collected during the bisect loop to our addlist
# return a delta suitable for addrevision
def _addlistdelta(addlist, x):
    # for large addlist arrays, building a new array is cheaper
    # than repeatedly modifying the existing one
    currentposition = 0
    newaddlist = bytearray()

    for start, end, content in x:
        newaddlist += addlist[currentposition:start]
        if content:
            newaddlist += bytearray(content)

        currentposition = end

    newaddlist += addlist[currentposition:]

    deltatext = "".join(struct.pack(">lll", start, end, len(content))
                        + content for start, end, content in x)
    return deltatext, newaddlist

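# A sketch of the hunk encoding above: each hunk is three big-endian 32-bit
# integers -- start offset, end offset, replacement length -- followed by the
# replacement bytes, i.e. the patch format addrevision() consumes:
#
#     struct.pack(">lll", 0, 43, 0) + ""   # delete bytes [0, 43) of the base
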
def _splittopdir(f):
    if '/' in f:
        dir, subpath = f.split('/', 1)
        return dir + '/', subpath
    else:
        return '', f

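# For example:
#
#     >>> _splittopdir('a/b/c')
#     ('a/', 'b/c')
#     >>> _splittopdir('top')
#     ('', 'top')
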
_noop = lambda s: None

class treemanifest(object):
    def __init__(self, dir='', text=''):
        self._dir = dir
        self._node = nullid
        self._loadfunc = _noop
        self._copyfunc = _noop
        self._dirty = False
        self._dirs = {}
        self._lazydirs = {}
        # Using _lazymanifest here is a little slower than plain old dicts
        self._files = {}
        self._flags = {}
        if text:
            def readsubtree(subdir, subm):
                raise AssertionError('treemanifest constructor only accepts '
                                     'flat manifests')
            self.parse(text, readsubtree)
            self._dirty = True # Mark flat manifest dirty after parsing

    def _subpath(self, path):
        return self._dir + path

    def _loadalllazy(self):
        selfdirs = self._dirs
        for d, (path, node, readsubtree, docopy) in self._lazydirs.iteritems():
            if docopy:
                selfdirs[d] = readsubtree(path, node).copy()
            else:
                selfdirs[d] = readsubtree(path, node)
        self._lazydirs = {}

    def _loadlazy(self, d):
        v = self._lazydirs.get(d)
        if v:
            path, node, readsubtree, docopy = v
            if docopy:
                self._dirs[d] = readsubtree(path, node).copy()
            else:
                self._dirs[d] = readsubtree(path, node)
            del self._lazydirs[d]

    def _loadchildrensetlazy(self, visit):
        if not visit:
            return None
        if visit == 'all' or visit == 'this':
            self._loadalllazy()
            return None

        loadlazy = self._loadlazy
        for k in visit:
            loadlazy(k + '/')
        return visit

    def _loaddifflazy(self, t1, t2):
        """load items in t1 and t2 if they're needed for diffing.

        The current criteria are:
        - if it's not present in _lazydirs in either t1 or t2, load it in the
          other (it may already be loaded or it may not exist, doesn't matter)
        - if it's present in _lazydirs in both, compare the nodeid; if it
          differs, load it in both
        """
        toloadlazy = []
        for d, v1 in t1._lazydirs.iteritems():
            v2 = t2._lazydirs.get(d)
            if not v2 or v2[1] != v1[1]:
                toloadlazy.append(d)
        for d, v1 in t2._lazydirs.iteritems():
            if d not in t1._lazydirs:
                toloadlazy.append(d)

        for d in toloadlazy:
            t1._loadlazy(d)
            t2._loadlazy(d)

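    # Concretely (hypothetical nodeids): if both t1 and t2 lazily track 'a/'
    # at the same nodeid, neither side is loaded; if the nodeids differ, or
    # only one side tracks 'a/' lazily, 'a/' is loaded so it can be walked.
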
    def __len__(self):
        self._load()
        size = len(self._files)
        self._loadalllazy()
        for m in self._dirs.values():
            size += m.__len__()
        return size

    def __nonzero__(self):
        # Faster than "__len__() != 0" since it avoids loading sub-manifests
        return not self._isempty()

    __bool__ = __nonzero__

    def _isempty(self):
        self._load() # for consistency; already loaded by all callers
        # See if we can skip loading everything.
        if self._files or (self._dirs and
                           any(not m._isempty() for m in self._dirs.values())):
            return False
        self._loadalllazy()
        return (not self._dirs or
                all(m._isempty() for m in self._dirs.values()))

    def __repr__(self):
        return ('<treemanifest dir=%s, node=%s, loaded=%s, dirty=%s at 0x%x>' %
                (self._dir, hex(self._node),
                 bool(self._loadfunc is _noop),
                 self._dirty, id(self)))

    def dir(self):
        '''The directory that this tree manifest represents, including a
        trailing '/'. Empty string for the repo root directory.'''
        return self._dir

    def node(self):
        '''The node of this instance. nullid for unsaved instances. Should
        be updated when the instance is read from or written to a revlog.
        '''
        assert not self._dirty
        return self._node

    def setnode(self, node):
        self._node = node
        self._dirty = False

    def iterentries(self):
        self._load()
        self._loadalllazy()
        for p, n in sorted(itertools.chain(self._dirs.items(),
                                           self._files.items())):
            if p in self._files:
                yield self._subpath(p), n, self._flags.get(p, '')
            else:
                for x in n.iterentries():
                    yield x

    def items(self):
        self._load()
        self._loadalllazy()
        for p, n in sorted(itertools.chain(self._dirs.items(),
                                           self._files.items())):
            if p in self._files:
                yield self._subpath(p), n
            else:
                for f, sn in n.iteritems():
                    yield f, sn

    iteritems = items

    def iterkeys(self):
        self._load()
        self._loadalllazy()
        for p in sorted(itertools.chain(self._dirs, self._files)):
            if p in self._files:
                yield self._subpath(p)
            else:
                for f in self._dirs[p]:
                    yield f

    def keys(self):
        return list(self.iterkeys())

    def __iter__(self):
        return self.iterkeys()

    def __contains__(self, f):
        if f is None:
            return False
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            if dir not in self._dirs:
                return False

            return self._dirs[dir].__contains__(subpath)
        else:
            return f in self._files

    def get(self, f, default=None):
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            if dir not in self._dirs:
                return default
            return self._dirs[dir].get(subpath, default)
        else:
            return self._files.get(f, default)

    def __getitem__(self, f):
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            return self._dirs[dir].__getitem__(subpath)
        else:
            return self._files[f]

    def flags(self, f):
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            if dir not in self._dirs:
                return ''
            return self._dirs[dir].flags(subpath)
        else:
            if f in self._lazydirs or f in self._dirs:
                return ''
            return self._flags.get(f, '')

    def find(self, f):
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            return self._dirs[dir].find(subpath)
        else:
            return self._files[f], self._flags.get(f, '')

    def __delitem__(self, f):
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            self._dirs[dir].__delitem__(subpath)
            # If the directory is now empty, remove it
            if self._dirs[dir]._isempty():
                del self._dirs[dir]
        else:
            del self._files[f]
            if f in self._flags:
                del self._flags[f]
        self._dirty = True

    def __setitem__(self, f, n):
        assert n is not None
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)
            if dir not in self._dirs:
                self._dirs[dir] = treemanifest(self._subpath(dir))
            self._dirs[dir].__setitem__(subpath, n)
        else:
            self._files[f] = n[:21] # to match manifestdict's behavior
        self._dirty = True

    def _load(self):
        if self._loadfunc is not _noop:
            lf, self._loadfunc = self._loadfunc, _noop
            lf(self)
        elif self._copyfunc is not _noop:
            cf, self._copyfunc = self._copyfunc, _noop
            cf(self)

    def setflag(self, f, flags):
        """Set the flags (symlink, executable) for path f."""
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)
            if dir not in self._dirs:
                self._dirs[dir] = treemanifest(self._subpath(dir))
            self._dirs[dir].setflag(subpath, flags)
        else:
            self._flags[f] = flags
        self._dirty = True

    def copy(self):
        copy = treemanifest(self._dir)
        copy._node = self._node
        copy._dirty = self._dirty
        if self._copyfunc is _noop:
            def _copyfunc(s):
                self._load()
                s._lazydirs = {d: (p, n, r, True) for
                               d, (p, n, r, c) in self._lazydirs.iteritems()}
                sdirs = s._dirs
                for d, v in self._dirs.iteritems():
                    sdirs[d] = v.copy()
                s._files = dict.copy(self._files)
                s._flags = dict.copy(self._flags)
            if self._loadfunc is _noop:
                _copyfunc(copy)
            else:
                copy._copyfunc = _copyfunc
        else:
            copy._copyfunc = self._copyfunc
        return copy

    def filesnotin(self, m2, match=None):
        '''Set of files in this manifest that are not in the other'''
        if match and not match.always():
            m1 = self.matches(match)
            m2 = m2.matches(match)
            return m1.filesnotin(m2)

        files = set()
        def _filesnotin(t1, t2):
            if t1._node == t2._node and not t1._dirty and not t2._dirty:
                return
            t1._load()
            t2._load()
            self._loaddifflazy(t1, t2)
            for d, m1 in t1._dirs.iteritems():
                if d in t2._dirs:
                    m2 = t2._dirs[d]
                    _filesnotin(m1, m2)
                else:
                    files.update(m1.iterkeys())

            for fn in t1._files:
                if fn not in t2._files:
                    files.add(t1._subpath(fn))

        _filesnotin(self, m2)
        return files

    @propertycache
    def _alldirs(self):
        return util.dirs(self)

    def dirs(self):
        return self._alldirs

    def hasdir(self, dir):
        self._load()
        topdir, subdir = _splittopdir(dir)
        if topdir:
            self._loadlazy(topdir)
            if topdir in self._dirs:
                return self._dirs[topdir].hasdir(subdir)
            return False
        dirslash = dir + '/'
        return dirslash in self._dirs or dirslash in self._lazydirs

    def walk(self, match):
        '''Generates matching file names.

        Equivalent to manifest.matches(match).iterkeys(), but without creating
        an entirely new manifest.

        It also reports nonexistent files by marking them bad with match.bad().
        '''
        if match.always():
            for f in iter(self):
                yield f
            return

        fset = set(match.files())

        for fn in self._walk(match):
            if fn in fset:
                # specified pattern is the exact name
                fset.remove(fn)
            yield fn

        # for dirstate.walk, files=[''] means "walk the whole tree".
        # follow that here, too
        fset.discard('')

        for fn in sorted(fset):
            if not self.hasdir(fn):
                match.bad(fn, None)

    def _walk(self, match):
        '''Recursively generates matching file names for walk().'''
        visit = match.visitchildrenset(self._dir[:-1])
        if not visit:
            return

        # yield this dir's files and walk its submanifests
        self._load()
        visit = self._loadchildrensetlazy(visit)
        for p in sorted(list(self._dirs) + list(self._files)):
            if p in self._files:
                fullp = self._subpath(p)
                if match(fullp):
                    yield fullp
            else:
                if not visit or p[:-1] in visit:
                    for f in self._dirs[p]._walk(match):
                        yield f

    def matches(self, match):
        '''generate a new manifest filtered by the match argument'''
        if match.always():
            return self.copy()

        return self._matches(match)

    def _matches(self, match):
        '''recursively generate a new manifest filtered by the match argument.
        '''

        visit = match.visitchildrenset(self._dir[:-1])
        if visit == 'all':
            return self.copy()
        ret = treemanifest(self._dir)
        if not visit:
            return ret

        self._load()
        for fn in self._files:
            # While visitchildrenset *usually* lists only subdirs, this is
            # actually up to the matcher and may have some files in the set().
            # If visit == 'this', we should obviously look at the files in this
            # directory; if visit is a set, and fn is in it, we should inspect
            # fn (but no need to inspect things not in the set).
            if visit != 'this' and fn not in visit:
                continue
            fullp = self._subpath(fn)
            # visitchildrenset isn't perfect, we still need to call the regular
            # matcher code to further filter results.
            if not match(fullp):
                continue
            ret._files[fn] = self._files[fn]
            if fn in self._flags:
                ret._flags[fn] = self._flags[fn]

        visit = self._loadchildrensetlazy(visit)
        for dir, subm in self._dirs.iteritems():
            if visit and dir[:-1] not in visit:
                continue
            m = subm._matches(match)
            if not m._isempty():
                ret._dirs[dir] = m

        if not ret._isempty():
            ret._dirty = True
        return ret

    def diff(self, m2, match=None, clean=False):
        '''Finds changes between the current manifest and m2.

        Args:
          m2: the manifest to which this manifest should be compared.
          clean: if true, include files unchanged between these manifests
                 with a None value in the returned dictionary.

        The result is returned as a dict with filename as key and
        values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
        nodeid in the current/other manifest and fl1/fl2 is the flag
        in the current/other manifest. Where the file does not exist,
        the nodeid will be None and the flags will be the empty
        string.
        '''
        if match and not match.always():
            m1 = self.matches(match)
            m2 = m2.matches(match)
            return m1.diff(m2, clean=clean)
        result = {}
        emptytree = treemanifest()

        def _iterativediff(t1, t2, stack):
            """compares two tree manifests and appends new tree manifests
            that need to be compared to the stack"""
            if t1._node == t2._node and not t1._dirty and not t2._dirty:
                return
            t1._load()
            t2._load()
            self._loaddifflazy(t1, t2)

            for d, m1 in t1._dirs.iteritems():
                m2 = t2._dirs.get(d, emptytree)
                stack.append((m1, m2))

            for d, m2 in t2._dirs.iteritems():
                if d not in t1._dirs:
                    stack.append((emptytree, m2))

            for fn, n1 in t1._files.iteritems():
                fl1 = t1._flags.get(fn, '')
                n2 = t2._files.get(fn, None)
                fl2 = t2._flags.get(fn, '')
                if n1 != n2 or fl1 != fl2:
                    result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
                elif clean:
                    result[t1._subpath(fn)] = None

            for fn, n2 in t2._files.iteritems():
                if fn not in t1._files:
                    fl2 = t2._flags.get(fn, '')
                    result[t2._subpath(fn)] = ((None, ''), (n2, fl2))

        stackls = []
        _iterativediff(self, m2, stackls)
        while stackls:
            t1, t2 = stackls.pop()
            # stackls is populated in the function call
            _iterativediff(t1, t2, stackls)
        return result

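    # Example diff() result (hypothetical nodeids n1/n2): a changed file shows
    # both sides, a file only present in m2 has a (None, '') left side:
    #
    #     {'a/f':   ((n1, ''), (n2, 'x')),
    #      'a/new': ((None, ''), (n2, ''))}
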
    def unmodifiedsince(self, m2):
        return not self._dirty and not m2._dirty and self._node == m2._node

    def parse(self, text, readsubtree):
        selflazy = self._lazydirs
        subpath = self._subpath
        for f, n, fl in _parse(text):
            if fl == 't':
                f = f + '/'
                # False below means "doesn't need to be copied" and can use the
                # cached value from readsubtree directly.
                selflazy[f] = (subpath(f), n, readsubtree, False)
            elif '/' in f:
                # This is a flat manifest, so use __setitem__ and setflag rather
                # than assigning directly to _files and _flags, so we can
                # assign a path in a subdirectory, and to mark dirty (compared
                # to nullid).
                self[f] = n
                if fl:
                    self.setflag(f, fl)
            else:
                # Assigning to _files and _flags avoids marking as dirty,
                # and should be a little faster.
                self._files[f] = n
                if fl:
                    self._flags[f] = fl

    def text(self):
        """Get the full data of this manifest as a bytestring."""
        self._load()
        return _text(self.iterentries())

    def dirtext(self):
        """Get the full data of this directory as a bytestring. Make sure that
        any submanifests have been written first, so their nodeids are correct.
        """
        self._load()
        flags = self.flags
        lazydirs = [(d[:-1], v[1], 't') for d, v in self._lazydirs.iteritems()]
        dirs = [(d[:-1], self._dirs[d]._node, 't') for d in self._dirs]
        files = [(f, self._files[f], flags(f)) for f in self._files]
        return _text(sorted(dirs + files + lazydirs))

    def read(self, gettext, readsubtree):
        def _load_for_read(s):
            s.parse(gettext(), readsubtree)
            s._dirty = False
        self._loadfunc = _load_for_read

    def writesubtrees(self, m1, m2, writesubtree, match):
        self._load() # for consistency; should never have any effect here
        m1._load()
        m2._load()
        emptytree = treemanifest()
        def getnode(m, d):
            ld = m._lazydirs.get(d)
            if ld:
                return ld[1]
            return m._dirs.get(d, emptytree)._node

        # let's skip investigating things that `match` says we do not need.
        visit = match.visitchildrenset(self._dir[:-1])
        visit = self._loadchildrensetlazy(visit)
        if visit == 'this' or visit == 'all':
            visit = None
        for d, subm in self._dirs.iteritems():
            if visit and d[:-1] not in visit:
                continue
            subp1 = getnode(m1, d)
            subp2 = getnode(m2, d)
            if subp1 == nullid:
                subp1, subp2 = subp2, subp1
            writesubtree(subm, subp1, subp2, match)

    def walksubtrees(self, matcher=None):
        """Returns an iterator of the subtrees of this manifest, including this
        manifest itself.

        If `matcher` is provided, it only returns subtrees that match.
        """
        if matcher and not matcher.visitdir(self._dir[:-1]):
            return
        if not matcher or matcher(self._dir[:-1]):
            yield self

        self._load()
        # OPT: use visitchildrenset to avoid loading everything.
        self._loadalllazy()
        for d, subm in self._dirs.iteritems():
            for subtree in subm.walksubtrees(matcher=matcher):
                yield subtree

class manifestfulltextcache(util.lrucachedict):
    """File-backed LRU cache for the manifest cache

    The file consists of entries, up to EOF:

    - 20 bytes node, 4 bytes length, <length> manifest data

    These are written in reverse cache order (oldest to newest).

    """

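    # On-disk framing sketch of one entry (hypothetical node); read() below
    # consumes exactly this layout until EOF:
    #
    #     node (20 bytes) | struct.pack('>L', len(data)) | data
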
    _file = 'manifestfulltextcache'

    def __init__(self, max):
        super(manifestfulltextcache, self).__init__(max)
        self._dirty = False
        self._read = False
        self._opener = None

    def read(self):
        if self._read or self._opener is None:
            return

        try:
            with self._opener(self._file) as fp:
                set = super(manifestfulltextcache, self).__setitem__
                # ignore trailing data; this is a cache, corrupt entries are
                # skipped
                while True:
                    node = fp.read(20)
                    if len(node) < 20:
                        break
                    try:
                        size = struct.unpack('>L', fp.read(4))[0]
                    except struct.error:
                        break
                    value = bytearray(fp.read(size))
                    if len(value) != size:
                        break
                    set(node, value)
        except IOError:
            # the file is allowed to be missing
            pass

        self._read = True
        self._dirty = False

    def write(self):
        if not self._dirty or self._opener is None:
            return
        # rotate backwards to the first used node
        with self._opener(self._file, 'w', atomictemp=True, checkambig=True
                          ) as fp:
            node = self._head.prev
            while True:
                if node.key in self._cache:
                    fp.write(node.key)
                    fp.write(struct.pack('>L', len(node.value)))
                    fp.write(node.value)
                if node is self._head:
                    break
                node = node.prev

    def __len__(self):
        if not self._read:
            self.read()
        return super(manifestfulltextcache, self).__len__()

    def __contains__(self, k):
        if not self._read:
            self.read()
        return super(manifestfulltextcache, self).__contains__(k)

    def __iter__(self):
        if not self._read:
            self.read()
        return super(manifestfulltextcache, self).__iter__()

    def __getitem__(self, k):
        if not self._read:
            self.read()
        # the cache lru order can change on read
        setdirty = self._cache.get(k) is not self._head
        value = super(manifestfulltextcache, self).__getitem__(k)
        if setdirty:
            self._dirty = True
        return value

    def __setitem__(self, k, v):
        if not self._read:
            self.read()
        super(manifestfulltextcache, self).__setitem__(k, v)
        self._dirty = True

    def __delitem__(self, k):
        if not self._read:
            self.read()
        super(manifestfulltextcache, self).__delitem__(k)
        self._dirty = True

    def get(self, k, default=None):
        if not self._read:
            self.read()
        return super(manifestfulltextcache, self).get(k, default=default)

    def clear(self, clear_persisted_data=False):
        super(manifestfulltextcache, self).clear()
        if clear_persisted_data:
            self._dirty = True
            self.write()
        self._read = False

# an upper bound of what we expect from compression
# (the real-life value seems to be 3)
MAXCOMPRESSION = 10
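# MAXCOMPRESSION is handed to revlog.revlog() below as `upperboundcomp`;
# presumably it lets the revlog bound, from a delta's uncompressed size, how
# small the compressed form could possibly get. Only the plumbing of the
# value is visible in this file.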
1423
@interfaceutil.implementer(repository.imanifeststorage)
class manifestrevlog(object):
    '''A revlog that stores manifest texts. This is responsible for caching the
    full-text manifest contents.
    '''
    def __init__(self, opener, tree='', dirlogcache=None, indexfile=None,
                 treemanifest=False):
        """Constructs a new manifest revlog

        `indexfile` - used by extensions to have two manifests at once, like
        when transitioning between flat manifests and treemanifests.

        `treemanifest` - used to indicate this is a tree manifest revlog. Opener
        options can also be used to make this a tree manifest revlog. The opener
        option takes precedence, so if it is set to True, we ignore whatever
        value is passed in to the constructor.
        """
        # During normal operations, we expect to deal with not more than four
        # revs at a time (such as during commit --amend). When rebasing large
        # stacks of commits, the number can go up, hence the config knob below.
        cachesize = 4
        optiontreemanifest = False
        opts = getattr(opener, 'options', None)
        if opts is not None:
            cachesize = opts.get('manifestcachesize', cachesize)
            optiontreemanifest = opts.get('treemanifest', False)

        self._treeondisk = optiontreemanifest or treemanifest

        self._fulltextcache = manifestfulltextcache(cachesize)

        if tree:
            assert self._treeondisk, 'opts is %r' % opts

        if indexfile is None:
            indexfile = '00manifest.i'
            if tree:
                indexfile = "meta/" + tree + indexfile

        self.tree = tree

        # The dirlogcache is kept on the root manifest log
        if tree:
            self._dirlogcache = dirlogcache
        else:
            self._dirlogcache = {'': self}

        self._revlog = revlog.revlog(opener, indexfile,
                                     # only root indexfile is cached
                                     checkambig=not bool(tree),
                                     mmaplargeindex=True,
                                     upperboundcomp=MAXCOMPRESSION)

        self.index = self._revlog.index
        self.version = self._revlog.version
        self._generaldelta = self._revlog._generaldelta

    def _setupmanifestcachehooks(self, repo):
        """Persist the manifestfulltextcache on lock release"""
        if not util.safehasattr(repo, '_wlockref'):
            return

        self._fulltextcache._opener = repo.wcachevfs
        if repo._currentlock(repo._wlockref) is None:
            return

        reporef = weakref.ref(repo)
        manifestrevlogref = weakref.ref(self)

        def persistmanifestcache():
            repo = reporef()
            self = manifestrevlogref()
            if repo is None or self is None:
                return
            if repo.manifestlog.getstorage(b'') is not self:
                # there's a different manifest in play now, abort
                return
            self._fulltextcache.write()

        repo._afterlock(persistmanifestcache)

    @property
    def fulltextcache(self):
        return self._fulltextcache

    def clearcaches(self, clear_persisted_data=False):
        self._revlog.clearcaches()
        self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
        self._dirlogcache = {self.tree: self}

    def dirlog(self, d):
        if d:
            assert self._treeondisk
        if d not in self._dirlogcache:
            mfrevlog = manifestrevlog(self.opener, d,
                                      self._dirlogcache,
                                      treemanifest=self._treeondisk)
            self._dirlogcache[d] = mfrevlog
        return self._dirlogcache[d]

    def add(self, m, transaction, link, p1, p2, added, removed, readtree=None,
            match=None):
        if p1 in self.fulltextcache and util.safehasattr(m, 'fastdelta'):
            # If our first parent is in the manifest cache, we can
            # compute a delta here using properties we know about the
            # manifest up-front, which may save time later for the
            # revlog layer.

            _checkforbidden(added)
            # combine the changed lists into one sorted iterator
            work = heapq.merge([(x, False) for x in sorted(added)],
                               [(x, True) for x in sorted(removed)])

            arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
            cachedelta = self._revlog.rev(p1), deltatext
            text = util.buffer(arraytext)
            n = self._revlog.addrevision(text, transaction, link, p1, p2,
                                         cachedelta)
        else:
            # The first parent manifest isn't already loaded, so we'll
            # just encode a fulltext of the manifest and pass that
            # through to the revlog layer, and let it handle the delta
            # process.
            if self._treeondisk:
                assert readtree, "readtree must be set for treemanifest writes"
                assert match, "match must be specified for treemanifest writes"
                m1 = readtree(self.tree, p1)
                m2 = readtree(self.tree, p2)
                n = self._addtree(m, transaction, link, m1, m2, readtree,
                                  match=match)
                arraytext = None
            else:
                text = m.text()
                n = self._revlog.addrevision(text, transaction, link, p1, p2)
                arraytext = bytearray(text)

        if arraytext is not None:
            self.fulltextcache[n] = arraytext

        return n

    def _addtree(self, m, transaction, link, m1, m2, readtree, match):
        # If the manifest is unchanged compared to one parent,
        # don't write a new revision
        if self.tree != '' and (m.unmodifiedsince(m1) or m.unmodifiedsince(
            m2)):
            return m.node()
        def writesubtree(subm, subp1, subp2, match):
            sublog = self.dirlog(subm.dir())
            sublog.add(subm, transaction, link, subp1, subp2, None, None,
                       readtree=readtree, match=match)
        m.writesubtrees(m1, m2, writesubtree, match)
        text = m.dirtext()
        n = None
        if self.tree != '':
            # Double-check whether contents are unchanged relative to one
            # parent
            if text == m1.dirtext():
                n = m1.node()
            elif text == m2.dirtext():
                n = m2.node()

        if not n:
            n = self._revlog.addrevision(text, transaction, link, m1.node(),
                                         m2.node())

        # Save nodeid so parent manifest can calculate its nodeid
        m.setnode(n)
        return n

    def __len__(self):
        return len(self._revlog)

    def __iter__(self):
        return self._revlog.__iter__()

    def rev(self, node):
        return self._revlog.rev(node)

    def node(self, rev):
        return self._revlog.node(rev)

    def lookup(self, value):
        return self._revlog.lookup(value)

    def parentrevs(self, rev):
        return self._revlog.parentrevs(rev)

    def parents(self, node):
        return self._revlog.parents(node)

    def linkrev(self, rev):
        return self._revlog.linkrev(rev)

    def checksize(self):
        return self._revlog.checksize()

    def revision(self, node, _df=None, raw=False):
        return self._revlog.revision(node, _df=_df, raw=raw)

    def revdiff(self, rev1, rev2):
        return self._revlog.revdiff(rev1, rev2)

    def cmp(self, node, text):
        return self._revlog.cmp(node, text)

    def deltaparent(self, rev):
        return self._revlog.deltaparent(rev)

    def emitrevisions(self, nodes, nodesorder=None,
                      revisiondata=False, assumehaveparentrevisions=False,
                      deltamode=repository.CG_DELTAMODE_STD):
        return self._revlog.emitrevisions(
            nodes, nodesorder=nodesorder, revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            deltamode=deltamode)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        return self._revlog.addgroup(deltas, linkmapper, transaction,
                                     addrevisioncb=addrevisioncb)

    def rawsize(self, rev):
        return self._revlog.rawsize(rev)

    def getstrippoint(self, minlink):
        return self._revlog.getstrippoint(minlink)

    def strip(self, minlink, transaction):
        return self._revlog.strip(minlink, transaction)
1647
1652
1648 def files(self):
1653 def files(self):
1649 return self._revlog.files()
1654 return self._revlog.files()
1650
1655
1651 def clone(self, tr, destrevlog, **kwargs):
1656 def clone(self, tr, destrevlog, **kwargs):
1652 if not isinstance(destrevlog, manifestrevlog):
1657 if not isinstance(destrevlog, manifestrevlog):
1653 raise error.ProgrammingError('expected manifestrevlog to clone()')
1658 raise error.ProgrammingError('expected manifestrevlog to clone()')
1654
1659
1655 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
1660 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
1656
1661
1657 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
1662 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
1658 revisionscount=False, trackedsize=False,
1663 revisionscount=False, trackedsize=False,
1659 storedsize=False):
1664 storedsize=False):
1660 return self._revlog.storageinfo(
1665 return self._revlog.storageinfo(
1661 exclusivefiles=exclusivefiles, sharedfiles=sharedfiles,
1666 exclusivefiles=exclusivefiles, sharedfiles=sharedfiles,
1662 revisionscount=revisionscount, trackedsize=trackedsize,
1667 revisionscount=revisionscount, trackedsize=trackedsize,
1663 storedsize=storedsize)
1668 storedsize=storedsize)
1664
1669
1665 @property
1670 @property
1666 def indexfile(self):
1671 def indexfile(self):
1667 return self._revlog.indexfile
1672 return self._revlog.indexfile
1668
1673
1669 @indexfile.setter
1674 @indexfile.setter
1670 def indexfile(self, value):
1675 def indexfile(self, value):
1671 self._revlog.indexfile = value
1676 self._revlog.indexfile = value
1672
1677
1673 @property
1678 @property
1674 def opener(self):
1679 def opener(self):
1675 return self._revlog.opener
1680 return self._revlog.opener
1676
1681
1677 @opener.setter
1682 @opener.setter
1678 def opener(self, value):
1683 def opener(self, value):
1679 self._revlog.opener = value
1684 self._revlog.opener = value
1680
1685
1681 @interfaceutil.implementer(repository.imanifestlog)
1686 @interfaceutil.implementer(repository.imanifestlog)
1682 class manifestlog(object):
1687 class manifestlog(object):
1683 """A collection class representing the collection of manifest snapshots
1688 """A collection class representing the collection of manifest snapshots
1684 referenced by commits in the repository.
1689 referenced by commits in the repository.
1685
1690
1686 In this situation, 'manifest' refers to the abstract concept of a snapshot
1691 In this situation, 'manifest' refers to the abstract concept of a snapshot
1687 of the list of files in the given commit. Consumers of the output of this
1692 of the list of files in the given commit. Consumers of the output of this
1688 class do not care about the implementation details of the actual manifests
1693 class do not care about the implementation details of the actual manifests
1689 they receive (i.e. tree or flat or lazily loaded, etc)."""
1694 they receive (i.e. tree or flat or lazily loaded, etc)."""
1690 def __init__(self, opener, repo, rootstore, narrowmatch):
1695 def __init__(self, opener, repo, rootstore, narrowmatch):
1691 usetreemanifest = False
1696 usetreemanifest = False
1692 cachesize = 4
1697 cachesize = 4
1693
1698
1694 opts = getattr(opener, 'options', None)
1699 opts = getattr(opener, 'options', None)
1695 if opts is not None:
1700 if opts is not None:
1696 usetreemanifest = opts.get('treemanifest', usetreemanifest)
1701 usetreemanifest = opts.get('treemanifest', usetreemanifest)
1697 cachesize = opts.get('manifestcachesize', cachesize)
1702 cachesize = opts.get('manifestcachesize', cachesize)
1698
1703
1699 self._treemanifests = usetreemanifest
1704 self._treemanifests = usetreemanifest
1700
1705
1701 self._rootstore = rootstore
1706 self._rootstore = rootstore
1702 self._rootstore._setupmanifestcachehooks(repo)
1707 self._rootstore._setupmanifestcachehooks(repo)
1703 self._narrowmatch = narrowmatch
1708 self._narrowmatch = narrowmatch
1704
1709
1705 # A cache of the manifestctx or treemanifestctx for each directory
1710 # A cache of the manifestctx or treemanifestctx for each directory
1706 self._dirmancache = {}
1711 self._dirmancache = {}
1707 self._dirmancache[''] = util.lrucachedict(cachesize)
1712 self._dirmancache[''] = util.lrucachedict(cachesize)
1708
1713
1709 self._cachesize = cachesize
1714 self._cachesize = cachesize
1710
1715
1711 def __getitem__(self, node):
1716 def __getitem__(self, node):
1712 """Retrieves the manifest instance for the given node. Throws a
1717 """Retrieves the manifest instance for the given node. Throws a
1713 LookupError if not found.
1718 LookupError if not found.
1714 """
1719 """
1715 return self.get('', node)
1720 return self.get('', node)
1716
1721
1717 def get(self, tree, node, verify=True):
1722 def get(self, tree, node, verify=True):
1718 """Retrieves the manifest instance for the given node. Throws a
1723 """Retrieves the manifest instance for the given node. Throws a
1719 LookupError if not found.
1724 LookupError if not found.
1720
1725
1721 `verify` - if True an exception will be thrown if the node is not in
1726 `verify` - if True an exception will be thrown if the node is not in
1722 the revlog
1727 the revlog
1723 """
1728 """
1724 if node in self._dirmancache.get(tree, ()):
1729 if node in self._dirmancache.get(tree, ()):
1725 return self._dirmancache[tree][node]
1730 return self._dirmancache[tree][node]
1726
1731
1727 if not self._narrowmatch.always():
1732 if not self._narrowmatch.always():
1728 if not self._narrowmatch.visitdir(tree[:-1]):
1733 if not self._narrowmatch.visitdir(tree[:-1]):
1729 return excludeddirmanifestctx(tree, node)
1734 return excludeddirmanifestctx(tree, node)
1730 if tree:
1735 if tree:
1731 if self._rootstore._treeondisk:
1736 if self._rootstore._treeondisk:
1732 if verify:
1737 if verify:
1733 # Side-effect is LookupError is raised if node doesn't
1738 # Side-effect is LookupError is raised if node doesn't
1734 # exist.
1739 # exist.
1735 self.getstorage(tree).rev(node)
1740 self.getstorage(tree).rev(node)
1736
1741
1737 m = treemanifestctx(self, tree, node)
1742 m = treemanifestctx(self, tree, node)
1738 else:
1743 else:
1739 raise error.Abort(
1744 raise error.Abort(
1740 _("cannot ask for manifest directory '%s' in a flat "
1745 _("cannot ask for manifest directory '%s' in a flat "
1741 "manifest") % tree)
1746 "manifest") % tree)
1742 else:
1747 else:
1743 if verify:
1748 if verify:
1744 # Side-effect is LookupError is raised if node doesn't exist.
1749 # Side-effect is LookupError is raised if node doesn't exist.
1745 self._rootstore.rev(node)
1750 self._rootstore.rev(node)
1746
1751
1747 if self._treemanifests:
1752 if self._treemanifests:
1748 m = treemanifestctx(self, '', node)
1753 m = treemanifestctx(self, '', node)
1749 else:
1754 else:
1750 m = manifestctx(self, node)
1755 m = manifestctx(self, node)
1751
1756
1752 if node != nullid:
1757 if node != nullid:
1753 mancache = self._dirmancache.get(tree)
1758 mancache = self._dirmancache.get(tree)
1754 if not mancache:
1759 if not mancache:
1755 mancache = util.lrucachedict(self._cachesize)
1760 mancache = util.lrucachedict(self._cachesize)
1756 self._dirmancache[tree] = mancache
1761 self._dirmancache[tree] = mancache
1757 mancache[node] = m
1762 mancache[node] = m
1758 return m
1763 return m
1759
1764
1760 def getstorage(self, tree):
1765 def getstorage(self, tree):
1761 return self._rootstore.dirlog(tree)
1766 return self._rootstore.dirlog(tree)
1762
1767
1763 def clearcaches(self, clear_persisted_data=False):
1768 def clearcaches(self, clear_persisted_data=False):
1764 self._dirmancache.clear()
1769 self._dirmancache.clear()
1765 self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)
1770 self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)
1766
1771
1767 def rev(self, node):
1772 def rev(self, node):
1768 return self._rootstore.rev(node)
1773 return self._rootstore.rev(node)
1769
1774
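A minimal, illustrative sketch of the consumer-side API described above (not
part of manifest.py; `repo` and `node` are hypothetical stand-ins for a
localrepository and a manifest nodeid):

# Illustrative sketch only -- typical consumer usage of manifestlog.
def _example_read_manifest(repo, node):
    ml = repo.manifestlog        # the repo's manifestlog instance
    mctx = ml[node]              # manifestctx or treemanifestctx
    full = mctx.read()           # the complete manifest contents
    fast = mctx.readfast()       # may return only a delta against p1
    return full, fast
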
@interfaceutil.implementer(repository.imanifestrevisionwritable)
class memmanifestctx(object):
    def __init__(self, manifestlog):
        self._manifestlog = manifestlog
        self._manifestdict = manifestdict()

    def _storage(self):
        return self._manifestlog.getstorage(b'')

    def new(self):
        return memmanifestctx(self._manifestlog)

    def copy(self):
        memmf = memmanifestctx(self._manifestlog)
        memmf._manifestdict = self.read().copy()
        return memmf

    def read(self):
        return self._manifestdict

    def write(self, transaction, link, p1, p2, added, removed, match=None):
        return self._storage().add(self._manifestdict, transaction, link,
                                   p1, p2, added, removed, match=match)

@interfaceutil.implementer(repository.imanifestrevisionstored)
class manifestctx(object):
    """A class representing a single revision of a manifest, including its
    contents, its parent revs, and its linkrev.
    """
    def __init__(self, manifestlog, node):
        self._manifestlog = manifestlog
        self._data = None

        self._node = node

        # TODO: We eventually want p1, p2, and linkrev exposed on this class,
        # but let's add it later when something needs it and we can load it
        # lazily.
        #self.p1, self.p2 = store.parents(node)
        #rev = store.rev(node)
        #self.linkrev = store.linkrev(rev)

    def _storage(self):
        return self._manifestlog.getstorage(b'')

    def node(self):
        return self._node

    def new(self):
        return memmanifestctx(self._manifestlog)

    def copy(self):
        memmf = memmanifestctx(self._manifestlog)
        memmf._manifestdict = self.read().copy()
        return memmf

    @propertycache
    def parents(self):
        return self._storage().parents(self._node)

    def read(self):
        if self._data is None:
            if self._node == nullid:
                self._data = manifestdict()
            else:
                store = self._storage()
                if self._node in store.fulltextcache:
                    text = pycompat.bytestr(store.fulltextcache[self._node])
                else:
                    text = store.revision(self._node)
                    arraytext = bytearray(text)
                    store.fulltextcache[self._node] = arraytext
                self._data = manifestdict(text)
        return self._data

    def readfast(self, shallow=False):
        '''Calls either readdelta or read, based on which would be less work.
        readdelta is called if the delta is against the p1, and therefore can be
        read quickly.

        If `shallow` is True, nothing changes since this is a flat manifest.
        '''
        store = self._storage()
        r = store.rev(self._node)
        deltaparent = store.deltaparent(r)
        if deltaparent != nullrev and deltaparent in store.parentrevs(r):
            return self.readdelta()
        return self.read()

    def readdelta(self, shallow=False):
        '''Returns a manifest containing just the entries that are present
        in this manifest, but not in its p1 manifest. This is efficient to read
        if the revlog delta is already p1.

        Changing the value of `shallow` has no effect on flat manifests.
        '''
        store = self._storage()
        r = store.rev(self._node)
        d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
        return manifestdict(d)

    def find(self, key):
        return self.read().find(key)

@interfaceutil.implementer(repository.imanifestrevisionwritable)
class memtreemanifestctx(object):
    def __init__(self, manifestlog, dir=''):
        self._manifestlog = manifestlog
        self._dir = dir
        self._treemanifest = treemanifest()

    def _storage(self):
        return self._manifestlog.getstorage(b'')

    def new(self, dir=''):
        return memtreemanifestctx(self._manifestlog, dir=dir)

    def copy(self):
        memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
        memmf._treemanifest = self._treemanifest.copy()
        return memmf

    def read(self):
        return self._treemanifest

    def write(self, transaction, link, p1, p2, added, removed, match=None):
        def readtree(dir, node):
            return self._manifestlog.get(dir, node).read()
        return self._storage().add(self._treemanifest, transaction, link,
                                   p1, p2, added, removed, readtree=readtree,
                                   match=match)

@interfaceutil.implementer(repository.imanifestrevisionstored)
class treemanifestctx(object):
    def __init__(self, manifestlog, dir, node):
        self._manifestlog = manifestlog
        self._dir = dir
        self._data = None

        self._node = node

        # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
        # we can instantiate treemanifestctx objects for directories we don't
        # have on disk.
        #self.p1, self.p2 = store.parents(node)
        #rev = store.rev(node)
        #self.linkrev = store.linkrev(rev)

    def _storage(self):
        narrowmatch = self._manifestlog._narrowmatch
        if not narrowmatch.always():
            if not narrowmatch.visitdir(self._dir[:-1]):
                return excludedmanifestrevlog(self._dir)
        return self._manifestlog.getstorage(self._dir)

    def read(self):
        if self._data is None:
            store = self._storage()
            if self._node == nullid:
                self._data = treemanifest()
            # TODO accessing non-public API
            elif store._treeondisk:
                m = treemanifest(dir=self._dir)
                def gettext():
                    return store.revision(self._node)
                def readsubtree(dir, subm):
                    # Set verify to False since we need to be able to create
                    # subtrees for trees that don't exist on disk.
                    return self._manifestlog.get(dir, subm, verify=False).read()
                m.read(gettext, readsubtree)
                m.setnode(self._node)
                self._data = m
            else:
                if self._node in store.fulltextcache:
                    text = pycompat.bytestr(store.fulltextcache[self._node])
                else:
                    text = store.revision(self._node)
                    arraytext = bytearray(text)
                    store.fulltextcache[self._node] = arraytext
                self._data = treemanifest(dir=self._dir, text=text)

        return self._data

    def node(self):
        return self._node

    def new(self, dir=''):
        return memtreemanifestctx(self._manifestlog, dir=dir)

    def copy(self):
        memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
        memmf._treemanifest = self.read().copy()
        return memmf

    @propertycache
    def parents(self):
        return self._storage().parents(self._node)

    def readdelta(self, shallow=False):
        '''Returns a manifest containing just the entries that are present
        in this manifest, but not in its p1 manifest. This is efficient to read
        if the revlog delta is already p1.

        If `shallow` is True, this will read the delta for this directory,
        without recursively reading subdirectory manifests. Instead, any
        subdirectory entry will be reported as it appears in the manifest, i.e.
        the subdirectory will be reported among files and distinguished only by
        its 't' flag.
        '''
        store = self._storage()
        if shallow:
            r = store.rev(self._node)
            d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
            return manifestdict(d)
        else:
            # Need to perform a slow delta
            r0 = store.deltaparent(store.rev(self._node))
            m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
            m1 = self.read()
            md = treemanifest(dir=self._dir)
            for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
                if n1:
                    md[f] = n1
                    if fl1:
                        md.setflag(f, fl1)
            return md

    def readfast(self, shallow=False):
        '''Calls either readdelta or read, based on which would be less work.
        readdelta is called if the delta is against the p1, and therefore can be
        read quickly.

        If `shallow` is True, it only returns the entries from this manifest,
        and not any submanifests.
        '''
        store = self._storage()
        r = store.rev(self._node)
        deltaparent = store.deltaparent(r)
        if (deltaparent != nullrev and
            deltaparent in store.parentrevs(r)):
            return self.readdelta(shallow=shallow)

        if shallow:
            return manifestdict(store.revision(self._node))
        else:
            return self.read()

    def find(self, key):
        return self.read().find(key)

class excludeddir(treemanifest):
    """Stand-in for a directory that is excluded from the repository.

    With narrowing active on a repository that uses treemanifests,
    some of the directory revlogs will be excluded from the resulting
    clone. This is a huge storage win for clients, but means we need
    some sort of pseudo-manifest to surface to internals so we can
    detect a merge conflict outside the narrowspec. That's what this
    class is: it stands in for a directory whose node is known, but
    whose contents are unknown.
    """
    def __init__(self, dir, node):
        super(excludeddir, self).__init__(dir)
        self._node = node
        # Add an empty file, which will be included by iterators and such,
        # appearing as the directory itself (i.e. something like "dir/")
        self._files[''] = node
        self._flags[''] = 't'

    # Manifests outside the narrowspec should never be modified, so avoid
    # copying. This makes a noticeable difference when there are very many
    # directories outside the narrowspec. Also, it makes sense for the copy to
    # be of the same type as the original, which would not happen with the
    # super type's copy().
    def copy(self):
        return self

class excludeddirmanifestctx(treemanifestctx):
    """context wrapper for excludeddir - see that docstring for rationale"""
    def __init__(self, dir, node):
        self._dir = dir
        self._node = node

    def read(self):
        return excludeddir(self._dir, self._node)

    def write(self, *args):
        raise error.ProgrammingError(
            'attempt to write manifest from excluded dir %s' % self._dir)

class excludedmanifestrevlog(manifestrevlog):
    """Stand-in for excluded treemanifest revlogs.

    When narrowing is active on a treemanifest repository, we'll have
    references to directories we can't see due to the revlog being
    skipped. This class exists to conform to the manifestrevlog
    interface for those directories and proactively prevent writes to
    outside the narrowspec.
    """

    def __init__(self, dir):
        self._dir = dir

    def __len__(self):
        raise error.ProgrammingError(
            'attempt to get length of excluded dir %s' % self._dir)

    def rev(self, node):
        raise error.ProgrammingError(
            'attempt to get rev from excluded dir %s' % self._dir)

    def linkrev(self, node):
        raise error.ProgrammingError(
            'attempt to get linkrev from excluded dir %s' % self._dir)

    def node(self, rev):
        raise error.ProgrammingError(
            'attempt to get node from excluded dir %s' % self._dir)

    def add(self, *args, **kwargs):
        # We should never write entries in dirlogs outside the narrow clone.
        # However, the method still gets called from writesubtree() in
        # _addtree(), so we need to handle it. We should possibly make
        # writesubtree() avoid calling add() with a clean manifest (_dirty is
        # always False in excludeddir instances).
        pass
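
The readfast()/readdelta() pair above encodes one rule: a stored delta is only
cheap to reuse when its base is one of the revision's parents. A condensed,
illustrative restatement of that check (not part of manifest.py; `store`
stands in for the backing revlog):

# Illustrative sketch only -- when a stored delta is cheap to reuse.
def _delta_is_cheap(store, node):
    r = store.rev(node)
    base = store.deltaparent(r)
    return base != nullrev and base in store.parentrevs(r)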
@@ -1,2684 +1,2690 @@ mercurial/revlog.py
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_KNOWN_FLAGS,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    repository,
    templatefilters,
    util,
)
from .revlogutils import (
    deltas as deltautil,
)
from .utils import (
    interfaceutil,
    storageutil,
    stringutil,
)

# Bare references to all the names above, so pyflakes does not flag the
# imports as unused. We need these names available in the module for
# extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_KNOWN_FLAGS
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod(r'parsers')
rustancestor = policy.importrust(r'ancestor')
rustdagop = policy.importrust(r'dagop')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False

def ellipsiswriteprocessor(rl, text):
    return text, False

def ellipsisrawprocessor(rl, text):
    return False

ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
      - (read) f(self, rawtext) -> text, bool
      - (write) f(self, text) -> rawtext, bool
      - (raw) f(self, rawtext) -> bool
    "text" is presented to the user. "rawtext" is stored in revlog data, not
    directly visible to the user.
    The boolean returned by these transforms is used to determine whether
    the returned text can be used for hash integrity checking. For example,
    if "write" returns False, then "text" is used to generate the hash. If
    "write" returns True, that basically means "rawtext" returned by "write"
    should be used to generate the hash. Usually, "write" and "read" return
    different booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    _insertflagprocessor(flag, processor, _flagprocessors)

def _insertflagprocessor(flag, processor, flagprocessors):
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise error.ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise error.ProgrammingError(msg)
    if flag in flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    flagprocessors[flag] = processor

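For illustration, a processor is just the (read, write, raw) triple described
in the addflagprocessor() docstring above. A minimal no-op sketch (not part of
revlog.py; real processors, like the one the lfs extension registers on
REVIDX_EXTSTORED, actually rewrite rawtext):

# Illustrative sketch only -- the shape of a flag processor triple.
def _sketchread(rl, rawtext):
    return rawtext, True     # returned text is usable for hash checks
def _sketchwrite(rl, text):
    return text, True        # returned rawtext is usable for hash checks
def _sketchraw(rl, rawtext):
    return True
# addflagprocessor(REVIDX_EXTSTORED, (_sketchread, _sketchwrite, _sketchraw))
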
def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)

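These helpers implement the bit packing used by the index: the data offset
lives in the high bits of a single integer and the 16 flag bits in the low
bits. An illustrative round trip (not part of revlog.py):

# Illustrative sketch only -- offset/flags packing round trip.
_packed = offset_type(4096, REVIDX_ISCENSORED)
assert getoffset(_packed) == 4096
assert gettype(_packed) == REVIDX_ISCENSORED
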
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)

@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack

class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(_('index entry flags need revlog '
                                      'version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

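Note how packentry() smuggles the version flags into the index: for rev 0, the
first four bytes of the packed entry (the high half of the 64-bit offset/flags
field, always zero for the first revision) are overwritten with the version
header. An illustrative sketch (not part of revlog.py; the entry values are
made up):

# Illustrative sketch only -- entry 0 doubles as the version header.
_entry0 = (offset_type(0, 0), 11, 11, 0, 0, -1, -1, b'\x00' * 20)
_packed0 = revlogio().packentry(_entry0, None, REVLOG_DEFAULT_VERSION, 0)
assert _packed0[:4] == versionformat_pack(REVLOG_DEFAULT_VERSION)
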
class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False,
                 upperboundcomp=None):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        """
        self.upperboundcomp = upperboundcomp
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(_flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._loadindex()

    def _loadindex(self):
        mmapindexthreshold = None
        opts = getattr(self.opener, 'options', {}) or {}

        if 'revlogv2' in opts:
            newversionflags = REVLOGV2 | FLAG_INLINE_DATA
        elif 'revlogv1' in opts:
            newversionflags = REVLOGV1 | FLAG_INLINE_DATA
            if 'generaldelta' in opts:
                newversionflags |= FLAG_GENERALDELTA
        elif getattr(self.opener, 'options', None) is not None:
            # If options provided but no 'revlog*' found, the repository
            # would have no 'requires' file in it, which means we have to
            # stick to the old format.
            newversionflags = REVLOGV0
        else:
            newversionflags = REVLOG_DEFAULT_VERSION

        if 'chunkcachesize' in opts:
            self._chunkcachesize = opts['chunkcachesize']
        if 'maxchainlen' in opts:
            self._maxchainlen = opts['maxchainlen']
        if 'deltabothparents' in opts:
            self._deltabothparents = opts['deltabothparents']
        self._lazydelta = bool(opts.get('lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
415 if 'compengine' in opts:
421 if 'compengine' in opts:
416 self._compengine = opts['compengine']
422 self._compengine = opts['compengine']
417 if 'zlib.level' in opts:
423 if 'zlib.level' in opts:
418 self._compengineopts['zlib.level'] = opts['zlib.level']
424 self._compengineopts['zlib.level'] = opts['zlib.level']
419 if 'zstd.level' in opts:
425 if 'zstd.level' in opts:
420 self._compengineopts['zstd.level'] = opts['zstd.level']
426 self._compengineopts['zstd.level'] = opts['zstd.level']
421 if 'maxdeltachainspan' in opts:
427 if 'maxdeltachainspan' in opts:
422 self._maxdeltachainspan = opts['maxdeltachainspan']
428 self._maxdeltachainspan = opts['maxdeltachainspan']
423 if self._mmaplargeindex and 'mmapindexthreshold' in opts:
429 if self._mmaplargeindex and 'mmapindexthreshold' in opts:
424 mmapindexthreshold = opts['mmapindexthreshold']
430 mmapindexthreshold = opts['mmapindexthreshold']
425 self._sparserevlog = bool(opts.get('sparse-revlog', False))
431 self._sparserevlog = bool(opts.get('sparse-revlog', False))
426 withsparseread = bool(opts.get('with-sparse-read', False))
432 withsparseread = bool(opts.get('with-sparse-read', False))
427 # sparse-revlog forces sparse-read
433 # sparse-revlog forces sparse-read
428 self._withsparseread = self._sparserevlog or withsparseread
434 self._withsparseread = self._sparserevlog or withsparseread
429 if 'sparse-read-density-threshold' in opts:
435 if 'sparse-read-density-threshold' in opts:
430 self._srdensitythreshold = opts['sparse-read-density-threshold']
436 self._srdensitythreshold = opts['sparse-read-density-threshold']
431 if 'sparse-read-min-gap-size' in opts:
437 if 'sparse-read-min-gap-size' in opts:
432 self._srmingapsize = opts['sparse-read-min-gap-size']
438 self._srmingapsize = opts['sparse-read-min-gap-size']
433 if opts.get('enableellipsis'):
439 if opts.get('enableellipsis'):
434 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
440 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
435
441
436 # revlog v0 doesn't have flag processors
442 # revlog v0 doesn't have flag processors
437 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
443 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
438 _insertflagprocessor(flag, processor, self._flagprocessors)
444 _insertflagprocessor(flag, processor, self._flagprocessors)
439
445
440 if self._chunkcachesize <= 0:
446 if self._chunkcachesize <= 0:
441 raise error.RevlogError(_('revlog chunk cache size %r is not '
447 raise error.RevlogError(_('revlog chunk cache size %r is not '
442 'greater than 0') % self._chunkcachesize)
448 'greater than 0') % self._chunkcachesize)
443 elif self._chunkcachesize & (self._chunkcachesize - 1):
449 elif self._chunkcachesize & (self._chunkcachesize - 1):
444 raise error.RevlogError(_('revlog chunk cache size %r is not a '
450 raise error.RevlogError(_('revlog chunk cache size %r is not a '
445 'power of 2') % self._chunkcachesize)
451 'power of 2') % self._chunkcachesize)
446
452
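# --- Editor's sketch (illustrative only, not part of this change) ---
# Why `n & (n - 1)` detects non-powers of two: a power of two has a
# single bit set, and `n - 1` flips that bit while setting every lower
# bit, so the AND is zero exactly for powers of two. Standalone check
# (`is_power_of_two` is a hypothetical helper name):
def is_power_of_two(n):
    return n > 0 and n & (n - 1) == 0

assert is_power_of_two(65536)       # 0x10000 & 0x0ffff == 0
assert not is_power_of_two(65535)   # 0x0ffff & 0x0fffe != 0
# --- end of editor's sketch ---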
447 indexdata = ''
453 indexdata = ''
448 self._initempty = True
454 self._initempty = True
449 try:
455 try:
450 with self._indexfp() as f:
456 with self._indexfp() as f:
451 if (mmapindexthreshold is not None and
457 if (mmapindexthreshold is not None and
452 self.opener.fstat(f).st_size >= mmapindexthreshold):
458 self.opener.fstat(f).st_size >= mmapindexthreshold):
453 # TODO: should .close() to release resources without
459 # TODO: should .close() to release resources without
454 # relying on Python GC
460 # relying on Python GC
455 indexdata = util.buffer(util.mmapread(f))
461 indexdata = util.buffer(util.mmapread(f))
456 else:
462 else:
457 indexdata = f.read()
463 indexdata = f.read()
458 if len(indexdata) > 0:
464 if len(indexdata) > 0:
459 versionflags = versionformat_unpack(indexdata[:4])[0]
465 versionflags = versionformat_unpack(indexdata[:4])[0]
460 self._initempty = False
466 self._initempty = False
461 else:
467 else:
462 versionflags = newversionflags
468 versionflags = newversionflags
463 except IOError as inst:
469 except IOError as inst:
464 if inst.errno != errno.ENOENT:
470 if inst.errno != errno.ENOENT:
465 raise
471 raise
466
472
467 versionflags = newversionflags
473 versionflags = newversionflags
468
474
469 self.version = versionflags
475 self.version = versionflags
470
476
471 flags = versionflags & ~0xFFFF
477 flags = versionflags & ~0xFFFF
472 fmt = versionflags & 0xFFFF
478 fmt = versionflags & 0xFFFF
473
479
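# --- Editor's sketch (illustrative only, not part of this change) ---
# The 4-byte header word packs the format number into the low 16 bits
# and feature flags into the high 16 bits, which is what the two masks
# above separate. Standalone decode using values that mirror this
# file's REVLOGV1 / FLAG_INLINE_DATA constants (the `_sk` names are
# hypothetical):
REVLOGV1_sk = 1
FLAG_INLINE_DATA_sk = 1 << 16

header_sk = REVLOGV1_sk | FLAG_INLINE_DATA_sk
assert header_sk & 0xFFFF == REVLOGV1_sk            # the `fmt` part
assert header_sk & ~0xFFFF == FLAG_INLINE_DATA_sk   # the `flags` part
# --- end of editor's sketch ---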
474 if fmt == REVLOGV0:
480 if fmt == REVLOGV0:
475 if flags:
481 if flags:
476 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
482 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
477 'revlog %s') %
483 'revlog %s') %
478 (flags >> 16, fmt, self.indexfile))
484 (flags >> 16, fmt, self.indexfile))
479
485
480 self._inline = False
486 self._inline = False
481 self._generaldelta = False
487 self._generaldelta = False
482
488
483 elif fmt == REVLOGV1:
489 elif fmt == REVLOGV1:
484 if flags & ~REVLOGV1_FLAGS:
490 if flags & ~REVLOGV1_FLAGS:
485 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
491 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
486 'revlog %s') %
492 'revlog %s') %
487 (flags >> 16, fmt, self.indexfile))
493 (flags >> 16, fmt, self.indexfile))
488
494
489 self._inline = versionflags & FLAG_INLINE_DATA
495 self._inline = versionflags & FLAG_INLINE_DATA
490 self._generaldelta = versionflags & FLAG_GENERALDELTA
496 self._generaldelta = versionflags & FLAG_GENERALDELTA
491
497
492 elif fmt == REVLOGV2:
498 elif fmt == REVLOGV2:
493 if flags & ~REVLOGV2_FLAGS:
499 if flags & ~REVLOGV2_FLAGS:
494 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
500 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
495 'revlog %s') %
501 'revlog %s') %
496 (flags >> 16, fmt, self.indexfile))
502 (flags >> 16, fmt, self.indexfile))
497
503
498 self._inline = versionflags & FLAG_INLINE_DATA
504 self._inline = versionflags & FLAG_INLINE_DATA
499 # generaldelta implied by version 2 revlogs.
505 # generaldelta implied by version 2 revlogs.
500 self._generaldelta = True
506 self._generaldelta = True
501
507
502 else:
508 else:
503 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
509 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
504 (fmt, self.indexfile))
510 (fmt, self.indexfile))
505 # sparse-revlog can't be on without general-delta (issue6056)
511 # sparse-revlog can't be on without general-delta (issue6056)
506 if not self._generaldelta:
512 if not self._generaldelta:
507 self._sparserevlog = False
513 self._sparserevlog = False
508
514
509 self._storedeltachains = True
515 self._storedeltachains = True
510
516
511 self._io = revlogio()
517 self._io = revlogio()
512 if self.version == REVLOGV0:
518 if self.version == REVLOGV0:
513 self._io = revlogoldio()
519 self._io = revlogoldio()
514 try:
520 try:
515 d = self._io.parseindex(indexdata, self._inline)
521 d = self._io.parseindex(indexdata, self._inline)
516 except (ValueError, IndexError):
522 except (ValueError, IndexError):
517 raise error.RevlogError(_("index %s is corrupted") %
523 raise error.RevlogError(_("index %s is corrupted") %
518 self.indexfile)
524 self.indexfile)
519 self.index, nodemap, self._chunkcache = d
525 self.index, nodemap, self._chunkcache = d
520 if nodemap is not None:
526 if nodemap is not None:
521 self.nodemap = self._nodecache = nodemap
527 self.nodemap = self._nodecache = nodemap
522 if not self._chunkcache:
528 if not self._chunkcache:
523 self._chunkclear()
529 self._chunkclear()
524 # revnum -> (chain-length, sum-delta-length)
530 # revnum -> (chain-length, sum-delta-length)
525 self._chaininfocache = {}
531 self._chaininfocache = {}
526 # revlog header -> revlog compressor
532 # revlog header -> revlog compressor
527 self._decompressors = {}
533 self._decompressors = {}
528
534
529 @util.propertycache
535 @util.propertycache
530 def _compressor(self):
536 def _compressor(self):
531 engine = util.compengines[self._compengine]
537 engine = util.compengines[self._compengine]
532 return engine.revlogcompressor(self._compengineopts)
538 return engine.revlogcompressor(self._compengineopts)
533
539
534 def _indexfp(self, mode='r'):
540 def _indexfp(self, mode='r'):
535 """file object for the revlog's index file"""
541 """file object for the revlog's index file"""
536 args = {r'mode': mode}
542 args = {r'mode': mode}
537 if mode != 'r':
543 if mode != 'r':
538 args[r'checkambig'] = self._checkambig
544 args[r'checkambig'] = self._checkambig
539 if mode == 'w':
545 if mode == 'w':
540 args[r'atomictemp'] = True
546 args[r'atomictemp'] = True
541 return self.opener(self.indexfile, **args)
547 return self.opener(self.indexfile, **args)
542
548
543 def _datafp(self, mode='r'):
549 def _datafp(self, mode='r'):
544 """file object for the revlog's data file"""
550 """file object for the revlog's data file"""
545 return self.opener(self.datafile, mode=mode)
551 return self.opener(self.datafile, mode=mode)
546
552
547 @contextlib.contextmanager
553 @contextlib.contextmanager
548 def _datareadfp(self, existingfp=None):
554 def _datareadfp(self, existingfp=None):
549 """file object suitable to read data"""
555 """file object suitable to read data"""
550 # Use explicit file handle, if given.
556 # Use explicit file handle, if given.
551 if existingfp is not None:
557 if existingfp is not None:
552 yield existingfp
558 yield existingfp
553
559
554 # Use a file handle being actively used for writes, if available.
560 # Use a file handle being actively used for writes, if available.
555 # There is some danger in doing this because reads will seek the
561 # There is some danger in doing this because reads will seek the
556 # file. However, _writeentry() performs a SEEK_END before all writes,
562 # file. However, _writeentry() performs a SEEK_END before all writes,
557 # so we should be safe.
563 # so we should be safe.
558 elif self._writinghandles:
564 elif self._writinghandles:
559 if self._inline:
565 if self._inline:
560 yield self._writinghandles[0]
566 yield self._writinghandles[0]
561 else:
567 else:
562 yield self._writinghandles[1]
568 yield self._writinghandles[1]
563
569
564 # Otherwise open a new file handle.
570 # Otherwise open a new file handle.
565 else:
571 else:
566 if self._inline:
572 if self._inline:
567 func = self._indexfp
573 func = self._indexfp
568 else:
574 else:
569 func = self._datafp
575 func = self._datafp
570 with func() as fp:
576 with func() as fp:
571 yield fp
577 yield fp
572
578
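# --- Editor's sketch (illustrative only, not part of this change) ---
# The three-way handle choice in _datareadfp(), reduced to a
# standalone shape (contextlib is already imported by this module;
# `toy_readfp` and its parameters are hypothetical names):
@contextlib.contextmanager
def toy_readfp(existingfp, writinghandles, openfn):
    if existingfp is not None:
        yield existingfp            # a caller-provided handle wins
    elif writinghandles:
        yield writinghandles[-1]    # reuse an active write handle
    else:
        with openfn() as fp:        # otherwise open (and close) one
            yield fp
# --- end of editor's sketch ---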
573 def tip(self):
579 def tip(self):
574 return self.node(len(self.index) - 1)
580 return self.node(len(self.index) - 1)
575 def __contains__(self, rev):
581 def __contains__(self, rev):
576 return 0 <= rev < len(self)
582 return 0 <= rev < len(self)
577 def __len__(self):
583 def __len__(self):
578 return len(self.index)
584 return len(self.index)
579 def __iter__(self):
585 def __iter__(self):
580 return iter(pycompat.xrange(len(self)))
586 return iter(pycompat.xrange(len(self)))
581 def revs(self, start=0, stop=None):
587 def revs(self, start=0, stop=None):
582 """iterate over all rev in this revlog (from start to stop)"""
588 """iterate over all rev in this revlog (from start to stop)"""
583 return storageutil.iterrevs(len(self), start=start, stop=stop)
589 return storageutil.iterrevs(len(self), start=start, stop=stop)
584
590
585 @util.propertycache
591 @util.propertycache
586 def nodemap(self):
592 def nodemap(self):
587 if self.index:
593 if self.index:
588 # populate mapping down to the initial node
594 # populate mapping down to the initial node
589 node0 = self.index[0][7] # get around changelog filtering
595 node0 = self.index[0][7] # get around changelog filtering
590 self.rev(node0)
596 self.rev(node0)
591 return self._nodecache
597 return self._nodecache
592
598
593 def hasnode(self, node):
599 def hasnode(self, node):
594 try:
600 try:
595 self.rev(node)
601 self.rev(node)
596 return True
602 return True
597 except KeyError:
603 except KeyError:
598 return False
604 return False
599
605
600 def candelta(self, baserev, rev):
606 def candelta(self, baserev, rev):
601 """whether two revisions (baserev, rev) can be delta-ed or not"""
607 """whether two revisions (baserev, rev) can be delta-ed or not"""
602 # Disable delta if either rev requires a content-changing flag
608 # Disable delta if either rev requires a content-changing flag
603 # processor (ex. LFS). This is because such a flag processor can alter
609 # processor (ex. LFS). This is because such a flag processor can alter
604 # the rawtext content that the delta will be based on, and two clients
610 # the rawtext content that the delta will be based on, and two clients
605 # could have the same revlog node with different flags (i.e. different
611 # could have the same revlog node with different flags (i.e. different
606 # rawtext contents) and the delta could be incompatible.
612 # rawtext contents) and the delta could be incompatible.
607 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
613 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
608 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
614 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
609 return False
615 return False
610 return True
616 return True
611
617
612 def clearcaches(self):
618 def clearcaches(self):
613 self._revisioncache = None
619 self._revisioncache = None
614 self._chainbasecache.clear()
620 self._chainbasecache.clear()
615 self._chunkcache = (0, '')
621 self._chunkcache = (0, '')
616 self._pcache = {}
622 self._pcache = {}
617
623
618 try:
624 try:
619 # If we are using the native C version, we are in a fun case
625 # If we are using the native C version, we are in a fun case
620 # where self.index, self.nodemap and self._nodecache are the same
626 # where self.index, self.nodemap and self._nodecache are the same
621 # object.
627 # object.
622 self._nodecache.clearcaches()
628 self._nodecache.clearcaches()
623 except AttributeError:
629 except AttributeError:
624 self._nodecache = {nullid: nullrev}
630 self._nodecache = {nullid: nullrev}
625 self._nodepos = None
631 self._nodepos = None
626
632
627 def rev(self, node):
633 def rev(self, node):
628 try:
634 try:
629 return self._nodecache[node]
635 return self._nodecache[node]
630 except TypeError:
636 except TypeError:
631 raise
637 raise
632 except error.RevlogError:
638 except error.RevlogError:
633 # parsers.c radix tree lookup failed
639 # parsers.c radix tree lookup failed
634 if node == wdirid or node in wdirfilenodeids:
640 if node == wdirid or node in wdirfilenodeids:
635 raise error.WdirUnsupported
641 raise error.WdirUnsupported
636 raise error.LookupError(node, self.indexfile, _('no node'))
642 raise error.LookupError(node, self.indexfile, _('no node'))
637 except KeyError:
643 except KeyError:
638 # pure python cache lookup failed
644 # pure python cache lookup failed
639 n = self._nodecache
645 n = self._nodecache
640 i = self.index
646 i = self.index
641 p = self._nodepos
647 p = self._nodepos
642 if p is None:
648 if p is None:
643 p = len(i) - 1
649 p = len(i) - 1
644 else:
650 else:
645 assert p < len(i)
651 assert p < len(i)
646 for r in pycompat.xrange(p, -1, -1):
652 for r in pycompat.xrange(p, -1, -1):
647 v = i[r][7]
653 v = i[r][7]
648 n[v] = r
654 n[v] = r
649 if v == node:
655 if v == node:
650 self._nodepos = r - 1
656 self._nodepos = r - 1
651 return r
657 return r
652 if node == wdirid or node in wdirfilenodeids:
658 if node == wdirid or node in wdirfilenodeids:
653 raise error.WdirUnsupported
659 raise error.WdirUnsupported
654 raise error.LookupError(node, self.indexfile, _('no node'))
660 raise error.LookupError(node, self.indexfile, _('no node'))
655
661
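# --- Editor's sketch (illustrative only, not part of this change) ---
# The pure-Python fallback in rev() fills the node->rev cache lazily,
# scanning the index backwards from the last unscanned position and
# remembering where to resume. Standalone miniature with a toy index
# of 1-tuples (`toy_rev` is a hypothetical name; the real index keeps
# the node in e[7]):
def toy_rev(index, cache, node, nodepos):
    for r in range(nodepos, -1, -1):
        v = index[r][0]
        cache[v] = r                # cache everything we walk past
        if v == node:
            return r, r - 1         # next cache miss resumes below r
    raise KeyError(node)

toy_index = [('a',), ('b',), ('c',)]
toy_cache = {}
assert toy_rev(toy_index, toy_cache, 'b', len(toy_index) - 1) == (1, 0)
assert toy_cache == {'c': 2, 'b': 1}
# --- end of editor's sketch ---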
656 # Accessors for index entries.
662 # Accessors for index entries.
657
663
658 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
664 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
659 # are flags.
665 # are flags.
660 def start(self, rev):
666 def start(self, rev):
661 return int(self.index[rev][0] >> 16)
667 return int(self.index[rev][0] >> 16)
662
668
663 def flags(self, rev):
669 def flags(self, rev):
664 return self.index[rev][0] & 0xFFFF
670 return self.index[rev][0] & 0xFFFF
665
671
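# --- Editor's sketch (illustrative only, not part of this change) ---
# start() and flags() unpack one integer that stores a 48-bit byte
# offset above 16 bits of flags. Standalone round-trip (the `_sk`
# names are hypothetical):
offset_sk, flags_sk = 262144, 0x0001
packed_sk = (offset_sk << 16) | flags_sk
assert packed_sk >> 16 == offset_sk      # what start() recovers
assert packed_sk & 0xFFFF == flags_sk    # what flags() recovers
# --- end of editor's sketch ---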
666 def length(self, rev):
672 def length(self, rev):
667 return self.index[rev][1]
673 return self.index[rev][1]
668
674
669 def rawsize(self, rev):
675 def rawsize(self, rev):
670 """return the length of the uncompressed text for a given revision"""
676 """return the length of the uncompressed text for a given revision"""
671 l = self.index[rev][2]
677 l = self.index[rev][2]
672 if l >= 0:
678 if l >= 0:
673 return l
679 return l
674
680
675 t = self.revision(rev, raw=True)
681 t = self.revision(rev, raw=True)
676 return len(t)
682 return len(t)
677
683
678 def size(self, rev):
684 def size(self, rev):
679 """length of non-raw text (processed by a "read" flag processor)"""
685 """length of non-raw text (processed by a "read" flag processor)"""
680 # fast path: if no "read" flag processor could change the content,
686 # fast path: if no "read" flag processor could change the content,
681 # size is rawsize. note: ELLIPSIS is known to not change the content.
687 # size is rawsize. note: ELLIPSIS is known to not change the content.
682 flags = self.flags(rev)
688 flags = self.flags(rev)
683 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
689 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
684 return self.rawsize(rev)
690 return self.rawsize(rev)
685
691
686 return len(self.revision(rev, raw=False))
692 return len(self.revision(rev, raw=False))
687
693
688 def chainbase(self, rev):
694 def chainbase(self, rev):
689 base = self._chainbasecache.get(rev)
695 base = self._chainbasecache.get(rev)
690 if base is not None:
696 if base is not None:
691 return base
697 return base
692
698
693 index = self.index
699 index = self.index
694 iterrev = rev
700 iterrev = rev
695 base = index[iterrev][3]
701 base = index[iterrev][3]
696 while base != iterrev:
702 while base != iterrev:
697 iterrev = base
703 iterrev = base
698 base = index[iterrev][3]
704 base = index[iterrev][3]
699
705
700 self._chainbasecache[rev] = base
706 self._chainbasecache[rev] = base
701 return base
707 return base
702
708
703 def linkrev(self, rev):
709 def linkrev(self, rev):
704 return self.index[rev][4]
710 return self.index[rev][4]
705
711
706 def parentrevs(self, rev):
712 def parentrevs(self, rev):
707 try:
713 try:
708 entry = self.index[rev]
714 entry = self.index[rev]
709 except IndexError:
715 except IndexError:
710 if rev == wdirrev:
716 if rev == wdirrev:
711 raise error.WdirUnsupported
717 raise error.WdirUnsupported
712 raise
718 raise
713
719
714 return entry[5], entry[6]
720 return entry[5], entry[6]
715
721
716 # fast parentrevs(rev) where rev isn't filtered
722 # fast parentrevs(rev) where rev isn't filtered
717 _uncheckedparentrevs = parentrevs
723 _uncheckedparentrevs = parentrevs
718
724
719 def node(self, rev):
725 def node(self, rev):
720 try:
726 try:
721 return self.index[rev][7]
727 return self.index[rev][7]
722 except IndexError:
728 except IndexError:
723 if rev == wdirrev:
729 if rev == wdirrev:
724 raise error.WdirUnsupported
730 raise error.WdirUnsupported
725 raise
731 raise
726
732
727 # Derived from index values.
733 # Derived from index values.
728
734
729 def end(self, rev):
735 def end(self, rev):
730 return self.start(rev) + self.length(rev)
736 return self.start(rev) + self.length(rev)
731
737
732 def parents(self, node):
738 def parents(self, node):
733 i = self.index
739 i = self.index
734 d = i[self.rev(node)]
740 d = i[self.rev(node)]
735 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
741 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
736
742
737 def chainlen(self, rev):
743 def chainlen(self, rev):
738 return self._chaininfo(rev)[0]
744 return self._chaininfo(rev)[0]
739
745
740 def _chaininfo(self, rev):
746 def _chaininfo(self, rev):
741 chaininfocache = self._chaininfocache
747 chaininfocache = self._chaininfocache
742 if rev in chaininfocache:
748 if rev in chaininfocache:
743 return chaininfocache[rev]
749 return chaininfocache[rev]
744 index = self.index
750 index = self.index
745 generaldelta = self._generaldelta
751 generaldelta = self._generaldelta
746 iterrev = rev
752 iterrev = rev
747 e = index[iterrev]
753 e = index[iterrev]
748 clen = 0
754 clen = 0
749 compresseddeltalen = 0
755 compresseddeltalen = 0
750 while iterrev != e[3]:
756 while iterrev != e[3]:
751 clen += 1
757 clen += 1
752 compresseddeltalen += e[1]
758 compresseddeltalen += e[1]
753 if generaldelta:
759 if generaldelta:
754 iterrev = e[3]
760 iterrev = e[3]
755 else:
761 else:
756 iterrev -= 1
762 iterrev -= 1
757 if iterrev in chaininfocache:
763 if iterrev in chaininfocache:
758 t = chaininfocache[iterrev]
764 t = chaininfocache[iterrev]
759 clen += t[0]
765 clen += t[0]
760 compresseddeltalen += t[1]
766 compresseddeltalen += t[1]
761 break
767 break
762 e = index[iterrev]
768 e = index[iterrev]
763 else:
769 else:
764 # Add text length of base since decompressing that also takes
770 # Add text length of base since decompressing that also takes
765 # work. For cache hits the length is already included.
771 # work. For cache hits the length is already included.
766 compresseddeltalen += e[1]
772 compresseddeltalen += e[1]
767 r = (clen, compresseddeltalen)
773 r = (clen, compresseddeltalen)
768 chaininfocache[rev] = r
774 chaininfocache[rev] = r
769 return r
775 return r
770
776
771 def _deltachain(self, rev, stoprev=None):
777 def _deltachain(self, rev, stoprev=None):
772 """Obtain the delta chain for a revision.
778 """Obtain the delta chain for a revision.
773
779
774 ``stoprev`` specifies a revision to stop at. If not specified, we
780 ``stoprev`` specifies a revision to stop at. If not specified, we
775 stop at the base of the chain.
781 stop at the base of the chain.
776
782
777 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
783 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
778 revs in ascending order and ``stopped`` is a bool indicating whether
784 revs in ascending order and ``stopped`` is a bool indicating whether
779 ``stoprev`` was hit.
785 ``stoprev`` was hit.
780 """
786 """
781 # Try C implementation.
787 # Try C implementation.
782 try:
788 try:
783 return self.index.deltachain(rev, stoprev, self._generaldelta)
789 return self.index.deltachain(rev, stoprev, self._generaldelta)
784 except AttributeError:
790 except AttributeError:
785 pass
791 pass
786
792
787 chain = []
793 chain = []
788
794
789 # Alias to prevent attribute lookup in tight loop.
795 # Alias to prevent attribute lookup in tight loop.
790 index = self.index
796 index = self.index
791 generaldelta = self._generaldelta
797 generaldelta = self._generaldelta
792
798
793 iterrev = rev
799 iterrev = rev
794 e = index[iterrev]
800 e = index[iterrev]
795 while iterrev != e[3] and iterrev != stoprev:
801 while iterrev != e[3] and iterrev != stoprev:
796 chain.append(iterrev)
802 chain.append(iterrev)
797 if generaldelta:
803 if generaldelta:
798 iterrev = e[3]
804 iterrev = e[3]
799 else:
805 else:
800 iterrev -= 1
806 iterrev -= 1
801 e = index[iterrev]
807 e = index[iterrev]
802
808
803 if iterrev == stoprev:
809 if iterrev == stoprev:
804 stopped = True
810 stopped = True
805 else:
811 else:
806 chain.append(iterrev)
812 chain.append(iterrev)
807 stopped = False
813 stopped = False
808
814
809 chain.reverse()
815 chain.reverse()
810 return chain, stopped
816 return chain, stopped
811
817
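# --- Editor's sketch (illustrative only, not part of this change) ---
# The pure-Python walk in _deltachain() follows e[3] (the delta base)
# until a revision is its own base, i.e. a full snapshot. Standalone
# miniature with a toy rev -> base mapping (`toy_deltachain` is a
# hypothetical name):
def toy_deltachain(base, rev):
    chain = []
    while base[rev] != rev:
        chain.append(rev)
        rev = base[rev]
    chain.append(rev)           # the snapshot terminates the chain
    chain.reverse()             # oldest first, like _deltachain()
    return chain

# rev 0 is a snapshot; 1 deltas against 0; 2 against 1; 3 against 0
toy_base = {0: 0, 1: 0, 2: 1, 3: 0}
assert toy_deltachain(toy_base, 2) == [0, 1, 2]
assert toy_deltachain(toy_base, 3) == [0, 3]
# --- end of editor's sketch ---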
812 def ancestors(self, revs, stoprev=0, inclusive=False):
818 def ancestors(self, revs, stoprev=0, inclusive=False):
813 """Generate the ancestors of 'revs' in reverse revision order.
819 """Generate the ancestors of 'revs' in reverse revision order.
814 Does not generate revs lower than stoprev.
820 Does not generate revs lower than stoprev.
815
821
816 See the documentation for ancestor.lazyancestors for more details."""
822 See the documentation for ancestor.lazyancestors for more details."""
817
823
818 # first, make sure start revisions aren't filtered
824 # first, make sure start revisions aren't filtered
819 revs = list(revs)
825 revs = list(revs)
820 checkrev = self.node
826 checkrev = self.node
821 for r in revs:
827 for r in revs:
822 checkrev(r)
828 checkrev(r)
823 # and we're sure ancestors aren't filtered as well
829 # and we're sure ancestors aren't filtered as well
824
830
825 if rustancestor is not None:
831 if rustancestor is not None:
826 lazyancestors = rustancestor.LazyAncestors
832 lazyancestors = rustancestor.LazyAncestors
827 arg = self.index
833 arg = self.index
828 elif util.safehasattr(parsers, 'rustlazyancestors'):
834 elif util.safehasattr(parsers, 'rustlazyancestors'):
829 lazyancestors = ancestor.rustlazyancestors
835 lazyancestors = ancestor.rustlazyancestors
830 arg = self.index
836 arg = self.index
831 else:
837 else:
832 lazyancestors = ancestor.lazyancestors
838 lazyancestors = ancestor.lazyancestors
833 arg = self._uncheckedparentrevs
839 arg = self._uncheckedparentrevs
834 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
840 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
835
841
836 def descendants(self, revs):
842 def descendants(self, revs):
837 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
843 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
838
844
839 def findcommonmissing(self, common=None, heads=None):
845 def findcommonmissing(self, common=None, heads=None):
840 """Return a tuple of the ancestors of common and the ancestors of heads
846 """Return a tuple of the ancestors of common and the ancestors of heads
841 that are not ancestors of common. In revset terminology, we return the
847 that are not ancestors of common. In revset terminology, we return the
842 tuple:
848 tuple:
843
849
844 ::common, (::heads) - (::common)
850 ::common, (::heads) - (::common)
845
851
846 The list is sorted by revision number, meaning it is
852 The list is sorted by revision number, meaning it is
847 topologically sorted.
853 topologically sorted.
848
854
849 'heads' and 'common' are both lists of node IDs. If heads is
855 'heads' and 'common' are both lists of node IDs. If heads is
850 not supplied, uses all of the revlog's heads. If common is not
856 not supplied, uses all of the revlog's heads. If common is not
851 supplied, uses nullid."""
857 supplied, uses nullid."""
852 if common is None:
858 if common is None:
853 common = [nullid]
859 common = [nullid]
854 if heads is None:
860 if heads is None:
855 heads = self.heads()
861 heads = self.heads()
856
862
857 common = [self.rev(n) for n in common]
863 common = [self.rev(n) for n in common]
858 heads = [self.rev(n) for n in heads]
864 heads = [self.rev(n) for n in heads]
859
865
860 # we want the ancestors, but inclusive
866 # we want the ancestors, but inclusive
861 class lazyset(object):
867 class lazyset(object):
862 def __init__(self, lazyvalues):
868 def __init__(self, lazyvalues):
863 self.addedvalues = set()
869 self.addedvalues = set()
864 self.lazyvalues = lazyvalues
870 self.lazyvalues = lazyvalues
865
871
866 def __contains__(self, value):
872 def __contains__(self, value):
867 return value in self.addedvalues or value in self.lazyvalues
873 return value in self.addedvalues or value in self.lazyvalues
868
874
869 def __iter__(self):
875 def __iter__(self):
870 added = self.addedvalues
876 added = self.addedvalues
871 for r in added:
877 for r in added:
872 yield r
878 yield r
873 for r in self.lazyvalues:
879 for r in self.lazyvalues:
874 if r not in added:
880 if r not in added:
875 yield r
881 yield r
876
882
877 def add(self, value):
883 def add(self, value):
878 self.addedvalues.add(value)
884 self.addedvalues.add(value)
879
885
880 def update(self, values):
886 def update(self, values):
881 self.addedvalues.update(values)
887 self.addedvalues.update(values)
882
888
883 has = lazyset(self.ancestors(common))
889 has = lazyset(self.ancestors(common))
884 has.add(nullrev)
890 has.add(nullrev)
885 has.update(common)
891 has.update(common)
886
892
887 # take all ancestors from heads that aren't in has
893 # take all ancestors from heads that aren't in has
888 missing = set()
894 missing = set()
889 visit = collections.deque(r for r in heads if r not in has)
895 visit = collections.deque(r for r in heads if r not in has)
890 while visit:
896 while visit:
891 r = visit.popleft()
897 r = visit.popleft()
892 if r in missing:
898 if r in missing:
893 continue
899 continue
894 else:
900 else:
895 missing.add(r)
901 missing.add(r)
896 for p in self.parentrevs(r):
902 for p in self.parentrevs(r):
897 if p not in has:
903 if p not in has:
898 visit.append(p)
904 visit.append(p)
899 missing = list(missing)
905 missing = list(missing)
900 missing.sort()
906 missing.sort()
901 return has, [self.node(miss) for miss in missing]
907 return has, [self.node(miss) for miss in missing]
902
908
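# --- Editor's sketch (illustrative only, not part of this change) ---
# The revset "::common, (::heads) - (::common)" on a toy linear
# history 0 <- 1 <- 2 <- 3, with plain sets standing in for the
# lazyset above (`toy_ancestors` is a hypothetical name):
def toy_ancestors(parents, rev):
    seen, stack = set(), [rev]
    while stack:
        r = stack.pop()
        if r >= 0 and r not in seen:
            seen.add(r)                 # inclusive, like `has`
            stack.extend(parents[r])
    return seen

toy_parents = {0: [-1], 1: [0], 2: [1], 3: [2]}
has_sk = toy_ancestors(toy_parents, 1)               # ::common == {0, 1}
missing_sk = toy_ancestors(toy_parents, 3) - has_sk  # (::heads) - (::common)
assert sorted(missing_sk) == [2, 3]
# --- end of editor's sketch ---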
903 def incrementalmissingrevs(self, common=None):
909 def incrementalmissingrevs(self, common=None):
904 """Return an object that can be used to incrementally compute the
910 """Return an object that can be used to incrementally compute the
905 revision numbers of the ancestors of arbitrary sets that are not
911 revision numbers of the ancestors of arbitrary sets that are not
906 ancestors of common. This is an ancestor.incrementalmissingancestors
912 ancestors of common. This is an ancestor.incrementalmissingancestors
907 object.
913 object.
908
914
909 'common' is a list of revision numbers. If common is not supplied, uses
915 'common' is a list of revision numbers. If common is not supplied, uses
910 nullrev.
916 nullrev.
911 """
917 """
912 if common is None:
918 if common is None:
913 common = [nullrev]
919 common = [nullrev]
914
920
915 if rustancestor is not None:
921 if rustancestor is not None:
916 return rustancestor.MissingAncestors(self.index, common)
922 return rustancestor.MissingAncestors(self.index, common)
917 return ancestor.incrementalmissingancestors(self.parentrevs, common)
923 return ancestor.incrementalmissingancestors(self.parentrevs, common)
918
924
919 def findmissingrevs(self, common=None, heads=None):
925 def findmissingrevs(self, common=None, heads=None):
920 """Return the revision numbers of the ancestors of heads that
926 """Return the revision numbers of the ancestors of heads that
921 are not ancestors of common.
927 are not ancestors of common.
922
928
923 More specifically, return a list of revision numbers corresponding to
929 More specifically, return a list of revision numbers corresponding to
924 nodes N such that every N satisfies the following constraints:
930 nodes N such that every N satisfies the following constraints:
925
931
926 1. N is an ancestor of some node in 'heads'
932 1. N is an ancestor of some node in 'heads'
927 2. N is not an ancestor of any node in 'common'
933 2. N is not an ancestor of any node in 'common'
928
934
929 The list is sorted by revision number, meaning it is
935 The list is sorted by revision number, meaning it is
930 topologically sorted.
936 topologically sorted.
931
937
932 'heads' and 'common' are both lists of revision numbers. If heads is
938 'heads' and 'common' are both lists of revision numbers. If heads is
933 not supplied, uses all of the revlog's heads. If common is not
939 not supplied, uses all of the revlog's heads. If common is not
934 supplied, uses nullrev."""
940 supplied, uses nullrev."""
935 if common is None:
941 if common is None:
936 common = [nullrev]
942 common = [nullrev]
937 if heads is None:
943 if heads is None:
938 heads = self.headrevs()
944 heads = self.headrevs()
939
945
940 inc = self.incrementalmissingrevs(common=common)
946 inc = self.incrementalmissingrevs(common=common)
941 return inc.missingancestors(heads)
947 return inc.missingancestors(heads)
942
948
943 def findmissing(self, common=None, heads=None):
949 def findmissing(self, common=None, heads=None):
944 """Return the ancestors of heads that are not ancestors of common.
950 """Return the ancestors of heads that are not ancestors of common.
945
951
946 More specifically, return a list of nodes N such that every N
952 More specifically, return a list of nodes N such that every N
947 satisfies the following constraints:
953 satisfies the following constraints:
948
954
949 1. N is an ancestor of some node in 'heads'
955 1. N is an ancestor of some node in 'heads'
950 2. N is not an ancestor of any node in 'common'
956 2. N is not an ancestor of any node in 'common'
951
957
952 The list is sorted by revision number, meaning it is
958 The list is sorted by revision number, meaning it is
953 topologically sorted.
959 topologically sorted.
954
960
955 'heads' and 'common' are both lists of node IDs. If heads is
961 'heads' and 'common' are both lists of node IDs. If heads is
956 not supplied, uses all of the revlog's heads. If common is not
962 not supplied, uses all of the revlog's heads. If common is not
957 supplied, uses nullid."""
963 supplied, uses nullid."""
958 if common is None:
964 if common is None:
959 common = [nullid]
965 common = [nullid]
960 if heads is None:
966 if heads is None:
961 heads = self.heads()
967 heads = self.heads()
962
968
963 common = [self.rev(n) for n in common]
969 common = [self.rev(n) for n in common]
964 heads = [self.rev(n) for n in heads]
970 heads = [self.rev(n) for n in heads]
965
971
966 inc = self.incrementalmissingrevs(common=common)
972 inc = self.incrementalmissingrevs(common=common)
967 return [self.node(r) for r in inc.missingancestors(heads)]
973 return [self.node(r) for r in inc.missingancestors(heads)]
968
974
969 def nodesbetween(self, roots=None, heads=None):
975 def nodesbetween(self, roots=None, heads=None):
970 """Return a topological path from 'roots' to 'heads'.
976 """Return a topological path from 'roots' to 'heads'.
971
977
972 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
978 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
973 topologically sorted list of all nodes N that satisfy both of
979 topologically sorted list of all nodes N that satisfy both of
974 these constraints:
980 these constraints:
975
981
976 1. N is a descendant of some node in 'roots'
982 1. N is a descendant of some node in 'roots'
977 2. N is an ancestor of some node in 'heads'
983 2. N is an ancestor of some node in 'heads'
978
984
979 Every node is considered to be both a descendant and an ancestor
985 Every node is considered to be both a descendant and an ancestor
980 of itself, so every reachable node in 'roots' and 'heads' will be
986 of itself, so every reachable node in 'roots' and 'heads' will be
981 included in 'nodes'.
987 included in 'nodes'.
982
988
983 'outroots' is the list of reachable nodes in 'roots', i.e., the
989 'outroots' is the list of reachable nodes in 'roots', i.e., the
984 subset of 'roots' that is returned in 'nodes'. Likewise,
990 subset of 'roots' that is returned in 'nodes'. Likewise,
985 'outheads' is the subset of 'heads' that is also in 'nodes'.
991 'outheads' is the subset of 'heads' that is also in 'nodes'.
986
992
987 'roots' and 'heads' are both lists of node IDs. If 'roots' is
993 'roots' and 'heads' are both lists of node IDs. If 'roots' is
988 unspecified, uses nullid as the only root. If 'heads' is
994 unspecified, uses nullid as the only root. If 'heads' is
989 unspecified, uses list of all of the revlog's heads."""
995 unspecified, uses list of all of the revlog's heads."""
990 nonodes = ([], [], [])
996 nonodes = ([], [], [])
991 if roots is not None:
997 if roots is not None:
992 roots = list(roots)
998 roots = list(roots)
993 if not roots:
999 if not roots:
994 return nonodes
1000 return nonodes
995 lowestrev = min([self.rev(n) for n in roots])
1001 lowestrev = min([self.rev(n) for n in roots])
996 else:
1002 else:
997 roots = [nullid] # Everybody's a descendant of nullid
1003 roots = [nullid] # Everybody's a descendant of nullid
998 lowestrev = nullrev
1004 lowestrev = nullrev
999 if (lowestrev == nullrev) and (heads is None):
1005 if (lowestrev == nullrev) and (heads is None):
1000 # We want _all_ the nodes!
1006 # We want _all_ the nodes!
1001 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1007 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1002 if heads is None:
1008 if heads is None:
1003 # All nodes are ancestors, so the latest ancestor is the last
1009 # All nodes are ancestors, so the latest ancestor is the last
1004 # node.
1010 # node.
1005 highestrev = len(self) - 1
1011 highestrev = len(self) - 1
1006 # Set ancestors to None to signal that every node is an ancestor.
1012 # Set ancestors to None to signal that every node is an ancestor.
1007 ancestors = None
1013 ancestors = None
1008 # Set heads to an empty dictionary for later discovery of heads
1014 # Set heads to an empty dictionary for later discovery of heads
1009 heads = {}
1015 heads = {}
1010 else:
1016 else:
1011 heads = list(heads)
1017 heads = list(heads)
1012 if not heads:
1018 if not heads:
1013 return nonodes
1019 return nonodes
1014 ancestors = set()
1020 ancestors = set()
1015 # Turn heads into a dictionary so we can remove 'fake' heads.
1021 # Turn heads into a dictionary so we can remove 'fake' heads.
1016 # Also, later we will be using it to filter out the heads we can't
1022 # Also, later we will be using it to filter out the heads we can't
1017 # find from roots.
1023 # find from roots.
1018 heads = dict.fromkeys(heads, False)
1024 heads = dict.fromkeys(heads, False)
1019 # Start at the top and keep marking parents until we're done.
1025 # Start at the top and keep marking parents until we're done.
1020 nodestotag = set(heads)
1026 nodestotag = set(heads)
1021 # Remember where the top was so we can use it as a limit later.
1027 # Remember where the top was so we can use it as a limit later.
1022 highestrev = max([self.rev(n) for n in nodestotag])
1028 highestrev = max([self.rev(n) for n in nodestotag])
1023 while nodestotag:
1029 while nodestotag:
1024 # grab a node to tag
1030 # grab a node to tag
1025 n = nodestotag.pop()
1031 n = nodestotag.pop()
1026 # Never tag nullid
1032 # Never tag nullid
1027 if n == nullid:
1033 if n == nullid:
1028 continue
1034 continue
1029 # A node's revision number represents its place in a
1035 # A node's revision number represents its place in a
1030 # topologically sorted list of nodes.
1036 # topologically sorted list of nodes.
1031 r = self.rev(n)
1037 r = self.rev(n)
1032 if r >= lowestrev:
1038 if r >= lowestrev:
1033 if n not in ancestors:
1039 if n not in ancestors:
1034 # If we are possibly a descendant of one of the roots
1040 # If we are possibly a descendant of one of the roots
1035 # and we haven't already been marked as an ancestor
1041 # and we haven't already been marked as an ancestor
1036 ancestors.add(n) # Mark as ancestor
1042 ancestors.add(n) # Mark as ancestor
1037 # Add non-nullid parents to list of nodes to tag.
1043 # Add non-nullid parents to list of nodes to tag.
1038 nodestotag.update([p for p in self.parents(n) if
1044 nodestotag.update([p for p in self.parents(n) if
1039 p != nullid])
1045 p != nullid])
1040 elif n in heads: # We've seen it before, is it a fake head?
1046 elif n in heads: # We've seen it before, is it a fake head?
1041 # So it is; real heads should not be the ancestors of
1047 # So it is; real heads should not be the ancestors of
1042 # any other heads.
1048 # any other heads.
1043 heads.pop(n)
1049 heads.pop(n)
1044 if not ancestors:
1050 if not ancestors:
1045 return nonodes
1051 return nonodes
1046 # Now that we have our set of ancestors, we want to remove any
1052 # Now that we have our set of ancestors, we want to remove any
1047 # roots that are not ancestors.
1053 # roots that are not ancestors.
1048
1054
1049 # If one of the roots was nullid, everything is included anyway.
1055 # If one of the roots was nullid, everything is included anyway.
1050 if lowestrev > nullrev:
1056 if lowestrev > nullrev:
1051 # But, since we weren't, let's recompute the lowest rev to not
1057 # But, since we weren't, let's recompute the lowest rev to not
1052 # include roots that aren't ancestors.
1058 # include roots that aren't ancestors.
1053
1059
1054 # Filter out roots that aren't ancestors of heads
1060 # Filter out roots that aren't ancestors of heads
1055 roots = [root for root in roots if root in ancestors]
1061 roots = [root for root in roots if root in ancestors]
1056 # Recompute the lowest revision
1062 # Recompute the lowest revision
1057 if roots:
1063 if roots:
1058 lowestrev = min([self.rev(root) for root in roots])
1064 lowestrev = min([self.rev(root) for root in roots])
1059 else:
1065 else:
1060 # No more roots? Return empty list
1066 # No more roots? Return empty list
1061 return nonodes
1067 return nonodes
1062 else:
1068 else:
1063 # We are descending from nullid, and don't need to care about
1069 # We are descending from nullid, and don't need to care about
1064 # any other roots.
1070 # any other roots.
1065 lowestrev = nullrev
1071 lowestrev = nullrev
1066 roots = [nullid]
1072 roots = [nullid]
1067 # Transform our roots list into a set.
1073 # Transform our roots list into a set.
1068 descendants = set(roots)
1074 descendants = set(roots)
1069 # Also, keep the original roots so we can filter out roots that aren't
1075 # Also, keep the original roots so we can filter out roots that aren't
1070 # 'real' roots (i.e. are descended from other roots).
1076 # 'real' roots (i.e. are descended from other roots).
1071 roots = descendants.copy()
1077 roots = descendants.copy()
1072 # Our topologically sorted list of output nodes.
1078 # Our topologically sorted list of output nodes.
1073 orderedout = []
1079 orderedout = []
1074 # Don't start at nullid since we don't want nullid in our output list,
1080 # Don't start at nullid since we don't want nullid in our output list,
1075 # and if nullid shows up in descendants, empty parents will look like
1081 # and if nullid shows up in descendants, empty parents will look like
1076 # they're descendants.
1082 # they're descendants.
1077 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1083 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1078 n = self.node(r)
1084 n = self.node(r)
1079 isdescendant = False
1085 isdescendant = False
1080 if lowestrev == nullrev: # Everybody is a descendant of nullid
1086 if lowestrev == nullrev: # Everybody is a descendant of nullid
1081 isdescendant = True
1087 isdescendant = True
1082 elif n in descendants:
1088 elif n in descendants:
1083 # n is already a descendant
1089 # n is already a descendant
1084 isdescendant = True
1090 isdescendant = True
1085 # This check only needs to be done here because all the roots
1091 # This check only needs to be done here because all the roots
1086 # will start being marked as descendants before the loop.
1092 # will start being marked as descendants before the loop.
1087 if n in roots:
1093 if n in roots:
1088 # If n was a root, check if it's a 'real' root.
1094 # If n was a root, check if it's a 'real' root.
1089 p = tuple(self.parents(n))
1095 p = tuple(self.parents(n))
1090 # If any of its parents are descendants, it's not a root.
1096 # If any of its parents are descendants, it's not a root.
1091 if (p[0] in descendants) or (p[1] in descendants):
1097 if (p[0] in descendants) or (p[1] in descendants):
1092 roots.remove(n)
1098 roots.remove(n)
1093 else:
1099 else:
1094 p = tuple(self.parents(n))
1100 p = tuple(self.parents(n))
1095 # A node is a descendant if either of its parents are
1101 # A node is a descendant if either of its parents are
1096 # descendants. (We seeded the descendants set with the roots
1102 # descendants. (We seeded the descendants set with the roots
1097 # up there, remember?)
1103 # up there, remember?)
1098 if (p[0] in descendants) or (p[1] in descendants):
1104 if (p[0] in descendants) or (p[1] in descendants):
1099 descendants.add(n)
1105 descendants.add(n)
1100 isdescendant = True
1106 isdescendant = True
1101 if isdescendant and ((ancestors is None) or (n in ancestors)):
1107 if isdescendant and ((ancestors is None) or (n in ancestors)):
1102 # Only include nodes that are both descendants and ancestors.
1108 # Only include nodes that are both descendants and ancestors.
1103 orderedout.append(n)
1109 orderedout.append(n)
1104 if (ancestors is not None) and (n in heads):
1110 if (ancestors is not None) and (n in heads):
1105 # We're trying to figure out which heads are reachable
1111 # We're trying to figure out which heads are reachable
1106 # from roots.
1112 # from roots.
1107 # Mark this head as having been reached
1113 # Mark this head as having been reached
1108 heads[n] = True
1114 heads[n] = True
1109 elif ancestors is None:
1115 elif ancestors is None:
1110 # Otherwise, we're trying to discover the heads.
1116 # Otherwise, we're trying to discover the heads.
1111 # Assume this is a head because if it isn't, the next step
1117 # Assume this is a head because if it isn't, the next step
1112 # will eventually remove it.
1118 # will eventually remove it.
1113 heads[n] = True
1119 heads[n] = True
1114 # But, obviously its parents aren't.
1120 # But, obviously its parents aren't.
1115 for p in self.parents(n):
1121 for p in self.parents(n):
1116 heads.pop(p, None)
1122 heads.pop(p, None)
1117 heads = [head for head, flag in heads.iteritems() if flag]
1123 heads = [head for head, flag in heads.iteritems() if flag]
1118 roots = list(roots)
1124 roots = list(roots)
1119 assert orderedout
1125 assert orderedout
1120 assert roots
1126 assert roots
1121 assert heads
1127 assert heads
1122 return (orderedout, roots, heads)
1128 return (orderedout, roots, heads)
1123
1129
1124 def headrevs(self, revs=None):
1130 def headrevs(self, revs=None):
1125 if revs is None:
1131 if revs is None:
1126 try:
1132 try:
1127 return self.index.headrevs()
1133 return self.index.headrevs()
1128 except AttributeError:
1134 except AttributeError:
1129 return self._headrevs()
1135 return self._headrevs()
1130 if rustdagop is not None:
1136 if rustdagop is not None:
1131 return rustdagop.headrevs(self.index, revs)
1137 return rustdagop.headrevs(self.index, revs)
1132 return dagop.headrevs(revs, self._uncheckedparentrevs)
1138 return dagop.headrevs(revs, self._uncheckedparentrevs)
1133
1139
1134 def computephases(self, roots):
1140 def computephases(self, roots):
1135 return self.index.computephasesmapsets(roots)
1141 return self.index.computephasesmapsets(roots)
1136
1142
1137 def _headrevs(self):
1143 def _headrevs(self):
1138 count = len(self)
1144 count = len(self)
1139 if not count:
1145 if not count:
1140 return [nullrev]
1146 return [nullrev]
1141 # we won't iterate over filtered revs, so nobody is a head at the start
1147 # we won't iterate over filtered revs, so nobody is a head at the start
1142 ishead = [0] * (count + 1)
1148 ishead = [0] * (count + 1)
1143 index = self.index
1149 index = self.index
1144 for r in self:
1150 for r in self:
1145 ishead[r] = 1 # I may be a head
1151 ishead[r] = 1 # I may be a head
1146 e = index[r]
1152 e = index[r]
1147 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1153 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1148 return [r for r, val in enumerate(ishead) if val]
1154 return [r for r, val in enumerate(ishead) if val]
1149
1155
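# --- Editor's sketch (illustrative only, not part of this change) ---
# The marking pass in _headrevs(): every rev starts as a candidate
# head and is demoted the moment it shows up as somebody's parent.
# Standalone run over (p1, p2) pairs, nullrev spelled as -1 (the `_sk`
# names are hypothetical):
toy_parents_sk = [(-1, -1), (0, -1), (0, -1)]   # revs 1 and 2 fork off 0
ishead_sk = [1] * len(toy_parents_sk)
for p1, p2 in toy_parents_sk:
    for p in (p1, p2):
        if p >= 0:
            ishead_sk[p] = 0            # a parent cannot be a head
assert [r for r, v in enumerate(ishead_sk) if v] == [1, 2]
# --- end of editor's sketch ---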
1150 def heads(self, start=None, stop=None):
1156 def heads(self, start=None, stop=None):
1151 """return the list of all nodes that have no children
1157 """return the list of all nodes that have no children
1152
1158
1153 if start is specified, only heads that are descendants of
1159 if start is specified, only heads that are descendants of
1154 start will be returned
1160 start will be returned
1155 if stop is specified, it will consider all the revs from stop
1161 if stop is specified, it will consider all the revs from stop
1156 as if they had no children
1162 as if they had no children
1157 """
1163 """
1158 if start is None and stop is None:
1164 if start is None and stop is None:
1159 if not len(self):
1165 if not len(self):
1160 return [nullid]
1166 return [nullid]
1161 return [self.node(r) for r in self.headrevs()]
1167 return [self.node(r) for r in self.headrevs()]
1162
1168
1163 if start is None:
1169 if start is None:
1164 start = nullrev
1170 start = nullrev
1165 else:
1171 else:
1166 start = self.rev(start)
1172 start = self.rev(start)
1167
1173
1168 stoprevs = set(self.rev(n) for n in stop or [])
1174 stoprevs = set(self.rev(n) for n in stop or [])
1169
1175
1170 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1176 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1171 stoprevs=stoprevs)
1177 stoprevs=stoprevs)
1172
1178
1173 return [self.node(rev) for rev in revs]
1179 return [self.node(rev) for rev in revs]
1174
1180
1175 def children(self, node):
1181 def children(self, node):
1176 """find the children of a given node"""
1182 """find the children of a given node"""
1177 c = []
1183 c = []
1178 p = self.rev(node)
1184 p = self.rev(node)
1179 for r in self.revs(start=p + 1):
1185 for r in self.revs(start=p + 1):
1180 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1186 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1181 if prevs:
1187 if prevs:
1182 for pr in prevs:
1188 for pr in prevs:
1183 if pr == p:
1189 if pr == p:
1184 c.append(self.node(r))
1190 c.append(self.node(r))
1185 elif p == nullrev:
1191 elif p == nullrev:
1186 c.append(self.node(r))
1192 c.append(self.node(r))
1187 return c
1193 return c
1188
1194
1189 def commonancestorsheads(self, a, b):
1195 def commonancestorsheads(self, a, b):
1190 """calculate all the heads of the common ancestors of nodes a and b"""
1196 """calculate all the heads of the common ancestors of nodes a and b"""
1191 a, b = self.rev(a), self.rev(b)
1197 a, b = self.rev(a), self.rev(b)
1192 ancs = self._commonancestorsheads(a, b)
1198 ancs = self._commonancestorsheads(a, b)
1193 return pycompat.maplist(self.node, ancs)
1199 return pycompat.maplist(self.node, ancs)
1194
1200
1195 def _commonancestorsheads(self, *revs):
1201 def _commonancestorsheads(self, *revs):
1196 """calculate all the heads of the common ancestors of revs"""
1202 """calculate all the heads of the common ancestors of revs"""
1197 try:
1203 try:
1198 ancs = self.index.commonancestorsheads(*revs)
1204 ancs = self.index.commonancestorsheads(*revs)
1199 except (AttributeError, OverflowError): # C implementation failed
1205 except (AttributeError, OverflowError): # C implementation failed
1200 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1206 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1201 return ancs
1207 return ancs
1202
1208
1203 def isancestor(self, a, b):
1209 def isancestor(self, a, b):
1204 """return True if node a is an ancestor of node b
1210 """return True if node a is an ancestor of node b
1205
1211
1206 A revision is considered an ancestor of itself."""
1212 A revision is considered an ancestor of itself."""
1207 a, b = self.rev(a), self.rev(b)
1213 a, b = self.rev(a), self.rev(b)
1208 return self.isancestorrev(a, b)
1214 return self.isancestorrev(a, b)
1209
1215
1210 def isancestorrev(self, a, b):
1216 def isancestorrev(self, a, b):
1211 """return True if revision a is an ancestor of revision b
1217 """return True if revision a is an ancestor of revision b
1212
1218
1213 A revision is considered an ancestor of itself.
1219 A revision is considered an ancestor of itself.
1214
1220
1215 The implementation of this is trivial but the use of
1221 The implementation of this is trivial but the use of
1216 reachableroots is not."""
1222 reachableroots is not."""
1217 if a == nullrev:
1223 if a == nullrev:
1218 return True
1224 return True
1219 elif a == b:
1225 elif a == b:
1220 return True
1226 return True
1221 elif a > b:
1227 elif a > b:
1222 return False
1228 return False
1223 return bool(self.reachableroots(a, [b], [a], includepath=False))
1229 return bool(self.reachableroots(a, [b], [a], includepath=False))
1224
1230
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::<roots> and <roots>::<heads>))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(minroot, heads, roots,
                                              includepath)
        except AttributeError:
            return dagop._reachablerootspure(self.parentrevs,
                                             minroot, roots, heads, includepath)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except error.LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass
    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass
    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _('no match found'))
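    # Example of the lookup forms accepted above (an illustrative sketch; the
    # identifier values are hypothetical):
    #
    #   rl.lookup(5)          # revision number -> binary node
    #   rl.lookup('5')        # str(revision number) also resolves
    #   rl.lookup('1e4f04')   # unambiguous hex prefix -> binary node
    #   rl.lookup(fullnode)   # a known 20-byte binary node returns itself
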
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise error.LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)
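    # Example usage (an illustrative sketch; the prefixes shown are made up):
    #
    #   rl.shortest(node)               # -> '1e4', the shortest unique prefix
    #   rl.shortest(node, minlength=6)  # -> '1e4f04', padded to 6 digits
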
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data
    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _('partial read of revlog %s; expected %d bytes from '
                      'offset %d, got %d') %
                    (self.indexfile if self._inline else self.datafile,
                     length, realoffset, len(d) - startoffset))

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _('partial read of revlog %s; expected %d bytes from offset '
                  '%d, got %d') %
                (self.indexfile if self._inline else self.datafile,
                 length, offset, len(d)))

        return d
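    # Worked example of the windowing above (assuming the default chunk cache
    # size of 65536 bytes): a request for offset=70000, length=1000 is widened
    # to the surrounding cache-aligned window:
    #
    #   realoffset = 70000 & ~(65536 - 1)                         -> 65536
    #   reallength = ((70000 + 1000 + 65536) & ~(65536 - 1))
    #                - 65536                                      -> 65536
    #
    # so the read spans [65536, 131072) and the requested 1000 bytes come back
    # as a buffer at startoffset = 70000 - 65536 = 4464.
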
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)
    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)
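    # Worked example of the inline adjustment above (an illustrative sketch;
    # the 64-byte entry size matches the common revlog index format, but treat
    # the figures as assumptions): in an inline revlog, index entries and data
    # chunks are interleaved in the .i file, so data for rev 2 at logical
    # offset 100 actually lives at
    #
    #   100 + (2 + 1) * 64 = 292
    #
    # i.e. past the three index entries for revs 0..2.
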
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(self, revs,
                                                targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l
    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1
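    # Example of the three cases above (an illustrative sketch with made-up
    # revision numbers):
    #
    #   rl.deltaparent(5)  # -> nullrev if rev 5 is stored as a full text
    #   rl.deltaparent(5)  # -> 2 with generaldelta: the base kept in the index
    #   rl.deltaparent(5)  # -> 4 without generaldelta: always the previous rev
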
    def issnapshot(self, rev):
        """tells whether rev is a snapshot
        """
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)
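    # Worked example of the sparse-revlog rules above (hypothetical entries):
    #
    #   rev 8: base == 8 (full text)           -> snapshot
    #   rev 9: base == 8 and 8 is a parent     -> not a snapshot (plain delta)
    #   rev 9: base == 8 but 8 is not a parent -> snapshot iff issnapshot(8)
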
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))
    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._revisioncache:
            if self._revisioncache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._revisioncache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._revisioncache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._revisioncache[2]

            cachedrev = self._revisioncache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._revisioncache[2]

            # drop cache to save memory
            self._revisioncache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._revisioncache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text
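    # Example usage (an illustrative sketch; ``rl`` and ``node`` are assumed
    # from calling code):
    #
    #   text = rl.revision(node)           # fulltext after flag transforms
    #   raw = rl.revision(node, raw=True)  # stored rawtext, as used when
    #                                      # generating changegroups
    #   rl.revision(0)                     # revision numbers work as well
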
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)
    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise error.ProgrammingError(_("invalid '%s' operation") %
                                         operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise error.RevlogError(_("incompatible revision flag '%#x'") %
                                    (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in self._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise error.RevlogError(message)

                processor = self._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash
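    # Sketch of the processor triple consumed above (an illustrative,
    # hypothetical no-op processor; the signatures mirror how
    # ``readtransform``, ``writetransform`` and ``rawtransform`` are called):
    #
    #   def _noopread(rl, text):
    #       return text, True      # (possibly transformed text, validatehash)
    #
    #   def _noopwrite(rl, text):
    #       return text, True
    #
    #   def _noopraw(rl, text):
    #       return True            # raw transform only reports hash validity
    #
    #   processor = (_noopread, _noopwrite, _noopraw)
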
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(_("integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode)))
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise
    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(_("%s not found in the transaction")
                                    % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp('r') as ifh, self._datafp('w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored.
        """

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                    % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)
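    # Example usage (an illustrative sketch; the transaction is assumed to
    # come from calling code such as a localrepository):
    #
    #   with repo.transaction('example') as tr:
    #       node = rl.addrevision(text, tr, linkrev, p1node, p2node)
    #
    # Adding an identical (text, p1, p2) triple again simply returns the
    # existing node.
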
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()
    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data
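    # Example of the (header, data) convention above (illustrative values):
    #
    #   ('', b'x\x9c...')   # compressed; the engine header ('x' for zlib)
    #                       # is already embedded in the data
    #   ('u', b'hello\n')   # incompressible text, stored with a 'u' marker
    #   ('', b'\0...')      # data starting with NUL needs no marker
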
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        # Reset the pure node cache start lookup offset to account for new
        # revision.
        if self._nodepos is not None:
            self._nodepos = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node
2082 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2088 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2083 # Files opened in a+ mode have inconsistent behavior on various
2089 # Files opened in a+ mode have inconsistent behavior on various
2084 # platforms. Windows requires that a file positioning call be made
2090 # platforms. Windows requires that a file positioning call be made
2085 # when the file handle transitions between reads and writes. See
2091 # when the file handle transitions between reads and writes. See
2086 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2092 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2087 # platforms, Python or the platform itself can be buggy. Some versions
2093 # platforms, Python or the platform itself can be buggy. Some versions
2088 # of Solaris have been observed to not append at the end of the file
2094 # of Solaris have been observed to not append at the end of the file
2089 # if the file was seeked to before the end. See issue4943 for more.
2095 # if the file was seeked to before the end. See issue4943 for more.
2090 #
2096 #
2091 # We work around this issue by inserting a seek() before writing.
2097 # We work around this issue by inserting a seek() before writing.
2092 # Note: This is likely not necessary on Python 3. However, because
2098 # Note: This is likely not necessary on Python 3. However, because
2093 # the file handle is reused for reads and may be seeked there, we need
2099 # the file handle is reused for reads and may be seeked there, we need
2094 # to be careful before changing this.
2100 # to be careful before changing this.
2095 ifh.seek(0, os.SEEK_END)
2101 ifh.seek(0, os.SEEK_END)
2096 if dfh:
2102 if dfh:
2097 dfh.seek(0, os.SEEK_END)
2103 dfh.seek(0, os.SEEK_END)
2098
2104
2099 curr = len(self) - 1
2105 curr = len(self) - 1
2100 if not self._inline:
2106 if not self._inline:
2101 transaction.add(self.datafile, offset)
2107 transaction.add(self.datafile, offset)
2102 transaction.add(self.indexfile, curr * len(entry))
2108 transaction.add(self.indexfile, curr * len(entry))
2103 if data[0]:
2109 if data[0]:
2104 dfh.write(data[0])
2110 dfh.write(data[0])
2105 dfh.write(data[1])
2111 dfh.write(data[1])
2106 ifh.write(entry)
2112 ifh.write(entry)
2107 else:
2113 else:
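            # Inline revlog: index entries and revision data are
            # interleaved in the index file, so everything is written
            # through ifh.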
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError('cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()

        return nodes

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(minlink, len(self) - 1,
                                            self.headrevs(),
                                            self.linkrev, self.parentrevs)

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

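        # The index keeps a sentinel entry for the null revision at the
        # end; slicing up to -1 drops the stripped revisions while
        # preserving that sentinel.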
        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
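                # Inline revlog: the index file also carries the revision
                # data, so subtract both the index entries and the
                # per-revision data lengths to find any excess.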
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False,
                      deltamode=repository.CG_DELTAMODE_STD):
        if nodesorder not in ('nodes', 'storage', 'linear', None):
            raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                         nodesorder)

        if nodesorder is None and not self._generaldelta:
            nodesorder = 'storage'

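        # Revlogs that do not store delta chains can only emit full texts,
        # unless the caller explicitly asked for deltas against the
        # previous revision.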
        if (not self._storedeltachains and
                deltamode != repository.CG_DELTAMODE_PREV):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self, nodes, nodesorder, revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions)

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is
           the fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).
        DELTAREUSEFULLADD
           Revisions will be re-added to the destination through the regular
           ``addrevision()`` path, letting the destination compute storage
           from scratch.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents`` argument
        controls whether deltas are force-computed against both parents for
        merges. By default, the destination revlog's current setting is used.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            deltacomputer = deltautil.deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
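                # Index tuple layout: entry[0] packs offset and flags,
                # entry[4] is the linkrev, entry[5]/entry[6] are the
                # parent revs, and entry[7] is the node id.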
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                    self.version)

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
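
        # The tombstone must not be larger than the data being censored,
        # since the revision's recorded size must not grow.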
        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(_('censor tombstone must be no longer than '
                                'censored data'))

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile,
                       censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
                                     p1, p2, censornode, REVIDX_ISCENSORED)

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(_('censored revision stored as delta; '
                                        'cannot censor'),
                                      hint=_('censoring of revlogs is not '
                                             'fully implemented; please report '
                                             'this bug'))
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(_('cannot censor due to censored '
                                        'revision having delta stored'))
                rawtext = self._chunk(rev)
            else:
                rawtext = self.revision(rev, raw=True)

            newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
                                 self.flags(rev))

        tr.addbackup(self.indexfile, location='store')
        if not self._inline:
            tr.addbackup(self.datafile, location='store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_('data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_('index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state['expectedversion']:
            yield revlogproblem(
                warning=_("warning: '%s' uses revlog format %d; expected %d") %
                        (self.indexfile, version, state['expectedversion']))

        state['skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------
            #   flags()             | 0      | 0      | 0     | not 0
            #   renamed()           | False  | True   | False | ?
            #   rawtext[0:2]=='\1\n'| False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "revision(rev, raw=True)". "text"
            # mentioned below is "revision(rev, raw=False)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see revlog.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM: length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get('skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    state['skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.revision(node, raw=True))

                if l1 != l2:
                    yield revlogproblem(
                        error=_('unpacked size is %d, %d expected') % (l2, l1),
                        node=node)

            except error.CensoredNodeError:
                if state['erroroncensored']:
                    yield revlogproblem(error=_('censored file data'),
                                        node=node)
                state['skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_('unpacking %s: %s') % (short(node),
                                                   stringutil.forcebytestr(e)),
                    node=node)
                state['skipread'].add(node)

    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                    revisionscount=False, trackedsize=False,
                    storedsize=False):
        d = {}

        if exclusivefiles:
            d['exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d['exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d['sharedfiles'] = []

        if revisionscount:
            d['revisionscount'] = len(self)

        if trackedsize:
            d['trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d['storedsize'] = sum(self.opener.stat(path).st_size
                                  for path in self.files())

        return d