mercurial: switch to util.timer for all interval timings...
Simon Farnsworth
r30975:22fbca1d default
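The change replaces direct time.time() arithmetic with util.timer() wherever an elapsed interval is computed. A minimal before/after sketch of that pattern, assuming util.timer() behaves like a high-resolution interval clock (time.perf_counter below is only an illustrative stand-in, not Mercurial's actual implementation):

    import time

    # Illustrative stand-in for mercurial.util.timer(); falls back to
    # time.time() where perf_counter is unavailable.
    timer = getattr(time, "perf_counter", time.time)

    def work():
        sum(range(100000))  # placeholder workload

    # before this change: intervals computed from time.time()
    begin = time.time()
    work()
    print("wall %f" % (time.time() - begin))

    # after this change: the pattern applied throughout the diff below
    begin = timer()
    work()
    print("wall %f" % (timer() - begin))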
@@ -1,100 +1,98 @@
1 1 #!/usr/bin/env python
2 2 #
3 3 # hgperf - measure performance of Mercurial commands
4 4 #
5 5 # Copyright 2014 Matt Mackall <mpm@selenic.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 '''measure performance of Mercurial commands
11 11
12 12 Using ``hgperf`` instead of ``hg`` measures performance of the target
13 13 Mercurial command. For example, the execution below measures
14 14 performance of :hg:`heads --topo`::
15 15
16 16 $ hgperf heads --topo
17 17
18 18 All command output via ``ui`` is suppressed, and only the measurement
19 19 result is displayed; see also the "perf" extension in "contrib".
20 20
21 21 Costs of processing before dispatching to the command function, such as
22 22 the steps below, are not measured::
23 23
24 24 - parsing command line (e.g. option validity check)
25 25 - reading configuration files in
26 26
27 27 However, ``pre-`` and ``post-`` hook invocations for the target command are
28 28 measured, even though they run before or after dispatching to
29 29 the command function, because they may be required to repeat
30 30 execution of the target command correctly.
31 31 '''
32 32
33 33 import os
34 34 import sys
35 35
36 36 libdir = '@LIBDIR@'
37 37
38 38 if libdir != '@' 'LIBDIR' '@':
39 39 if not os.path.isabs(libdir):
40 40 libdir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
41 41 libdir)
42 42 libdir = os.path.abspath(libdir)
43 43 sys.path.insert(0, libdir)
44 44
45 45 # enable importing on demand to reduce startup time
46 46 try:
47 47 from mercurial import demandimport; demandimport.enable()
48 48 except ImportError:
49 49 import sys
50 50 sys.stderr.write("abort: couldn't find mercurial libraries in [%s]\n" %
51 51 ' '.join(sys.path))
52 52 sys.stderr.write("(check your install and PYTHONPATH)\n")
53 53 sys.exit(-1)
54 54
55 55 import mercurial.util
56 56 import mercurial.dispatch
57 57
58 import time
59
60 58 def timer(func, title=None):
61 59 results = []
62 begin = time.time()
60 begin = mercurial.util.timer()
63 61 count = 0
64 62 while True:
65 63 ostart = os.times()
66 cstart = time.time()
64 cstart = mercurial.util.timer()
67 65 r = func()
68 cstop = time.time()
66 cstop = mercurial.util.timer()
69 67 ostop = os.times()
70 68 count += 1
71 69 a, b = ostart, ostop
72 70 results.append((cstop - cstart, b[0] - a[0], b[1]-a[1]))
73 71 if cstop - begin > 3 and count >= 100:
74 72 break
75 73 if cstop - begin > 10 and count >= 3:
76 74 break
77 75 if title:
78 76 sys.stderr.write("! %s\n" % title)
79 77 if r:
80 78 sys.stderr.write("! result: %s\n" % r)
81 79 m = min(results)
82 80 sys.stderr.write("! wall %f comb %f user %f sys %f (best of %d)\n"
83 81 % (m[0], m[1] + m[2], m[1], m[2], count))
84 82
85 83 orgruncommand = mercurial.dispatch.runcommand
86 84
87 85 def runcommand(lui, repo, cmd, fullargs, ui, options, d, cmdpats, cmdoptions):
88 86 ui.pushbuffer()
89 87 lui.pushbuffer()
90 88 timer(lambda : orgruncommand(lui, repo, cmd, fullargs, ui,
91 89 options, d, cmdpats, cmdoptions))
92 90 ui.popbuffer()
93 91 lui.popbuffer()
94 92
95 93 mercurial.dispatch.runcommand = runcommand
96 94
97 95 for fp in (sys.stdin, sys.stdout, sys.stderr):
98 96 mercurial.util.setbinary(fp)
99 97
100 98 mercurial.dispatch.run()
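As a reading aid for the report line above ("! wall ... comb ... user ... sys ... (best of N)"): wall is the difference between the two clock reads, user and sys are os.times() deltas, comb is their sum, and the reported values come from the run with the smallest wall time. A distilled, standalone sketch of one measured run, with time.perf_counter standing in for mercurial.util.timer():

    import os
    import time

    def measure_once(func):
        ostart = os.times()
        wstart = time.perf_counter()   # stand-in for mercurial.util.timer()
        func()
        wstop = time.perf_counter()
        ostop = os.times()
        wall = wstop - wstart          # elapsed real time
        user = ostop[0] - ostart[0]    # CPU time spent in user mode
        syst = ostop[1] - ostart[1]    # CPU time spent in the kernel
        return wall, user, syst        # "comb" reported above is user + syst

    print(measure_once(lambda: sum(range(1000000))))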
@@ -1,1285 +1,1285 @@
1 1 # perf.py - performance test routines
2 2 '''helper extension to measure performance'''
3 3
4 4 # "historical portability" policy of perf.py:
5 5 #
6 6 # We have to do:
7 7 # - make perf.py "loadable" with as wide Mercurial version as possible
8 8 # This doesn't mean that perf commands work correctly with that Mercurial.
9 9 # BTW, perf.py itself has been available since 1.1 (or eb240755386d).
10 10 # - make historical perf command work correctly with as wide Mercurial
11 11 # version as possible
12 12 #
13 13 # We have to do, if possible with reasonable cost:
14 14 # - make recent perf command for historical feature work correctly
15 15 # with early Mercurial
16 16 #
17 17 # We don't have to do:
18 18 # - make perf command for recent feature work correctly with early
19 19 # Mercurial
20 20
21 21 from __future__ import absolute_import
22 22 import functools
23 23 import os
24 24 import random
25 25 import sys
26 26 import time
27 27 from mercurial import (
28 28 bdiff,
29 29 changegroup,
30 30 cmdutil,
31 31 commands,
32 32 copies,
33 33 error,
34 34 extensions,
35 35 mdiff,
36 36 merge,
37 37 util,
38 38 )
39 39
40 40 # for "historical portability":
41 41 # try to import modules separately (in dict order), and ignore
42 42 # failure, because these aren't available with early Mercurial
43 43 try:
44 44 from mercurial import branchmap # since 2.5 (or bcee63733aad)
45 45 except ImportError:
46 46 pass
47 47 try:
48 48 from mercurial import obsolete # since 2.3 (or ad0d6c2b3279)
49 49 except ImportError:
50 50 pass
51 51 try:
52 52 from mercurial import repoview # since 2.5 (or 3a6ddacb7198)
53 53 except ImportError:
54 54 pass
55 55 try:
56 56 from mercurial import scmutil # since 1.9 (or 8b252e826c68)
57 57 except ImportError:
58 58 pass
59 59
60 60 # for "historical portability":
61 61 # define util.safehasattr forcibly, because util.safehasattr has been
62 62 # available since 1.9.3 (or 94b200a11cf7)
63 63 _undefined = object()
64 64 def safehasattr(thing, attr):
65 65 return getattr(thing, attr, _undefined) is not _undefined
66 66 setattr(util, 'safehasattr', safehasattr)
67 67
68 68 # for "historical portability":
69 69 # use locally defined empty option list, if formatteropts isn't
70 70 # available, because commands.formatteropts has been available since
71 71 # 3.2 (or 7a7eed5176a4), even though formatting itself has been
72 72 # available since 2.2 (or ae5f92e154d3)
73 73 formatteropts = getattr(commands, "formatteropts", [])
74 74
75 75 # for "historical portability":
76 76 # use locally defined option list, if debugrevlogopts isn't available,
77 77 # because commands.debugrevlogopts has been available since 3.7 (or
78 78 # 5606f7d0d063), even though cmdutil.openrevlog() has been available
79 79 # since 1.9 (or a79fea6b3e77).
80 80 revlogopts = getattr(commands, "debugrevlogopts", [
81 81 ('c', 'changelog', False, ('open changelog')),
82 82 ('m', 'manifest', False, ('open manifest')),
83 83 ('', 'dir', False, ('open directory manifest')),
84 84 ])
85 85
86 86 cmdtable = {}
87 87
88 88 # for "historical portability":
89 89 # define parsealiases locally, because cmdutil.parsealiases has been
90 90 # available since 1.5 (or 6252852b4332)
91 91 def parsealiases(cmd):
92 92 return cmd.lstrip("^").split("|")
93 93
94 94 if safehasattr(cmdutil, 'command'):
95 95 import inspect
96 96 command = cmdutil.command(cmdtable)
97 97 if 'norepo' not in inspect.getargspec(command)[0]:
98 98 # for "historical portability":
99 99 # wrap original cmdutil.command, because "norepo" option has
100 100 # been available since 3.1 (or 75a96326cecb)
101 101 _command = command
102 102 def command(name, options=(), synopsis=None, norepo=False):
103 103 if norepo:
104 104 commands.norepo += ' %s' % ' '.join(parsealiases(name))
105 105 return _command(name, list(options), synopsis)
106 106 else:
107 107 # for "historical portability":
108 108 # define "@command" annotation locally, because cmdutil.command
109 109 # has been available since 1.9 (or 2daa5179e73f)
110 110 def command(name, options=(), synopsis=None, norepo=False):
111 111 def decorator(func):
112 112 if synopsis:
113 113 cmdtable[name] = func, list(options), synopsis
114 114 else:
115 115 cmdtable[name] = func, list(options)
116 116 if norepo:
117 117 commands.norepo += ' %s' % ' '.join(parsealiases(name))
118 118 return func
119 119 return decorator
120 120
121 121 def getlen(ui):
122 122 if ui.configbool("perf", "stub"):
123 123 return lambda x: 1
124 124 return len
125 125
126 126 def gettimer(ui, opts=None):
127 127 """return a timer function and formatter: (timer, formatter)
128 128
129 129 This function exists to gather the creation of the formatter in a single
130 130 place instead of duplicating it in all performance commands."""
131 131
132 132 # enforce an idle period before execution to counteract power management
133 133 # experimental config: perf.presleep
134 134 time.sleep(getint(ui, "perf", "presleep", 1))
135 135
136 136 if opts is None:
137 137 opts = {}
138 138 # redirect all to stderr unless buffer api is in use
139 139 if not ui._buffers:
140 140 ui = ui.copy()
141 141 uifout = safeattrsetter(ui, 'fout', ignoremissing=True)
142 142 if uifout:
143 143 # for "historical portability":
144 144 # ui.fout/ferr have been available since 1.9 (or 4e1ccd4c2b6d)
145 145 uifout.set(ui.ferr)
146 146
147 147 # get a formatter
148 148 uiformatter = getattr(ui, 'formatter', None)
149 149 if uiformatter:
150 150 fm = uiformatter('perf', opts)
151 151 else:
152 152 # for "historical portability":
153 153 # define formatter locally, because ui.formatter has been
154 154 # available since 2.2 (or ae5f92e154d3)
155 155 from mercurial import node
156 156 class defaultformatter(object):
157 157 """Minimized composition of baseformatter and plainformatter
158 158 """
159 159 def __init__(self, ui, topic, opts):
160 160 self._ui = ui
161 161 if ui.debugflag:
162 162 self.hexfunc = node.hex
163 163 else:
164 164 self.hexfunc = node.short
165 165 def __nonzero__(self):
166 166 return False
167 167 def startitem(self):
168 168 pass
169 169 def data(self, **data):
170 170 pass
171 171 def write(self, fields, deftext, *fielddata, **opts):
172 172 self._ui.write(deftext % fielddata, **opts)
173 173 def condwrite(self, cond, fields, deftext, *fielddata, **opts):
174 174 if cond:
175 175 self._ui.write(deftext % fielddata, **opts)
176 176 def plain(self, text, **opts):
177 177 self._ui.write(text, **opts)
178 178 def end(self):
179 179 pass
180 180 fm = defaultformatter(ui, 'perf', opts)
181 181
182 182 # stub function, runs code only once instead of in a loop
183 183 # experimental config: perf.stub
184 184 if ui.configbool("perf", "stub"):
185 185 return functools.partial(stub_timer, fm), fm
186 186 return functools.partial(_timer, fm), fm
187 187
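For reference, every perf command in this file consumes the (timer, formatter) pair returned by gettimer() above in the same way. A minimal usage sketch relying on the helpers defined in this file; the command name and workload are illustrative, not part of this changeset:

    @command('perfexample', formatteropts)
    def perfexample(ui, repo, **opts):
        timer, fm = gettimer(ui, opts)   # timer repeats the callable and reports stats
        def d():
            len(repo.changelog)          # the operation being benchmarked
        timer(d, title='example')        # emits wall/comb/user/sys via the formatter
        fm.end()                         # flush the formatter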
188 188 def stub_timer(fm, func, title=None):
189 189 func()
190 190
191 191 def _timer(fm, func, title=None):
192 192 results = []
193 begin = time.time()
193 begin = util.timer()
194 194 count = 0
195 195 while True:
196 196 ostart = os.times()
197 cstart = time.time()
197 cstart = util.timer()
198 198 r = func()
199 cstop = time.time()
199 cstop = util.timer()
200 200 ostop = os.times()
201 201 count += 1
202 202 a, b = ostart, ostop
203 203 results.append((cstop - cstart, b[0] - a[0], b[1]-a[1]))
204 204 if cstop - begin > 3 and count >= 100:
205 205 break
206 206 if cstop - begin > 10 and count >= 3:
207 207 break
208 208
209 209 fm.startitem()
210 210
211 211 if title:
212 212 fm.write('title', '! %s\n', title)
213 213 if r:
214 214 fm.write('result', '! result: %s\n', r)
215 215 m = min(results)
216 216 fm.plain('!')
217 217 fm.write('wall', ' wall %f', m[0])
218 218 fm.write('comb', ' comb %f', m[1] + m[2])
219 219 fm.write('user', ' user %f', m[1])
220 220 fm.write('sys', ' sys %f', m[2])
221 221 fm.write('count', ' (best of %d)', count)
222 222 fm.plain('\n')
223 223
224 224 # utilities for historical portability
225 225
226 226 def getint(ui, section, name, default):
227 227 # for "historical portability":
228 228 # ui.configint has been available since 1.9 (or fa2b596db182)
229 229 v = ui.config(section, name, None)
230 230 if v is None:
231 231 return default
232 232 try:
233 233 return int(v)
234 234 except ValueError:
235 235 raise error.ConfigError(("%s.%s is not an integer ('%s')")
236 236 % (section, name, v))
237 237
238 238 def safeattrsetter(obj, name, ignoremissing=False):
239 239 """Ensure that 'obj' has 'name' attribute before subsequent setattr
240 240
241 241 This function aborts if 'obj' doesn't have the 'name' attribute
242 242 at runtime. This avoids overlooking a future removal of the attribute,
243 243 which would break the assumptions of the performance measurement.
244 244
245 245 This function returns the object to (1) assign a new value, and
246 246 (2) restore an original value to the attribute.
247 247
248 248 If 'ignoremissing' is true, a missing 'name' attribute doesn't cause
249 249 an abort, and this function returns None. This is useful for
250 250 examining an attribute that isn't guaranteed to exist in all Mercurial
251 251 versions.
252 252 """
253 253 if not util.safehasattr(obj, name):
254 254 if ignoremissing:
255 255 return None
256 256 raise error.Abort(("missing attribute %s of %s might break assumption"
257 257 " of performance measurement") % (name, obj))
258 258
259 259 origvalue = getattr(obj, name)
260 260 class attrutil(object):
261 261 def set(self, newvalue):
262 262 setattr(obj, name, newvalue)
263 263 def restore(self):
264 264 setattr(obj, name, origvalue)
265 265
266 266 return attrutil()
267 267
268 268 # utilities to examine each internal API changes
269 269
270 270 def getbranchmapsubsettable():
271 271 # for "historical portability":
272 272 # subsettable is defined in:
273 273 # - branchmap since 2.9 (or 175c6fd8cacc)
274 274 # - repoview since 2.5 (or 59a9f18d4587)
275 275 for mod in (branchmap, repoview):
276 276 subsettable = getattr(mod, 'subsettable', None)
277 277 if subsettable:
278 278 return subsettable
279 279
280 280 # bisecting in bcee63733aad::59a9f18d4587 can reach here (both
281 281 # branchmap and repoview modules exist, but subsettable attribute
282 282 # doesn't)
283 283 raise error.Abort(("perfbranchmap not available with this Mercurial"),
284 284 hint="use 2.5 or later")
285 285
286 286 def getsvfs(repo):
287 287 """Return appropriate object to access files under .hg/store
288 288 """
289 289 # for "historical portability":
290 290 # repo.svfs has been available since 2.3 (or 7034365089bf)
291 291 svfs = getattr(repo, 'svfs', None)
292 292 if svfs:
293 293 return svfs
294 294 else:
295 295 return getattr(repo, 'sopener')
296 296
297 297 def getvfs(repo):
298 298 """Return appropriate object to access files under .hg
299 299 """
300 300 # for "historical portability":
301 301 # repo.vfs has been available since 2.3 (or 7034365089bf)
302 302 vfs = getattr(repo, 'vfs', None)
303 303 if vfs:
304 304 return vfs
305 305 else:
306 306 return getattr(repo, 'opener')
307 307
308 308 def repocleartagscachefunc(repo):
309 309 """Return the function to clear tags cache according to repo internal API
310 310 """
311 311 if util.safehasattr(repo, '_tagscache'): # since 2.0 (or 9dca7653b525)
312 312 # in this case, setattr(repo, '_tagscache', None) or so isn't
313 313 # correct way to clear tags cache, because existing code paths
314 314 # expect _tagscache to be a structured object.
315 315 def clearcache():
316 316 # _tagscache has been filteredpropertycache since 2.5 (or
317 317 # 98c867ac1330), and delattr() can't work in such case
318 318 if '_tagscache' in vars(repo):
319 319 del repo.__dict__['_tagscache']
320 320 return clearcache
321 321
322 322 repotags = safeattrsetter(repo, '_tags', ignoremissing=True)
323 323 if repotags: # since 1.4 (or 5614a628d173)
324 324 return lambda : repotags.set(None)
325 325
326 326 repotagscache = safeattrsetter(repo, 'tagscache', ignoremissing=True)
327 327 if repotagscache: # since 0.6 (or d7df759d0e97)
328 328 return lambda : repotagscache.set(None)
329 329
330 330 # Mercurial earlier than 0.6 (or d7df759d0e97) logically reaches
331 331 # this point, but it isn't so problematic, because:
332 332 # - repo.tags of such Mercurial isn't "callable", and repo.tags()
333 333 # in perftags() causes failure soon
334 334 # - perf.py itself has been available since 1.1 (or eb240755386d)
335 335 raise error.Abort(("tags API of this hg command is unknown"))
336 336
337 337 # perf commands
338 338
339 339 @command('perfwalk', formatteropts)
340 340 def perfwalk(ui, repo, *pats, **opts):
341 341 timer, fm = gettimer(ui, opts)
342 342 try:
343 343 m = scmutil.match(repo[None], pats, {})
344 344 timer(lambda: len(list(repo.dirstate.walk(m, [], True, False))))
345 345 except Exception:
346 346 try:
347 347 m = scmutil.match(repo[None], pats, {})
348 348 timer(lambda: len([b for a, b, c in repo.dirstate.statwalk([], m)]))
349 349 except Exception:
350 350 timer(lambda: len(list(cmdutil.walk(repo, pats, {}))))
351 351 fm.end()
352 352
353 353 @command('perfannotate', formatteropts)
354 354 def perfannotate(ui, repo, f, **opts):
355 355 timer, fm = gettimer(ui, opts)
356 356 fc = repo['.'][f]
357 357 timer(lambda: len(fc.annotate(True)))
358 358 fm.end()
359 359
360 360 @command('perfstatus',
361 361 [('u', 'unknown', False,
362 362 'ask status to look for unknown files')] + formatteropts)
363 363 def perfstatus(ui, repo, **opts):
364 364 #m = match.always(repo.root, repo.getcwd())
365 365 #timer(lambda: sum(map(len, repo.dirstate.status(m, [], False, False,
366 366 # False))))
367 367 timer, fm = gettimer(ui, opts)
368 368 timer(lambda: sum(map(len, repo.status(unknown=opts['unknown']))))
369 369 fm.end()
370 370
371 371 @command('perfaddremove', formatteropts)
372 372 def perfaddremove(ui, repo, **opts):
373 373 timer, fm = gettimer(ui, opts)
374 374 try:
375 375 oldquiet = repo.ui.quiet
376 376 repo.ui.quiet = True
377 377 matcher = scmutil.match(repo[None])
378 378 timer(lambda: scmutil.addremove(repo, matcher, "", dry_run=True))
379 379 finally:
380 380 repo.ui.quiet = oldquiet
381 381 fm.end()
382 382
383 383 def clearcaches(cl):
384 384 # behave somewhat consistently across internal API changes
385 385 if util.safehasattr(cl, 'clearcaches'):
386 386 cl.clearcaches()
387 387 elif util.safehasattr(cl, '_nodecache'):
388 388 from mercurial.node import nullid, nullrev
389 389 cl._nodecache = {nullid: nullrev}
390 390 cl._nodepos = None
391 391
392 392 @command('perfheads', formatteropts)
393 393 def perfheads(ui, repo, **opts):
394 394 timer, fm = gettimer(ui, opts)
395 395 cl = repo.changelog
396 396 def d():
397 397 len(cl.headrevs())
398 398 clearcaches(cl)
399 399 timer(d)
400 400 fm.end()
401 401
402 402 @command('perftags', formatteropts)
403 403 def perftags(ui, repo, **opts):
404 404 import mercurial.changelog
405 405 import mercurial.manifest
406 406 timer, fm = gettimer(ui, opts)
407 407 svfs = getsvfs(repo)
408 408 repocleartagscache = repocleartagscachefunc(repo)
409 409 def t():
410 410 repo.changelog = mercurial.changelog.changelog(svfs)
411 411 repo.manifestlog = mercurial.manifest.manifestlog(svfs, repo)
412 412 repocleartagscache()
413 413 return len(repo.tags())
414 414 timer(t)
415 415 fm.end()
416 416
417 417 @command('perfancestors', formatteropts)
418 418 def perfancestors(ui, repo, **opts):
419 419 timer, fm = gettimer(ui, opts)
420 420 heads = repo.changelog.headrevs()
421 421 def d():
422 422 for a in repo.changelog.ancestors(heads):
423 423 pass
424 424 timer(d)
425 425 fm.end()
426 426
427 427 @command('perfancestorset', formatteropts)
428 428 def perfancestorset(ui, repo, revset, **opts):
429 429 timer, fm = gettimer(ui, opts)
430 430 revs = repo.revs(revset)
431 431 heads = repo.changelog.headrevs()
432 432 def d():
433 433 s = repo.changelog.ancestors(heads)
434 434 for rev in revs:
435 435 rev in s
436 436 timer(d)
437 437 fm.end()
438 438
439 439 @command('perfchangegroupchangelog', formatteropts +
440 440 [('', 'version', '02', 'changegroup version'),
441 441 ('r', 'rev', '', 'revisions to add to changegroup')])
442 442 def perfchangegroupchangelog(ui, repo, version='02', rev=None, **opts):
443 443 """Benchmark producing a changelog group for a changegroup.
444 444
445 445 This measures the time spent processing the changelog during a
446 446 bundle operation. This occurs during `hg bundle` and on a server
447 447 processing a `getbundle` wire protocol request (handles clones
448 448 and pull requests).
449 449
450 450 By default, all revisions are added to the changegroup.
451 451 """
452 452 cl = repo.changelog
453 453 revs = [cl.lookup(r) for r in repo.revs(rev or 'all()')]
454 454 bundler = changegroup.getbundler(version, repo)
455 455
456 456 def lookup(node):
457 457 # The real bundler reads the revision in order to access the
458 458 # manifest node and files list. Do that here.
459 459 cl.read(node)
460 460 return node
461 461
462 462 def d():
463 463 for chunk in bundler.group(revs, cl, lookup):
464 464 pass
465 465
466 466 timer, fm = gettimer(ui, opts)
467 467 timer(d)
468 468 fm.end()
469 469
470 470 @command('perfdirs', formatteropts)
471 471 def perfdirs(ui, repo, **opts):
472 472 timer, fm = gettimer(ui, opts)
473 473 dirstate = repo.dirstate
474 474 'a' in dirstate
475 475 def d():
476 476 dirstate.dirs()
477 477 del dirstate._dirs
478 478 timer(d)
479 479 fm.end()
480 480
481 481 @command('perfdirstate', formatteropts)
482 482 def perfdirstate(ui, repo, **opts):
483 483 timer, fm = gettimer(ui, opts)
484 484 "a" in repo.dirstate
485 485 def d():
486 486 repo.dirstate.invalidate()
487 487 "a" in repo.dirstate
488 488 timer(d)
489 489 fm.end()
490 490
491 491 @command('perfdirstatedirs', formatteropts)
492 492 def perfdirstatedirs(ui, repo, **opts):
493 493 timer, fm = gettimer(ui, opts)
494 494 "a" in repo.dirstate
495 495 def d():
496 496 "a" in repo.dirstate._dirs
497 497 del repo.dirstate._dirs
498 498 timer(d)
499 499 fm.end()
500 500
501 501 @command('perfdirstatefoldmap', formatteropts)
502 502 def perfdirstatefoldmap(ui, repo, **opts):
503 503 timer, fm = gettimer(ui, opts)
504 504 dirstate = repo.dirstate
505 505 'a' in dirstate
506 506 def d():
507 507 dirstate._filefoldmap.get('a')
508 508 del dirstate._filefoldmap
509 509 timer(d)
510 510 fm.end()
511 511
512 512 @command('perfdirfoldmap', formatteropts)
513 513 def perfdirfoldmap(ui, repo, **opts):
514 514 timer, fm = gettimer(ui, opts)
515 515 dirstate = repo.dirstate
516 516 'a' in dirstate
517 517 def d():
518 518 dirstate._dirfoldmap.get('a')
519 519 del dirstate._dirfoldmap
520 520 del dirstate._dirs
521 521 timer(d)
522 522 fm.end()
523 523
524 524 @command('perfdirstatewrite', formatteropts)
525 525 def perfdirstatewrite(ui, repo, **opts):
526 526 timer, fm = gettimer(ui, opts)
527 527 ds = repo.dirstate
528 528 "a" in ds
529 529 def d():
530 530 ds._dirty = True
531 531 ds.write(repo.currenttransaction())
532 532 timer(d)
533 533 fm.end()
534 534
535 535 @command('perfmergecalculate',
536 536 [('r', 'rev', '.', 'rev to merge against')] + formatteropts)
537 537 def perfmergecalculate(ui, repo, rev, **opts):
538 538 timer, fm = gettimer(ui, opts)
539 539 wctx = repo[None]
540 540 rctx = scmutil.revsingle(repo, rev, rev)
541 541 ancestor = wctx.ancestor(rctx)
542 542 # we don't want working dir files to be stat'd in the benchmark, so prime
543 543 # that cache
544 544 wctx.dirty()
545 545 def d():
546 546 # acceptremote is True because we don't want prompts in the middle of
547 547 # our benchmark
548 548 merge.calculateupdates(repo, wctx, rctx, [ancestor], False, False,
549 549 acceptremote=True, followcopies=True)
550 550 timer(d)
551 551 fm.end()
552 552
553 553 @command('perfpathcopies', [], "REV REV")
554 554 def perfpathcopies(ui, repo, rev1, rev2, **opts):
555 555 timer, fm = gettimer(ui, opts)
556 556 ctx1 = scmutil.revsingle(repo, rev1, rev1)
557 557 ctx2 = scmutil.revsingle(repo, rev2, rev2)
558 558 def d():
559 559 copies.pathcopies(ctx1, ctx2)
560 560 timer(d)
561 561 fm.end()
562 562
563 563 @command('perfmanifest', [], 'REV')
564 564 def perfmanifest(ui, repo, rev, **opts):
565 565 timer, fm = gettimer(ui, opts)
566 566 ctx = scmutil.revsingle(repo, rev, rev)
567 567 t = ctx.manifestnode()
568 568 def d():
569 569 repo.manifestlog.clearcaches()
570 570 repo.manifestlog[t].read()
571 571 timer(d)
572 572 fm.end()
573 573
574 574 @command('perfchangeset', formatteropts)
575 575 def perfchangeset(ui, repo, rev, **opts):
576 576 timer, fm = gettimer(ui, opts)
577 577 n = repo[rev].node()
578 578 def d():
579 579 repo.changelog.read(n)
580 580 #repo.changelog._cache = None
581 581 timer(d)
582 582 fm.end()
583 583
584 584 @command('perfindex', formatteropts)
585 585 def perfindex(ui, repo, **opts):
586 586 import mercurial.revlog
587 587 timer, fm = gettimer(ui, opts)
588 588 mercurial.revlog._prereadsize = 2**24 # disable lazy parser in old hg
589 589 n = repo["tip"].node()
590 590 svfs = getsvfs(repo)
591 591 def d():
592 592 cl = mercurial.revlog.revlog(svfs, "00changelog.i")
593 593 cl.rev(n)
594 594 timer(d)
595 595 fm.end()
596 596
597 597 @command('perfstartup', formatteropts)
598 598 def perfstartup(ui, repo, **opts):
599 599 timer, fm = gettimer(ui, opts)
600 600 cmd = sys.argv[0]
601 601 def d():
602 602 if os.name != 'nt':
603 603 os.system("HGRCPATH= %s version -q > /dev/null" % cmd)
604 604 else:
605 605 os.environ['HGRCPATH'] = ''
606 606 os.system("%s version -q > NUL" % cmd)
607 607 timer(d)
608 608 fm.end()
609 609
610 610 @command('perfparents', formatteropts)
611 611 def perfparents(ui, repo, **opts):
612 612 timer, fm = gettimer(ui, opts)
613 613 # control the number of commits perfparents iterates over
614 614 # experimental config: perf.parentscount
615 615 count = getint(ui, "perf", "parentscount", 1000)
616 616 if len(repo.changelog) < count:
617 617 raise error.Abort("repo needs %d commits for this test" % count)
618 618 repo = repo.unfiltered()
619 619 nl = [repo.changelog.node(i) for i in xrange(count)]
620 620 def d():
621 621 for n in nl:
622 622 repo.changelog.parents(n)
623 623 timer(d)
624 624 fm.end()
625 625
626 626 @command('perfctxfiles', formatteropts)
627 627 def perfctxfiles(ui, repo, x, **opts):
628 628 x = int(x)
629 629 timer, fm = gettimer(ui, opts)
630 630 def d():
631 631 len(repo[x].files())
632 632 timer(d)
633 633 fm.end()
634 634
635 635 @command('perfrawfiles', formatteropts)
636 636 def perfrawfiles(ui, repo, x, **opts):
637 637 x = int(x)
638 638 timer, fm = gettimer(ui, opts)
639 639 cl = repo.changelog
640 640 def d():
641 641 len(cl.read(x)[3])
642 642 timer(d)
643 643 fm.end()
644 644
645 645 @command('perflookup', formatteropts)
646 646 def perflookup(ui, repo, rev, **opts):
647 647 timer, fm = gettimer(ui, opts)
648 648 timer(lambda: len(repo.lookup(rev)))
649 649 fm.end()
650 650
651 651 @command('perfrevrange', formatteropts)
652 652 def perfrevrange(ui, repo, *specs, **opts):
653 653 timer, fm = gettimer(ui, opts)
654 654 revrange = scmutil.revrange
655 655 timer(lambda: len(revrange(repo, specs)))
656 656 fm.end()
657 657
658 658 @command('perfnodelookup', formatteropts)
659 659 def perfnodelookup(ui, repo, rev, **opts):
660 660 timer, fm = gettimer(ui, opts)
661 661 import mercurial.revlog
662 662 mercurial.revlog._prereadsize = 2**24 # disable lazy parser in old hg
663 663 n = repo[rev].node()
664 664 cl = mercurial.revlog.revlog(getsvfs(repo), "00changelog.i")
665 665 def d():
666 666 cl.rev(n)
667 667 clearcaches(cl)
668 668 timer(d)
669 669 fm.end()
670 670
671 671 @command('perflog',
672 672 [('', 'rename', False, 'ask log to follow renames')] + formatteropts)
673 673 def perflog(ui, repo, rev=None, **opts):
674 674 if rev is None:
675 675 rev=[]
676 676 timer, fm = gettimer(ui, opts)
677 677 ui.pushbuffer()
678 678 timer(lambda: commands.log(ui, repo, rev=rev, date='', user='',
679 679 copies=opts.get('rename')))
680 680 ui.popbuffer()
681 681 fm.end()
682 682
683 683 @command('perfmoonwalk', formatteropts)
684 684 def perfmoonwalk(ui, repo, **opts):
685 685 """benchmark walking the changelog backwards
686 686
687 687 This also loads the changelog data for each revision in the changelog.
688 688 """
689 689 timer, fm = gettimer(ui, opts)
690 690 def moonwalk():
691 691 for i in xrange(len(repo), -1, -1):
692 692 ctx = repo[i]
693 693 ctx.branch() # read changelog data (in addition to the index)
694 694 timer(moonwalk)
695 695 fm.end()
696 696
697 697 @command('perftemplating', formatteropts)
698 698 def perftemplating(ui, repo, rev=None, **opts):
699 699 if rev is None:
700 700 rev=[]
701 701 timer, fm = gettimer(ui, opts)
702 702 ui.pushbuffer()
703 703 timer(lambda: commands.log(ui, repo, rev=rev, date='', user='',
704 704 template='{date|shortdate} [{rev}:{node|short}]'
705 705 ' {author|person}: {desc|firstline}\n'))
706 706 ui.popbuffer()
707 707 fm.end()
708 708
709 709 @command('perfcca', formatteropts)
710 710 def perfcca(ui, repo, **opts):
711 711 timer, fm = gettimer(ui, opts)
712 712 timer(lambda: scmutil.casecollisionauditor(ui, False, repo.dirstate))
713 713 fm.end()
714 714
715 715 @command('perffncacheload', formatteropts)
716 716 def perffncacheload(ui, repo, **opts):
717 717 timer, fm = gettimer(ui, opts)
718 718 s = repo.store
719 719 def d():
720 720 s.fncache._load()
721 721 timer(d)
722 722 fm.end()
723 723
724 724 @command('perffncachewrite', formatteropts)
725 725 def perffncachewrite(ui, repo, **opts):
726 726 timer, fm = gettimer(ui, opts)
727 727 s = repo.store
728 728 s.fncache._load()
729 729 lock = repo.lock()
730 730 tr = repo.transaction('perffncachewrite')
731 731 def d():
732 732 s.fncache._dirty = True
733 733 s.fncache.write(tr)
734 734 timer(d)
735 735 tr.close()
736 736 lock.release()
737 737 fm.end()
738 738
739 739 @command('perffncacheencode', formatteropts)
740 740 def perffncacheencode(ui, repo, **opts):
741 741 timer, fm = gettimer(ui, opts)
742 742 s = repo.store
743 743 s.fncache._load()
744 744 def d():
745 745 for p in s.fncache.entries:
746 746 s.encode(p)
747 747 timer(d)
748 748 fm.end()
749 749
750 750 @command('perfbdiff', revlogopts + formatteropts + [
751 751 ('', 'count', 1, 'number of revisions to test (when using --startrev)'),
752 752 ('', 'alldata', False, 'test bdiffs for all associated revisions')],
753 753 '-c|-m|FILE REV')
754 754 def perfbdiff(ui, repo, file_, rev=None, count=None, **opts):
755 755 """benchmark a bdiff between revisions
756 756
757 757 By default, benchmark a bdiff between its delta parent and itself.
758 758
759 759 With ``--count``, benchmark bdiffs between delta parents and self for N
760 760 revisions starting at the specified revision.
761 761
762 762 With ``--alldata``, assume the requested revision is a changeset and
763 763 measure bdiffs for all changes related to that changeset (manifest
764 764 and filelogs).
765 765 """
766 766 if opts['alldata']:
767 767 opts['changelog'] = True
768 768
769 769 if opts.get('changelog') or opts.get('manifest'):
770 770 file_, rev = None, file_
771 771 elif rev is None:
772 772 raise error.CommandError('perfbdiff', 'invalid arguments')
773 773
774 774 textpairs = []
775 775
776 776 r = cmdutil.openrevlog(repo, 'perfbdiff', file_, opts)
777 777
778 778 startrev = r.rev(r.lookup(rev))
779 779 for rev in range(startrev, min(startrev + count, len(r) - 1)):
780 780 if opts['alldata']:
781 781 # Load revisions associated with changeset.
782 782 ctx = repo[rev]
783 783 mtext = repo.manifestlog._revlog.revision(ctx.manifestnode())
784 784 for pctx in ctx.parents():
785 785 pman = repo.manifestlog._revlog.revision(pctx.manifestnode())
786 786 textpairs.append((pman, mtext))
787 787
788 788 # Load filelog revisions by iterating manifest delta.
789 789 man = ctx.manifest()
790 790 pman = ctx.p1().manifest()
791 791 for filename, change in pman.diff(man).items():
792 792 fctx = repo.file(filename)
793 793 f1 = fctx.revision(change[0][0] or -1)
794 794 f2 = fctx.revision(change[1][0] or -1)
795 795 textpairs.append((f1, f2))
796 796 else:
797 797 dp = r.deltaparent(rev)
798 798 textpairs.append((r.revision(dp), r.revision(rev)))
799 799
800 800 def d():
801 801 for pair in textpairs:
802 802 bdiff.bdiff(*pair)
803 803
804 804 timer, fm = gettimer(ui, opts)
805 805 timer(d)
806 806 fm.end()
807 807
808 808 @command('perfdiffwd', formatteropts)
809 809 def perfdiffwd(ui, repo, **opts):
810 810 """Profile diff of working directory changes"""
811 811 timer, fm = gettimer(ui, opts)
812 812 options = {
813 813 'w': 'ignore_all_space',
814 814 'b': 'ignore_space_change',
815 815 'B': 'ignore_blank_lines',
816 816 }
817 817
818 818 for diffopt in ('', 'w', 'b', 'B', 'wB'):
819 819 opts = dict((options[c], '1') for c in diffopt)
820 820 def d():
821 821 ui.pushbuffer()
822 822 commands.diff(ui, repo, **opts)
823 823 ui.popbuffer()
824 824 title = 'diffopts: %s' % (diffopt and ('-' + diffopt) or 'none')
825 825 timer(d, title)
826 826 fm.end()
827 827
828 828 @command('perfrevlog', revlogopts + formatteropts +
829 829 [('d', 'dist', 100, 'distance between the revisions'),
830 830 ('s', 'startrev', 0, 'revision to start reading at'),
831 831 ('', 'reverse', False, 'read in reverse')],
832 832 '-c|-m|FILE')
833 833 def perfrevlog(ui, repo, file_=None, startrev=0, reverse=False, **opts):
834 834 """Benchmark reading a series of revisions from a revlog.
835 835
836 836 By default, we read every ``-d/--dist`` revision from 0 to tip of
837 837 the specified revlog.
838 838
839 839 The start revision can be defined via ``-s/--startrev``.
840 840 """
841 841 timer, fm = gettimer(ui, opts)
842 842 _len = getlen(ui)
843 843
844 844 def d():
845 845 r = cmdutil.openrevlog(repo, 'perfrevlog', file_, opts)
846 846
847 847 startrev = 0
848 848 endrev = _len(r)
849 849 dist = opts['dist']
850 850
851 851 if reverse:
852 852 startrev, endrev = endrev, startrev
853 853 dist = -1 * dist
854 854
855 855 for x in xrange(startrev, endrev, dist):
856 856 r.revision(r.node(x))
857 857
858 858 timer(d)
859 859 fm.end()
860 860
861 861 @command('perfrevlogchunks', revlogopts + formatteropts +
862 862 [('e', 'engines', '', 'compression engines to use'),
863 863 ('s', 'startrev', 0, 'revision to start at')],
864 864 '-c|-m|FILE')
865 865 def perfrevlogchunks(ui, repo, file_=None, engines=None, startrev=0, **opts):
866 866 """Benchmark operations on revlog chunks.
867 867
868 868 Logically, each revlog is a collection of fulltext revisions. However,
869 869 stored within each revlog are "chunks" of possibly compressed data. This
870 870 data needs to be read and decompressed or compressed and written.
871 871
872 872 This command measures the time it takes to read+decompress and recompress
873 873 chunks in a revlog. It effectively isolates I/O and compression performance.
874 874 For measurements of higher-level operations like resolving revisions,
875 875 see ``perfrevlog`` and ``perfrevlogrevision``.
876 876 """
877 877 rl = cmdutil.openrevlog(repo, 'perfrevlogchunks', file_, opts)
878 878
879 879 # Verify engines argument.
880 880 if engines:
881 881 engines = set(e.strip() for e in engines.split(','))
882 882 for engine in engines:
883 883 try:
884 884 util.compressionengines[engine]
885 885 except KeyError:
886 886 raise error.Abort('unknown compression engine: %s' % engine)
887 887 else:
888 888 engines = []
889 889 for e in util.compengines:
890 890 engine = util.compengines[e]
891 891 try:
892 892 if engine.available():
893 893 engine.revlogcompressor().compress('dummy')
894 894 engines.append(e)
895 895 except NotImplementedError:
896 896 pass
897 897
898 898 revs = list(rl.revs(startrev, len(rl) - 1))
899 899
900 900 def rlfh(rl):
901 901 if rl._inline:
902 902 return getsvfs(repo)(rl.indexfile)
903 903 else:
904 904 return getsvfs(repo)(rl.datafile)
905 905
906 906 def doread():
907 907 rl.clearcaches()
908 908 for rev in revs:
909 909 rl._chunkraw(rev, rev)
910 910
911 911 def doreadcachedfh():
912 912 rl.clearcaches()
913 913 fh = rlfh(rl)
914 914 for rev in revs:
915 915 rl._chunkraw(rev, rev, df=fh)
916 916
917 917 def doreadbatch():
918 918 rl.clearcaches()
919 919 rl._chunkraw(revs[0], revs[-1])
920 920
921 921 def doreadbatchcachedfh():
922 922 rl.clearcaches()
923 923 fh = rlfh(rl)
924 924 rl._chunkraw(revs[0], revs[-1], df=fh)
925 925
926 926 def dochunk():
927 927 rl.clearcaches()
928 928 fh = rlfh(rl)
929 929 for rev in revs:
930 930 rl._chunk(rev, df=fh)
931 931
932 932 chunks = [None]
933 933
934 934 def dochunkbatch():
935 935 rl.clearcaches()
936 936 fh = rlfh(rl)
937 937 # Save chunks as a side-effect.
938 938 chunks[0] = rl._chunks(revs, df=fh)
939 939
940 940 def docompress(compressor):
941 941 rl.clearcaches()
942 942
943 943 try:
944 944 # Swap in the requested compression engine.
945 945 oldcompressor = rl._compressor
946 946 rl._compressor = compressor
947 947 for chunk in chunks[0]:
948 948 rl.compress(chunk)
949 949 finally:
950 950 rl._compressor = oldcompressor
951 951
952 952 benches = [
953 953 (lambda: doread(), 'read'),
954 954 (lambda: doreadcachedfh(), 'read w/ reused fd'),
955 955 (lambda: doreadbatch(), 'read batch'),
956 956 (lambda: doreadbatchcachedfh(), 'read batch w/ reused fd'),
957 957 (lambda: dochunk(), 'chunk'),
958 958 (lambda: dochunkbatch(), 'chunk batch'),
959 959 ]
960 960
961 961 for engine in sorted(engines):
962 962 compressor = util.compengines[engine].revlogcompressor()
963 963 benches.append((functools.partial(docompress, compressor),
964 964 'compress w/ %s' % engine))
965 965
966 966 for fn, title in benches:
967 967 timer, fm = gettimer(ui, opts)
968 968 timer(fn, title=title)
969 969 fm.end()
970 970
971 971 @command('perfrevlogrevision', revlogopts + formatteropts +
972 972 [('', 'cache', False, 'use caches instead of clearing')],
973 973 '-c|-m|FILE REV')
974 974 def perfrevlogrevision(ui, repo, file_, rev=None, cache=None, **opts):
975 975 """Benchmark obtaining a revlog revision.
976 976
977 977 Obtaining a revlog revision consists of roughly the following steps:
978 978
979 979 1. Compute the delta chain
980 980 2. Obtain the raw chunks for that delta chain
981 981 3. Decompress each raw chunk
982 982 4. Apply binary patches to obtain fulltext
983 983 5. Verify hash of fulltext
984 984
985 985 This command measures the time spent in each of these phases.
986 986 """
987 987 if opts.get('changelog') or opts.get('manifest'):
988 988 file_, rev = None, file_
989 989 elif rev is None:
990 990 raise error.CommandError('perfrevlogrevision', 'invalid arguments')
991 991
992 992 r = cmdutil.openrevlog(repo, 'perfrevlogrevision', file_, opts)
993 993 node = r.lookup(rev)
994 994 rev = r.rev(node)
995 995
996 996 def getrawchunks(data, chain):
997 997 start = r.start
998 998 length = r.length
999 999 inline = r._inline
1000 1000 iosize = r._io.size
1001 1001 buffer = util.buffer
1002 1002 offset = start(chain[0])
1003 1003
1004 1004 chunks = []
1005 1005 ladd = chunks.append
1006 1006
1007 1007 for rev in chain:
1008 1008 chunkstart = start(rev)
1009 1009 if inline:
1010 1010 chunkstart += (rev + 1) * iosize
1011 1011 chunklength = length(rev)
1012 1012 ladd(buffer(data, chunkstart - offset, chunklength))
1013 1013
1014 1014 return chunks
1015 1015
1016 1016 def dodeltachain(rev):
1017 1017 if not cache:
1018 1018 r.clearcaches()
1019 1019 r._deltachain(rev)
1020 1020
1021 1021 def doread(chain):
1022 1022 if not cache:
1023 1023 r.clearcaches()
1024 1024 r._chunkraw(chain[0], chain[-1])
1025 1025
1026 1026 def dorawchunks(data, chain):
1027 1027 if not cache:
1028 1028 r.clearcaches()
1029 1029 getrawchunks(data, chain)
1030 1030
1031 1031 def dodecompress(chunks):
1032 1032 decomp = r.decompress
1033 1033 for chunk in chunks:
1034 1034 decomp(chunk)
1035 1035
1036 1036 def dopatch(text, bins):
1037 1037 if not cache:
1038 1038 r.clearcaches()
1039 1039 mdiff.patches(text, bins)
1040 1040
1041 1041 def dohash(text):
1042 1042 if not cache:
1043 1043 r.clearcaches()
1044 1044 r.checkhash(text, node, rev=rev)
1045 1045
1046 1046 def dorevision():
1047 1047 if not cache:
1048 1048 r.clearcaches()
1049 1049 r.revision(node)
1050 1050
1051 1051 chain = r._deltachain(rev)[0]
1052 1052 data = r._chunkraw(chain[0], chain[-1])[1]
1053 1053 rawchunks = getrawchunks(data, chain)
1054 1054 bins = r._chunks(chain)
1055 1055 text = str(bins[0])
1056 1056 bins = bins[1:]
1057 1057 text = mdiff.patches(text, bins)
1058 1058
1059 1059 benches = [
1060 1060 (lambda: dorevision(), 'full'),
1061 1061 (lambda: dodeltachain(rev), 'deltachain'),
1062 1062 (lambda: doread(chain), 'read'),
1063 1063 (lambda: dorawchunks(data, chain), 'rawchunks'),
1064 1064 (lambda: dodecompress(rawchunks), 'decompress'),
1065 1065 (lambda: dopatch(text, bins), 'patch'),
1066 1066 (lambda: dohash(text), 'hash'),
1067 1067 ]
1068 1068
1069 1069 for fn, title in benches:
1070 1070 timer, fm = gettimer(ui, opts)
1071 1071 timer(fn, title=title)
1072 1072 fm.end()
1073 1073
1074 1074 @command('perfrevset',
1075 1075 [('C', 'clear', False, 'clear volatile cache between each call.'),
1076 1076 ('', 'contexts', False, 'obtain changectx for each revision')]
1077 1077 + formatteropts, "REVSET")
1078 1078 def perfrevset(ui, repo, expr, clear=False, contexts=False, **opts):
1079 1079 """benchmark the execution time of a revset
1080 1080
1081 1081 Use the --clear option if you need to evaluate the impact of rebuilding the
1082 1082 volatile revision-set caches on revset execution. The volatile caches hold
1083 1083 filtering- and obsolescence-related data."""
1084 1084 timer, fm = gettimer(ui, opts)
1085 1085 def d():
1086 1086 if clear:
1087 1087 repo.invalidatevolatilesets()
1088 1088 if contexts:
1089 1089 for ctx in repo.set(expr): pass
1090 1090 else:
1091 1091 for r in repo.revs(expr): pass
1092 1092 timer(d)
1093 1093 fm.end()
1094 1094
1095 1095 @command('perfvolatilesets', formatteropts)
1096 1096 def perfvolatilesets(ui, repo, *names, **opts):
1097 1097 """benchmark the computation of various volatile set
1098 1098
1099 1099 Volatile set computes element related to filtering and obsolescence."""
1100 1100 timer, fm = gettimer(ui, opts)
1101 1101 repo = repo.unfiltered()
1102 1102
1103 1103 def getobs(name):
1104 1104 def d():
1105 1105 repo.invalidatevolatilesets()
1106 1106 obsolete.getrevs(repo, name)
1107 1107 return d
1108 1108
1109 1109 allobs = sorted(obsolete.cachefuncs)
1110 1110 if names:
1111 1111 allobs = [n for n in allobs if n in names]
1112 1112
1113 1113 for name in allobs:
1114 1114 timer(getobs(name), title=name)
1115 1115
1116 1116 def getfiltered(name):
1117 1117 def d():
1118 1118 repo.invalidatevolatilesets()
1119 1119 repoview.filterrevs(repo, name)
1120 1120 return d
1121 1121
1122 1122 allfilter = sorted(repoview.filtertable)
1123 1123 if names:
1124 1124 allfilter = [n for n in allfilter if n in names]
1125 1125
1126 1126 for name in allfilter:
1127 1127 timer(getfiltered(name), title=name)
1128 1128 fm.end()
1129 1129
1130 1130 @command('perfbranchmap',
1131 1131 [('f', 'full', False,
1132 1132 'Includes build time of subset'),
1133 1133 ] + formatteropts)
1134 1134 def perfbranchmap(ui, repo, full=False, **opts):
1135 1135 """benchmark the update of a branchmap
1136 1136
1137 1137 This benchmarks the full repo.branchmap() call with read and write disabled
1138 1138 """
1139 1139 timer, fm = gettimer(ui, opts)
1140 1140 def getbranchmap(filtername):
1141 1141 """generate a benchmark function for the filtername"""
1142 1142 if filtername is None:
1143 1143 view = repo
1144 1144 else:
1145 1145 view = repo.filtered(filtername)
1146 1146 def d():
1147 1147 if full:
1148 1148 view._branchcaches.clear()
1149 1149 else:
1150 1150 view._branchcaches.pop(filtername, None)
1151 1151 view.branchmap()
1152 1152 return d
1153 1153 # add filter in smaller subset to bigger subset
1154 1154 possiblefilters = set(repoview.filtertable)
1155 1155 subsettable = getbranchmapsubsettable()
1156 1156 allfilters = []
1157 1157 while possiblefilters:
1158 1158 for name in possiblefilters:
1159 1159 subset = subsettable.get(name)
1160 1160 if subset not in possiblefilters:
1161 1161 break
1162 1162 else:
1163 1163 assert False, 'subset cycle %s!' % possiblefilters
1164 1164 allfilters.append(name)
1165 1165 possiblefilters.remove(name)
1166 1166
1167 1167 # warm the cache
1168 1168 if not full:
1169 1169 for name in allfilters:
1170 1170 repo.filtered(name).branchmap()
1171 1171 # add unfiltered
1172 1172 allfilters.append(None)
1173 1173
1174 1174 branchcacheread = safeattrsetter(branchmap, 'read')
1175 1175 branchcachewrite = safeattrsetter(branchmap.branchcache, 'write')
1176 1176 branchcacheread.set(lambda repo: None)
1177 1177 branchcachewrite.set(lambda bc, repo: None)
1178 1178 try:
1179 1179 for name in allfilters:
1180 1180 timer(getbranchmap(name), title=str(name))
1181 1181 finally:
1182 1182 branchcacheread.restore()
1183 1183 branchcachewrite.restore()
1184 1184 fm.end()
1185 1185
1186 1186 @command('perfloadmarkers')
1187 1187 def perfloadmarkers(ui, repo):
1188 1188 """benchmark the time to parse the on-disk markers for a repo
1189 1189
1190 1190 Result is the number of markers in the repo."""
1191 1191 timer, fm = gettimer(ui)
1192 1192 svfs = getsvfs(repo)
1193 1193 timer(lambda: len(obsolete.obsstore(svfs)))
1194 1194 fm.end()
1195 1195
1196 1196 @command('perflrucachedict', formatteropts +
1197 1197 [('', 'size', 4, 'size of cache'),
1198 1198 ('', 'gets', 10000, 'number of key lookups'),
1199 1199 ('', 'sets', 10000, 'number of key sets'),
1200 1200 ('', 'mixed', 10000, 'number of mixed mode operations'),
1201 1201 ('', 'mixedgetfreq', 50, 'frequency of get vs set ops in mixed mode')],
1202 1202 norepo=True)
1203 1203 def perflrucache(ui, size=4, gets=10000, sets=10000, mixed=10000,
1204 1204 mixedgetfreq=50, **opts):
1205 1205 def doinit():
1206 1206 for i in xrange(10000):
1207 1207 util.lrucachedict(size)
1208 1208
1209 1209 values = []
1210 1210 for i in xrange(size):
1211 1211 values.append(random.randint(0, sys.maxint))
1212 1212
1213 1213 # Get mode fills the cache and tests raw lookup performance with no
1214 1214 # eviction.
1215 1215 getseq = []
1216 1216 for i in xrange(gets):
1217 1217 getseq.append(random.choice(values))
1218 1218
1219 1219 def dogets():
1220 1220 d = util.lrucachedict(size)
1221 1221 for v in values:
1222 1222 d[v] = v
1223 1223 for key in getseq:
1224 1224 value = d[key]
1225 1225 value # silence pyflakes warning
1226 1226
1227 1227 # Set mode tests insertion speed with cache eviction.
1228 1228 setseq = []
1229 1229 for i in xrange(sets):
1230 1230 setseq.append(random.randint(0, sys.maxint))
1231 1231
1232 1232 def dosets():
1233 1233 d = util.lrucachedict(size)
1234 1234 for v in setseq:
1235 1235 d[v] = v
1236 1236
1237 1237 # Mixed mode randomly performs gets and sets with eviction.
1238 1238 mixedops = []
1239 1239 for i in xrange(mixed):
1240 1240 r = random.randint(0, 100)
1241 1241 if r < mixedgetfreq:
1242 1242 op = 0
1243 1243 else:
1244 1244 op = 1
1245 1245
1246 1246 mixedops.append((op, random.randint(0, size * 2)))
1247 1247
1248 1248 def domixed():
1249 1249 d = util.lrucachedict(size)
1250 1250
1251 1251 for op, v in mixedops:
1252 1252 if op == 0:
1253 1253 try:
1254 1254 d[v]
1255 1255 except KeyError:
1256 1256 pass
1257 1257 else:
1258 1258 d[v] = v
1259 1259
1260 1260 benches = [
1261 1261 (doinit, 'init'),
1262 1262 (dogets, 'gets'),
1263 1263 (dosets, 'sets'),
1264 1264 (domixed, 'mixed')
1265 1265 ]
1266 1266
1267 1267 for fn, title in benches:
1268 1268 timer, fm = gettimer(ui, opts)
1269 1269 timer(fn, title=title)
1270 1270 fm.end()
1271 1271
1272 1272 def uisetup(ui):
1273 1273 if (util.safehasattr(cmdutil, 'openrevlog') and
1274 1274 not util.safehasattr(commands, 'debugrevlogopts')):
1275 1275 # for "historical portability":
1276 1276 # In this case, Mercurial should be 1.9 (or a79fea6b3e77) -
1277 1277 # 3.7 (or 5606f7d0d063). Therefore, '--dir' option for
1278 1278 # openrevlog() should cause failure, because it has been
1279 1279 # available since 3.5 (or 49c583ca48c4).
1280 1280 def openrevlog(orig, repo, cmd, file_, opts):
1281 1281 if opts.get('dir') and not util.safehasattr(repo, 'dirlog'):
1282 1282 raise error.Abort("This version doesn't support --dir option",
1283 1283 hint="use 3.5 or later")
1284 1284 return orig(repo, cmd, file_, opts)
1285 1285 extensions.wrapfunction(cmdutil, 'openrevlog', openrevlog)
@@ -1,522 +1,522 @@
1 1 # branchmap.py - logic to compute, maintain and store the branchmap for a local repo
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import array
11 11 import struct
12 import time
13 12
14 13 from .node import (
15 14 bin,
16 15 hex,
17 16 nullid,
18 17 nullrev,
19 18 )
20 19 from . import (
21 20 encoding,
22 21 error,
23 22 scmutil,
23 util,
24 24 )
25 25
26 26 array = array.array
27 27 calcsize = struct.calcsize
28 28 pack = struct.pack
29 29 unpack = struct.unpack
30 30
31 31 def _filename(repo):
32 32 """name of a branchcache file for a given repo or repoview"""
33 33 filename = "cache/branch2"
34 34 if repo.filtername:
35 35 filename = '%s-%s' % (filename, repo.filtername)
36 36 return filename
37 37
38 38 def read(repo):
39 39 try:
40 40 f = repo.vfs(_filename(repo))
41 41 lines = f.read().split('\n')
42 42 f.close()
43 43 except (IOError, OSError):
44 44 return None
45 45
46 46 try:
47 47 cachekey = lines.pop(0).split(" ", 2)
48 48 last, lrev = cachekey[:2]
49 49 last, lrev = bin(last), int(lrev)
50 50 filteredhash = None
51 51 if len(cachekey) > 2:
52 52 filteredhash = bin(cachekey[2])
53 53 partial = branchcache(tipnode=last, tiprev=lrev,
54 54 filteredhash=filteredhash)
55 55 if not partial.validfor(repo):
56 56 # invalidate the cache
57 57 raise ValueError('tip differs')
58 58 cl = repo.changelog
59 59 for l in lines:
60 60 if not l:
61 61 continue
62 62 node, state, label = l.split(" ", 2)
63 63 if state not in 'oc':
64 64 raise ValueError('invalid branch state')
65 65 label = encoding.tolocal(label.strip())
66 66 node = bin(node)
67 67 if not cl.hasnode(node):
68 68 raise ValueError('node %s does not exist' % hex(node))
69 69 partial.setdefault(label, []).append(node)
70 70 if state == 'c':
71 71 partial._closednodes.add(node)
72 72 except KeyboardInterrupt:
73 73 raise
74 74 except Exception as inst:
75 75 if repo.ui.debugflag:
76 76 msg = 'invalid branchheads cache'
77 77 if repo.filtername is not None:
78 78 msg += ' (%s)' % repo.filtername
79 79 msg += ': %s\n'
80 80 repo.ui.debug(msg % inst)
81 81 partial = None
82 82 return partial
83 83
84 84 ### Nearest subset relation
85 85 # Nearest subset of filter X is a filter Y so that:
86 86 # * Y is included in X,
87 87 # * X - Y is as small as possible.
88 88 # This creates an ordering used for branchmap purposes.
89 89 # The ordering may be partial.
90 90 subsettable = {None: 'visible',
91 91 'visible': 'served',
92 92 'served': 'immutable',
93 93 'immutable': 'base'}
94 94
95 95 def updatecache(repo):
96 96 cl = repo.changelog
97 97 filtername = repo.filtername
98 98 partial = repo._branchcaches.get(filtername)
99 99
100 100 revs = []
101 101 if partial is None or not partial.validfor(repo):
102 102 partial = read(repo)
103 103 if partial is None:
104 104 subsetname = subsettable.get(filtername)
105 105 if subsetname is None:
106 106 partial = branchcache()
107 107 else:
108 108 subset = repo.filtered(subsetname)
109 109 partial = subset.branchmap().copy()
110 110 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
111 111 revs.extend(r for r in extrarevs if r <= partial.tiprev)
112 112 revs.extend(cl.revs(start=partial.tiprev + 1))
113 113 if revs:
114 114 partial.update(repo, revs)
115 115 partial.write(repo)
116 116
117 117 assert partial.validfor(repo), filtername
118 118 repo._branchcaches[repo.filtername] = partial
119 119
120 120 def replacecache(repo, bm):
121 121 """Replace the branchmap cache for a repo with a branch mapping.
122 122
123 123 This is likely only called during clone with a branch map from a remote.
124 124 """
125 125 rbheads = []
126 126 closed = []
127 127 for bheads in bm.itervalues():
128 128 rbheads.extend(bheads)
129 129 for h in bheads:
130 130 r = repo.changelog.rev(h)
131 131 b, c = repo.changelog.branchinfo(r)
132 132 if c:
133 133 closed.append(h)
134 134
135 135 if rbheads:
136 136 rtiprev = max((int(repo.changelog.rev(node))
137 137 for node in rbheads))
138 138 cache = branchcache(bm,
139 139 repo[rtiprev].node(),
140 140 rtiprev,
141 141 closednodes=closed)
142 142
143 143 # Try to stick it as low as possible
144 144 # filters above served are unlikely to be fetched from a clone
145 145 for candidate in ('base', 'immutable', 'served'):
146 146 rview = repo.filtered(candidate)
147 147 if cache.validfor(rview):
148 148 repo._branchcaches[candidate] = cache
149 149 cache.write(rview)
150 150 break
151 151
152 152 class branchcache(dict):
153 153 """A dict like object that hold branches heads cache.
154 154
155 155 This cache is used to avoid costly computations to determine all the
156 156 branch heads of a repo.
157 157
158 158 The cache is serialized on disk in the following format:
159 159
160 160 <tip hex node> <tip rev number> [optional filtered repo hex hash]
161 161 <branch head hex node> <open/closed state> <branch name>
162 162 <branch head hex node> <open/closed state> <branch name>
163 163 ...
164 164
165 165 The first line is used to check if the cache is still valid. If the
166 166 branch cache is for a filtered repo view, an optional third hash is
167 167 included that hashes the hashes of all filtered revisions.
168 168
169 169 The open/closed state is represented by a single letter 'o' or 'c'.
170 170 This field can be used to avoid changelog reads when determining if a
171 171 branch head closes a branch or not.
172 172 """
173 173
174 174 def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
175 175 filteredhash=None, closednodes=None):
176 176 super(branchcache, self).__init__(entries)
177 177 self.tipnode = tipnode
178 178 self.tiprev = tiprev
179 179 self.filteredhash = filteredhash
180 180 # closednodes is a set of nodes that close their branch. If the branch
181 181 # cache has been updated, it may contain nodes that are no longer
182 182 # heads.
183 183 if closednodes is None:
184 184 self._closednodes = set()
185 185 else:
186 186 self._closednodes = closednodes
187 187
188 188 def validfor(self, repo):
189 189 """Is the cache content valid regarding a repo
190 190
191 191 - False when cached tipnode is unknown or if we detect a strip.
192 192 - True when cache is up to date or a subset of current repo."""
193 193 try:
194 194 return ((self.tipnode == repo.changelog.node(self.tiprev))
195 195 and (self.filteredhash == \
196 196 scmutil.filteredhash(repo, self.tiprev)))
197 197 except IndexError:
198 198 return False
199 199
200 200 def _branchtip(self, heads):
201 201 '''Return tuple with last open head in heads and false,
202 202 otherwise return last closed head and true.'''
203 203 tip = heads[-1]
204 204 closed = True
205 205 for h in reversed(heads):
206 206 if h not in self._closednodes:
207 207 tip = h
208 208 closed = False
209 209 break
210 210 return tip, closed
211 211
212 212 def branchtip(self, branch):
213 213 '''Return the tipmost open head on branch head, otherwise return the
214 214 tipmost closed head on branch.
215 215 Raise KeyError for unknown branch.'''
216 216 return self._branchtip(self[branch])[0]
217 217
218 218 def branchheads(self, branch, closed=False):
219 219 heads = self[branch]
220 220 if not closed:
221 221 heads = [h for h in heads if h not in self._closednodes]
222 222 return heads
223 223
224 224 def iterbranches(self):
225 225 for bn, heads in self.iteritems():
226 226 yield (bn, heads) + self._branchtip(heads)
227 227
228 228 def copy(self):
229 229 """return an deep copy of the branchcache object"""
230 230 return branchcache(self, self.tipnode, self.tiprev, self.filteredhash,
231 231 self._closednodes)
232 232
233 233 def write(self, repo):
234 234 try:
235 235 f = repo.vfs(_filename(repo), "w", atomictemp=True)
236 236 cachekey = [hex(self.tipnode), str(self.tiprev)]
237 237 if self.filteredhash is not None:
238 238 cachekey.append(hex(self.filteredhash))
239 239 f.write(" ".join(cachekey) + '\n')
240 240 nodecount = 0
241 241 for label, nodes in sorted(self.iteritems()):
242 242 for node in nodes:
243 243 nodecount += 1
244 244 if node in self._closednodes:
245 245 state = 'c'
246 246 else:
247 247 state = 'o'
248 248 f.write("%s %s %s\n" % (hex(node), state,
249 249 encoding.fromlocal(label)))
250 250 f.close()
251 251 repo.ui.log('branchcache',
252 252 'wrote %s branch cache with %d labels and %d nodes\n',
253 253 repo.filtername, len(self), nodecount)
254 254 except (IOError, OSError, error.Abort) as inst:
255 255 repo.ui.debug("couldn't write branch cache: %s\n" % inst)
256 256 # Abort may be raised by a read-only opener
257 257 pass
258 258
259 259 def update(self, repo, revgen):
260 260 """Given a branchhead cache, self, that may have extra nodes or be
261 261 missing heads, and a generator of nodes that are strictly a superset of
262 262 the missing heads, this function updates self to be correct.
263 263 """
264 starttime = time.time()
264 starttime = util.timer()
265 265 cl = repo.changelog
266 266 # collect new branch entries
267 267 newbranches = {}
268 268 getbranchinfo = repo.revbranchcache().branchinfo
269 269 for r in revgen:
270 270 branch, closesbranch = getbranchinfo(r)
271 271 newbranches.setdefault(branch, []).append(r)
272 272 if closesbranch:
273 273 self._closednodes.add(cl.node(r))
274 274
275 275 # fetch current topological heads to speed up filtering
276 276 topoheads = set(cl.headrevs())
277 277
278 278 # if older branchheads are reachable from new ones, they aren't
279 279 # really branchheads. Note checking parents is insufficient:
280 280 # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
281 281 for branch, newheadrevs in newbranches.iteritems():
282 282 bheads = self.setdefault(branch, [])
283 283 bheadset = set(cl.rev(node) for node in bheads)
284 284
 285 285             # This has been tested True on all internal uses of this function.
286 286 # run it again in case of doubt
287 287 # assert not (set(bheadrevs) & set(newheadrevs))
288 288 newheadrevs.sort()
289 289 bheadset.update(newheadrevs)
290 290
291 291 # This prunes out two kinds of heads - heads that are superseded by
292 292 # a head in newheadrevs, and newheadrevs that are not heads because
293 293 # an existing head is their descendant.
294 294 uncertain = bheadset - topoheads
295 295 if uncertain:
296 296 floorrev = min(uncertain)
297 297 ancestors = set(cl.ancestors(newheadrevs, floorrev))
298 298 bheadset -= ancestors
299 299 bheadrevs = sorted(bheadset)
300 300 self[branch] = [cl.node(rev) for rev in bheadrevs]
301 301 tiprev = bheadrevs[-1]
302 302 if tiprev > self.tiprev:
303 303 self.tipnode = cl.node(tiprev)
304 304 self.tiprev = tiprev
305 305
306 306 if not self.validfor(repo):
307 307 # cache key are not valid anymore
308 308 self.tipnode = nullid
309 309 self.tiprev = nullrev
310 310 for heads in self.values():
311 311 tiprev = max(cl.rev(node) for node in heads)
312 312 if tiprev > self.tiprev:
313 313 self.tipnode = cl.node(tiprev)
314 314 self.tiprev = tiprev
315 315 self.filteredhash = scmutil.filteredhash(repo, self.tiprev)
316 316
317 duration = time.time() - starttime
317 duration = util.timer() - starttime
318 318 repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
319 319 repo.filtername, duration)
320 320
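
The two changed lines above show the pattern this changeset applies throughout: take both readings from util.timer() instead of time.time(), so the logged duration comes from a single high-resolution clock. A minimal standalone sketch of that pattern, assuming a Mercurial tree that already provides util.timer (added earlier in this series); the workload function is invented for illustration:

from mercurial import util

def _work():
    # hypothetical stand-in for the cache update being timed
    sum(range(10000))

starttime = util.timer()
_work()
duration = util.timer() - starttime
# duration feeds the '%.4f seconds' log message above
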
321 321 # Revision branch info cache
322 322
323 323 _rbcversion = '-v1'
324 324 _rbcnames = 'cache/rbc-names' + _rbcversion
325 325 _rbcrevs = 'cache/rbc-revs' + _rbcversion
 326 326 # [4 byte hash prefix][4 byte branch name number with sign bit indicating a branch close]
327 327 _rbcrecfmt = '>4sI'
328 328 _rbcrecsize = calcsize(_rbcrecfmt)
329 329 _rbcnodelen = 4
330 330 _rbcbranchidxmask = 0x7fffffff
331 331 _rbccloseflag = 0x80000000
332 332
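
A small sketch, not part of the change, of how one rbc-revs record is laid out with the constants above; the node prefix and branch index are invented for illustration:

from struct import calcsize, pack, unpack

_rbcrecfmt = '>4sI'                     # as defined above
_rbcrecsize = calcsize(_rbcrecfmt)      # 8 bytes per revision
_rbcbranchidxmask = 0x7fffffff
_rbccloseflag = 0x80000000

rec = pack(_rbcrecfmt, '\xde\xad\xbe\xef', 5 | _rbccloseflag)
assert len(rec) == _rbcrecsize
node, field = unpack(_rbcrecfmt, rec)
branchidx = field & _rbcbranchidxmask   # -> 5
close = bool(field & _rbccloseflag)     # -> True, a branch-closing head
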
333 333 class revbranchcache(object):
334 334 """Persistent cache, mapping from revision number to branch name and close.
335 335 This is a low level cache, independent of filtering.
336 336
337 337 Branch names are stored in rbc-names in internal encoding separated by 0.
338 338 rbc-names is append-only, and each branch name is only stored once and will
339 339 thus have a unique index.
340 340
341 341 The branch info for each revision is stored in rbc-revs as constant size
342 342 records. The whole file is read into memory, but it is only 'parsed' on
343 343 demand. The file is usually append-only but will be truncated if repo
344 344 modification is detected.
345 345 The record for each revision contains the first 4 bytes of the
346 346 corresponding node hash, and the record is only used if it still matches.
 347 347     Even a completely trashed rbc-revs will thus still give the right result
348 348 while converging towards full recovery ... assuming no incorrectly matching
349 349 node hashes.
 350 350     The record also contains 4 bytes where 31 bits contain the index of the
 351 351     branch and the last bit indicates that it is a branch-closing commit.
352 352 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
353 353 and will grow with it but be 1/8th of its size.
354 354 """
355 355
356 356 def __init__(self, repo, readonly=True):
357 357 assert repo.filtername is None
358 358 self._repo = repo
359 359 self._names = [] # branch names in local encoding with static index
360 360 self._rbcrevs = array('c') # structs of type _rbcrecfmt
361 361 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
362 362 try:
363 363 bndata = repo.vfs.read(_rbcnames)
364 364 self._rbcsnameslen = len(bndata) # for verification before writing
365 365 self._names = [encoding.tolocal(bn) for bn in bndata.split('\0')]
366 366 except (IOError, OSError):
367 367 if readonly:
368 368 # don't try to use cache - fall back to the slow path
369 369 self.branchinfo = self._branchinfo
370 370
371 371 if self._names:
372 372 try:
373 373 data = repo.vfs.read(_rbcrevs)
374 374 self._rbcrevs.fromstring(data)
375 375 except (IOError, OSError) as inst:
376 376 repo.ui.debug("couldn't read revision branch cache: %s\n" %
377 377 inst)
378 378 # remember number of good records on disk
379 379 self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
380 380 len(repo.changelog))
381 381 if self._rbcrevslen == 0:
382 382 self._names = []
383 383 self._rbcnamescount = len(self._names) # number of names read at
384 384 # _rbcsnameslen
385 385 self._namesreverse = dict((b, r) for r, b in enumerate(self._names))
386 386
387 387 def _clear(self):
388 388 self._rbcsnameslen = 0
389 389 del self._names[:]
390 390 self._rbcnamescount = 0
391 391 self._namesreverse.clear()
392 392 self._rbcrevslen = len(self._repo.changelog)
393 393 self._rbcrevs = array('c')
394 394 self._rbcrevs.fromstring('\0' * (self._rbcrevslen * _rbcrecsize))
395 395
396 396 def branchinfo(self, rev):
397 397 """Return branch name and close flag for rev, using and updating
398 398 persistent cache."""
399 399 changelog = self._repo.changelog
400 400 rbcrevidx = rev * _rbcrecsize
401 401
402 402 # avoid negative index, changelog.read(nullrev) is fast without cache
403 403 if rev == nullrev:
404 404 return changelog.branchinfo(rev)
405 405
406 406 # if requested rev isn't allocated, grow and cache the rev info
407 407 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
408 408 return self._branchinfo(rev)
409 409
410 410 # fast path: extract data from cache, use it if node is matching
411 411 reponode = changelog.node(rev)[:_rbcnodelen]
412 412 cachenode, branchidx = unpack(
413 413 _rbcrecfmt, buffer(self._rbcrevs, rbcrevidx, _rbcrecsize))
414 414 close = bool(branchidx & _rbccloseflag)
415 415 if close:
416 416 branchidx &= _rbcbranchidxmask
417 417 if cachenode == '\0\0\0\0':
418 418 pass
419 419 elif cachenode == reponode:
420 420 try:
421 421 return self._names[branchidx], close
422 422 except IndexError:
423 423 # recover from invalid reference to unknown branch
424 424 self._repo.ui.debug("referenced branch names not found"
425 425 " - rebuilding revision branch cache from scratch\n")
426 426 self._clear()
427 427 else:
428 428 # rev/node map has changed, invalidate the cache from here up
429 429 self._repo.ui.debug("history modification detected - truncating "
430 430 "revision branch cache to revision %s\n" % rev)
431 431 truncate = rbcrevidx + _rbcrecsize
432 432 del self._rbcrevs[truncate:]
433 433 self._rbcrevslen = min(self._rbcrevslen, truncate)
434 434
435 435 # fall back to slow path and make sure it will be written to disk
436 436 return self._branchinfo(rev)
437 437
438 438 def _branchinfo(self, rev):
439 439 """Retrieve branch info from changelog and update _rbcrevs"""
440 440 changelog = self._repo.changelog
441 441 b, close = changelog.branchinfo(rev)
442 442 if b in self._namesreverse:
443 443 branchidx = self._namesreverse[b]
444 444 else:
445 445 branchidx = len(self._names)
446 446 self._names.append(b)
447 447 self._namesreverse[b] = branchidx
448 448 reponode = changelog.node(rev)
449 449 if close:
450 450 branchidx |= _rbccloseflag
451 451 self._setcachedata(rev, reponode, branchidx)
452 452 return b, close
453 453
454 454 def _setcachedata(self, rev, node, branchidx):
455 455 """Writes the node's branch data to the in-memory cache data."""
456 456 rbcrevidx = rev * _rbcrecsize
457 457 rec = array('c')
458 458 rec.fromstring(pack(_rbcrecfmt, node, branchidx))
459 459 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
460 460 self._rbcrevs.extend('\0' *
461 461 (len(self._repo.changelog) * _rbcrecsize -
462 462 len(self._rbcrevs)))
463 463 self._rbcrevs[rbcrevidx:rbcrevidx + _rbcrecsize] = rec
464 464 self._rbcrevslen = min(self._rbcrevslen, rev)
465 465
466 466 tr = self._repo.currenttransaction()
467 467 if tr:
468 468 tr.addfinalize('write-revbranchcache', self.write)
469 469
470 470 def write(self, tr=None):
471 471 """Save branch cache if it is dirty."""
472 472 repo = self._repo
473 473 wlock = None
474 474 step = ''
475 475 try:
476 476 if self._rbcnamescount < len(self._names):
477 477 step = ' names'
478 478 wlock = repo.wlock(wait=False)
479 479 if self._rbcnamescount != 0:
480 480 f = repo.vfs.open(_rbcnames, 'ab')
481 481 if f.tell() == self._rbcsnameslen:
482 482 f.write('\0')
483 483 else:
484 484 f.close()
485 485 repo.ui.debug("%s changed - rewriting it\n" % _rbcnames)
486 486 self._rbcnamescount = 0
487 487 self._rbcrevslen = 0
488 488 if self._rbcnamescount == 0:
489 489 # before rewriting names, make sure references are removed
490 490 repo.vfs.unlinkpath(_rbcrevs, ignoremissing=True)
491 491 f = repo.vfs.open(_rbcnames, 'wb')
492 492 f.write('\0'.join(encoding.fromlocal(b)
493 493 for b in self._names[self._rbcnamescount:]))
494 494 self._rbcsnameslen = f.tell()
495 495 f.close()
496 496 self._rbcnamescount = len(self._names)
497 497
498 498 start = self._rbcrevslen * _rbcrecsize
499 499 if start != len(self._rbcrevs):
500 500 step = ''
501 501 if wlock is None:
502 502 wlock = repo.wlock(wait=False)
503 503 revs = min(len(repo.changelog),
504 504 len(self._rbcrevs) // _rbcrecsize)
505 505 f = repo.vfs.open(_rbcrevs, 'ab')
506 506 if f.tell() != start:
507 507 repo.ui.debug("truncating %s to %s\n" % (_rbcrevs, start))
508 508 f.seek(start)
509 509 if f.tell() != start:
510 510 start = 0
511 511 f.seek(start)
512 512 f.truncate()
513 513 end = revs * _rbcrecsize
514 514 f.write(self._rbcrevs[start:end])
515 515 f.close()
516 516 self._rbcrevslen = revs
517 517 except (IOError, OSError, error.Abort, error.LockError) as inst:
518 518 repo.ui.debug("couldn't write revision branch cache%s: %s\n"
519 519 % (step, inst))
520 520 finally:
521 521 if wlock is not None:
522 522 wlock.release()
@@ -1,889 +1,889 b''
1 1 # dispatch.py - command dispatching for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import, print_function
9 9
10 10 import atexit
11 11 import difflib
12 12 import errno
13 13 import getopt
14 14 import os
15 15 import pdb
16 16 import re
17 17 import signal
18 18 import sys
19 19 import time
20 20 import traceback
21 21
22 22
23 23 from .i18n import _
24 24
25 25 from . import (
26 26 cmdutil,
27 27 color,
28 28 commands,
29 29 debugcommands,
30 30 demandimport,
31 31 encoding,
32 32 error,
33 33 extensions,
34 34 fancyopts,
35 35 fileset,
36 36 hg,
37 37 hook,
38 38 profiling,
39 39 pycompat,
40 40 revset,
41 41 scmutil,
42 42 templatefilters,
43 43 templatekw,
44 44 templater,
45 45 ui as uimod,
46 46 util,
47 47 )
48 48
49 49 class request(object):
50 50 def __init__(self, args, ui=None, repo=None, fin=None, fout=None,
51 51 ferr=None):
52 52 self.args = args
53 53 self.ui = ui
54 54 self.repo = repo
55 55
56 56 # input/output/error streams
57 57 self.fin = fin
58 58 self.fout = fout
59 59 self.ferr = ferr
60 60
61 61 def run():
62 62 "run the command in sys.argv"
63 63 sys.exit((dispatch(request(pycompat.sysargv[1:])) or 0) & 255)
64 64
65 65 def _getsimilar(symbols, value):
66 66 sim = lambda x: difflib.SequenceMatcher(None, value, x).ratio()
67 67 # The cutoff for similarity here is pretty arbitrary. It should
68 68 # probably be investigated and tweaked.
69 69 return [s for s in symbols if sim(s) > 0.6]
70 70
71 71 def _reportsimilar(write, similar):
72 72 if len(similar) == 1:
73 73 write(_("(did you mean %s?)\n") % similar[0])
74 74 elif similar:
75 75 ss = ", ".join(sorted(similar))
76 76 write(_("(did you mean one of %s?)\n") % ss)
77 77
78 78 def _formatparse(write, inst):
79 79 similar = []
80 80 if isinstance(inst, error.UnknownIdentifier):
81 81 # make sure to check fileset first, as revset can invoke fileset
82 82 similar = _getsimilar(inst.symbols, inst.function)
83 83 if len(inst.args) > 1:
84 84 write(_("hg: parse error at %s: %s\n") %
85 85 (inst.args[1], inst.args[0]))
86 86 if (inst.args[0][0] == ' '):
87 87 write(_("unexpected leading whitespace\n"))
88 88 else:
89 89 write(_("hg: parse error: %s\n") % inst.args[0])
90 90 _reportsimilar(write, similar)
91 91 if inst.hint:
92 92 write(_("(%s)\n") % inst.hint)
93 93
94 94 def dispatch(req):
95 95 "run the command specified in req.args"
96 96 if req.ferr:
97 97 ferr = req.ferr
98 98 elif req.ui:
99 99 ferr = req.ui.ferr
100 100 else:
101 101 ferr = util.stderr
102 102
103 103 try:
104 104 if not req.ui:
105 105 req.ui = uimod.ui.load()
106 106 if '--traceback' in req.args:
107 107 req.ui.setconfig('ui', 'traceback', 'on', '--traceback')
108 108
109 109 # set ui streams from the request
110 110 if req.fin:
111 111 req.ui.fin = req.fin
112 112 if req.fout:
113 113 req.ui.fout = req.fout
114 114 if req.ferr:
115 115 req.ui.ferr = req.ferr
116 116 except error.Abort as inst:
117 117 ferr.write(_("abort: %s\n") % inst)
118 118 if inst.hint:
119 119 ferr.write(_("(%s)\n") % inst.hint)
120 120 return -1
121 121 except error.ParseError as inst:
122 122 _formatparse(ferr.write, inst)
123 123 return -1
124 124
125 125 msg = ' '.join(' ' in a and repr(a) or a for a in req.args)
126 starttime = time.time()
126 starttime = util.timer()
127 127 ret = None
128 128 try:
129 129 ret = _runcatch(req)
130 130 except KeyboardInterrupt:
131 131 try:
132 132 req.ui.warn(_("interrupted!\n"))
133 133 except IOError as inst:
134 134 if inst.errno != errno.EPIPE:
135 135 raise
136 136 ret = -1
137 137 finally:
138 duration = time.time() - starttime
138 duration = util.timer() - starttime
139 139 req.ui.flush()
140 140 req.ui.log("commandfinish", "%s exited %s after %0.2f seconds\n",
141 141 msg, ret or 0, duration)
142 142 return ret
143 143
144 144 def _runcatch(req):
145 145 def catchterm(*args):
146 146 raise error.SignalInterrupt
147 147
148 148 ui = req.ui
149 149 try:
150 150 for name in 'SIGBREAK', 'SIGHUP', 'SIGTERM':
151 151 num = getattr(signal, name, None)
152 152 if num:
153 153 signal.signal(num, catchterm)
154 154 except ValueError:
155 155 pass # happens if called in a thread
156 156
157 157 def _runcatchfunc():
158 158 try:
159 159 debugger = 'pdb'
160 160 debugtrace = {
161 161 'pdb' : pdb.set_trace
162 162 }
163 163 debugmortem = {
164 164 'pdb' : pdb.post_mortem
165 165 }
166 166
167 167 # read --config before doing anything else
168 168 # (e.g. to change trust settings for reading .hg/hgrc)
169 169 cfgs = _parseconfig(req.ui, _earlygetopt(['--config'], req.args))
170 170
171 171 if req.repo:
172 172 # copy configs that were passed on the cmdline (--config) to
173 173 # the repo ui
174 174 for sec, name, val in cfgs:
175 175 req.repo.ui.setconfig(sec, name, val, source='--config')
176 176
177 177 # developer config: ui.debugger
178 178 debugger = ui.config("ui", "debugger")
179 179 debugmod = pdb
180 180 if not debugger or ui.plain():
181 181 # if we are in HGPLAIN mode, then disable custom debugging
182 182 debugger = 'pdb'
183 183 elif '--debugger' in req.args:
184 184 # This import can be slow for fancy debuggers, so only
185 185 # do it when absolutely necessary, i.e. when actual
186 186 # debugging has been requested
187 187 with demandimport.deactivated():
188 188 try:
189 189 debugmod = __import__(debugger)
190 190 except ImportError:
191 191 pass # Leave debugmod = pdb
192 192
193 193 debugtrace[debugger] = debugmod.set_trace
194 194 debugmortem[debugger] = debugmod.post_mortem
195 195
196 196 # enter the debugger before command execution
197 197 if '--debugger' in req.args:
198 198 ui.warn(_("entering debugger - "
199 199 "type c to continue starting hg or h for help\n"))
200 200
201 201 if (debugger != 'pdb' and
202 202 debugtrace[debugger] == debugtrace['pdb']):
203 203 ui.warn(_("%s debugger specified "
204 204 "but its module was not found\n") % debugger)
205 205 with demandimport.deactivated():
206 206 debugtrace[debugger]()
207 207 try:
208 208 return _dispatch(req)
209 209 finally:
210 210 ui.flush()
211 211 except: # re-raises
212 212 # enter the debugger when we hit an exception
213 213 if '--debugger' in req.args:
214 214 traceback.print_exc()
215 215 debugmortem[debugger](sys.exc_info()[2])
216 216 ui.traceback()
217 217 raise
218 218
219 219 return callcatch(ui, _runcatchfunc)
220 220
221 221 def callcatch(ui, func):
222 222 """like scmutil.callcatch but handles more high-level exceptions about
223 223 config parsing and commands. besides, use handlecommandexception to handle
224 224 uncaught exceptions.
225 225 """
226 226 try:
227 227 return scmutil.callcatch(ui, func)
228 228 except error.AmbiguousCommand as inst:
229 229 ui.warn(_("hg: command '%s' is ambiguous:\n %s\n") %
230 230 (inst.args[0], " ".join(inst.args[1])))
231 231 except error.CommandError as inst:
232 232 if inst.args[0]:
233 233 ui.warn(_("hg %s: %s\n") % (inst.args[0], inst.args[1]))
234 234 commands.help_(ui, inst.args[0], full=False, command=True)
235 235 else:
236 236 ui.warn(_("hg: %s\n") % inst.args[1])
237 237 commands.help_(ui, 'shortlist')
238 238 except error.ParseError as inst:
239 239 _formatparse(ui.warn, inst)
240 240 return -1
241 241 except error.UnknownCommand as inst:
242 242 ui.warn(_("hg: unknown command '%s'\n") % inst.args[0])
243 243 try:
244 244 # check if the command is in a disabled extension
245 245 # (but don't check for extensions themselves)
246 246 commands.help_(ui, inst.args[0], unknowncmd=True)
247 247 except (error.UnknownCommand, error.Abort):
248 248 suggested = False
249 249 if len(inst.args) == 2:
250 250 sim = _getsimilar(inst.args[1], inst.args[0])
251 251 if sim:
252 252 _reportsimilar(ui.warn, sim)
253 253 suggested = True
254 254 if not suggested:
255 255 commands.help_(ui, 'shortlist')
256 256 except IOError:
257 257 raise
258 258 except KeyboardInterrupt:
259 259 raise
260 260 except: # probably re-raises
261 261 if not handlecommandexception(ui):
262 262 raise
263 263
264 264 return -1
265 265
266 266 def aliasargs(fn, givenargs):
267 267 args = getattr(fn, 'args', [])
268 268 if args:
269 269 cmd = ' '.join(map(util.shellquote, args))
270 270
271 271 nums = []
272 272 def replacer(m):
273 273 num = int(m.group(1)) - 1
274 274 nums.append(num)
275 275 if num < len(givenargs):
276 276 return givenargs[num]
277 277 raise error.Abort(_('too few arguments for command alias'))
278 278 cmd = re.sub(r'\$(\d+|\$)', replacer, cmd)
279 279 givenargs = [x for i, x in enumerate(givenargs)
280 280 if i not in nums]
281 281 args = pycompat.shlexsplit(cmd)
282 282 return args + givenargs
283 283
284 284 def aliasinterpolate(name, args, cmd):
285 285 '''interpolate args into cmd for shell aliases
286 286
287 287 This also handles $0, $@ and "$@".
288 288 '''
289 289 # util.interpolate can't deal with "$@" (with quotes) because it's only
290 290 # built to match prefix + patterns.
291 291 replacemap = dict(('$%d' % (i + 1), arg) for i, arg in enumerate(args))
292 292 replacemap['$0'] = name
293 293 replacemap['$$'] = '$'
294 294 replacemap['$@'] = ' '.join(args)
295 295 # Typical Unix shells interpolate "$@" (with quotes) as all the positional
296 296 # parameters, separated out into words. Emulate the same behavior here by
297 297 # quoting the arguments individually. POSIX shells will then typically
298 298 # tokenize each argument into exactly one word.
299 299 replacemap['"$@"'] = ' '.join(util.shellquote(arg) for arg in args)
300 300 # escape '\$' for regex
301 301 regex = '|'.join(replacemap.keys()).replace('$', r'\$')
302 302 r = re.compile(regex)
303 303 return r.sub(lambda x: replacemap[x.group()], cmd)
304 304
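
As a hedged illustration of the interpolation above (alias name, arguments, and command body are all invented), a shell alias using $0 and "$@" expands roughly as follows:

# hypothetical inputs
name = 'myecho'
args = ['one', 'two words']
cmd = 'echo $0 "$@"'
# aliasinterpolate(name, args, cmd) builds replacemap with, among others:
#   '$0'   -> 'myecho'
#   '$1'   -> 'one', '$2' -> 'two words'
#   '"$@"' -> "'one' 'two words'"   (each argument shell-quoted)
# and returns approximately:
#   echo myecho 'one' 'two words'
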
305 305 class cmdalias(object):
306 306 def __init__(self, name, definition, cmdtable, source):
307 307 self.name = self.cmd = name
308 308 self.cmdname = ''
309 309 self.definition = definition
310 310 self.fn = None
311 311 self.givenargs = []
312 312 self.opts = []
313 313 self.help = ''
314 314 self.badalias = None
315 315 self.unknowncmd = False
316 316 self.source = source
317 317
318 318 try:
319 319 aliases, entry = cmdutil.findcmd(self.name, cmdtable)
320 320 for alias, e in cmdtable.iteritems():
321 321 if e is entry:
322 322 self.cmd = alias
323 323 break
324 324 self.shadows = True
325 325 except error.UnknownCommand:
326 326 self.shadows = False
327 327
328 328 if not self.definition:
329 329 self.badalias = _("no definition for alias '%s'") % self.name
330 330 return
331 331
332 332 if self.definition.startswith('!'):
333 333 self.shell = True
334 334 def fn(ui, *args):
335 335 env = {'HG_ARGS': ' '.join((self.name,) + args)}
336 336 def _checkvar(m):
337 337 if m.groups()[0] == '$':
338 338 return m.group()
339 339 elif int(m.groups()[0]) <= len(args):
340 340 return m.group()
341 341 else:
342 342 ui.debug("No argument found for substitution "
343 343 "of %i variable in alias '%s' definition."
344 344 % (int(m.groups()[0]), self.name))
345 345 return ''
346 346 cmd = re.sub(r'\$(\d+|\$)', _checkvar, self.definition[1:])
347 347 cmd = aliasinterpolate(self.name, args, cmd)
348 348 return ui.system(cmd, environ=env)
349 349 self.fn = fn
350 350 return
351 351
352 352 try:
353 353 args = pycompat.shlexsplit(self.definition)
354 354 except ValueError as inst:
355 355 self.badalias = (_("error in definition for alias '%s': %s")
356 356 % (self.name, inst))
357 357 return
358 358 self.cmdname = cmd = args.pop(0)
359 359 self.givenargs = args
360 360
361 361 for invalidarg in ("--cwd", "-R", "--repository", "--repo", "--config"):
362 362 if _earlygetopt([invalidarg], args):
363 363 self.badalias = (_("error in definition for alias '%s': %s may "
364 364 "only be given on the command line")
365 365 % (self.name, invalidarg))
366 366 return
367 367
368 368 try:
369 369 tableentry = cmdutil.findcmd(cmd, cmdtable, False)[1]
370 370 if len(tableentry) > 2:
371 371 self.fn, self.opts, self.help = tableentry
372 372 else:
373 373 self.fn, self.opts = tableentry
374 374
375 375 if self.help.startswith("hg " + cmd):
376 376 # drop prefix in old-style help lines so hg shows the alias
377 377 self.help = self.help[4 + len(cmd):]
378 378 self.__doc__ = self.fn.__doc__
379 379
380 380 except error.UnknownCommand:
381 381 self.badalias = (_("alias '%s' resolves to unknown command '%s'")
382 382 % (self.name, cmd))
383 383 self.unknowncmd = True
384 384 except error.AmbiguousCommand:
385 385 self.badalias = (_("alias '%s' resolves to ambiguous command '%s'")
386 386 % (self.name, cmd))
387 387
388 388 @property
389 389 def args(self):
390 390 args = map(util.expandpath, self.givenargs)
391 391 return aliasargs(self.fn, args)
392 392
393 393 def __getattr__(self, name):
394 394 adefaults = {'norepo': True, 'optionalrepo': False, 'inferrepo': False}
395 395 if name not in adefaults:
396 396 raise AttributeError(name)
397 397 if self.badalias or util.safehasattr(self, 'shell'):
398 398 return adefaults[name]
399 399 return getattr(self.fn, name)
400 400
401 401 def __call__(self, ui, *args, **opts):
402 402 if self.badalias:
403 403 hint = None
404 404 if self.unknowncmd:
405 405 try:
406 406 # check if the command is in a disabled extension
407 407 cmd, ext = extensions.disabledcmd(ui, self.cmdname)[:2]
408 408 hint = _("'%s' is provided by '%s' extension") % (cmd, ext)
409 409 except error.UnknownCommand:
410 410 pass
411 411 raise error.Abort(self.badalias, hint=hint)
412 412 if self.shadows:
413 413 ui.debug("alias '%s' shadows command '%s'\n" %
414 414 (self.name, self.cmdname))
415 415
416 416 ui.log('commandalias', "alias '%s' expands to '%s'\n",
417 417 self.name, self.definition)
418 418 if util.safehasattr(self, 'shell'):
419 419 return self.fn(ui, *args, **opts)
420 420 else:
421 421 try:
422 422 return util.checksignature(self.fn)(ui, *args, **opts)
423 423 except error.SignatureError:
424 424 args = ' '.join([self.cmdname] + self.args)
425 425 ui.debug("alias '%s' expands to '%s'\n" % (self.name, args))
426 426 raise
427 427
428 428 def addaliases(ui, cmdtable):
429 429 # aliases are processed after extensions have been loaded, so they
430 430 # may use extension commands. Aliases can also use other alias definitions,
431 431 # but only if they have been defined prior to the current definition.
432 432 for alias, definition in ui.configitems('alias'):
433 433 source = ui.configsource('alias', alias)
434 434 aliasdef = cmdalias(alias, definition, cmdtable, source)
435 435
436 436 try:
437 437 olddef = cmdtable[aliasdef.cmd][0]
438 438 if olddef.definition == aliasdef.definition:
439 439 continue
440 440 except (KeyError, AttributeError):
441 441 # definition might not exist or it might not be a cmdalias
442 442 pass
443 443
444 444 cmdtable[aliasdef.name] = (aliasdef, aliasdef.opts, aliasdef.help)
445 445
446 446 def _parse(ui, args):
447 447 options = {}
448 448 cmdoptions = {}
449 449
450 450 try:
451 451 args = fancyopts.fancyopts(args, commands.globalopts, options)
452 452 except getopt.GetoptError as inst:
453 453 raise error.CommandError(None, inst)
454 454
455 455 if args:
456 456 cmd, args = args[0], args[1:]
457 457 aliases, entry = cmdutil.findcmd(cmd, commands.table,
458 458 ui.configbool("ui", "strict"))
459 459 cmd = aliases[0]
460 460 args = aliasargs(entry[0], args)
461 461 defaults = ui.config("defaults", cmd)
462 462 if defaults:
463 463 args = map(util.expandpath, pycompat.shlexsplit(defaults)) + args
464 464 c = list(entry[1])
465 465 else:
466 466 cmd = None
467 467 c = []
468 468
469 469 # combine global options into local
470 470 for o in commands.globalopts:
471 471 c.append((o[0], o[1], options[o[1]], o[3]))
472 472
473 473 try:
474 474 args = fancyopts.fancyopts(args, c, cmdoptions, gnu=True)
475 475 except getopt.GetoptError as inst:
476 476 raise error.CommandError(cmd, inst)
477 477
478 478 # separate global options back out
479 479 for o in commands.globalopts:
480 480 n = o[1]
481 481 options[n] = cmdoptions[n]
482 482 del cmdoptions[n]
483 483
484 484 return (cmd, cmd and entry[0] or None, args, options, cmdoptions)
485 485
486 486 def _parseconfig(ui, config):
487 487 """parse the --config options from the command line"""
488 488 configs = []
489 489
490 490 for cfg in config:
491 491 try:
492 492 name, value = [cfgelem.strip()
493 493 for cfgelem in cfg.split('=', 1)]
494 494 section, name = name.split('.', 1)
495 495 if not section or not name:
496 496 raise IndexError
497 497 ui.setconfig(section, name, value, '--config')
498 498 configs.append((section, name, value))
499 499 except (IndexError, ValueError):
500 500 raise error.Abort(_('malformed --config option: %r '
501 501 '(use --config section.name=value)') % cfg)
502 502
503 503 return configs
504 504
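
A quick, invented example of the parsing above: each --config value is split on the first '=' and then the first '.', applied to the ui, and also returned to the caller.

# hypothetical invocation:
#   hg --config ui.username=alice status
# _parseconfig(ui, ['ui.username=alice']) returns [('ui', 'username', 'alice')]
# after having called ui.setconfig('ui', 'username', 'alice', '--config');
# anything not of the form section.name=value aborts with the hint above.
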
505 505 def _earlygetopt(aliases, args):
506 506 """Return list of values for an option (or aliases).
507 507
508 508 The values are listed in the order they appear in args.
509 509 The options and values are removed from args.
510 510
511 511 >>> args = ['x', '--cwd', 'foo', 'y']
512 512 >>> _earlygetopt(['--cwd'], args), args
513 513 (['foo'], ['x', 'y'])
514 514
515 515 >>> args = ['x', '--cwd=bar', 'y']
516 516 >>> _earlygetopt(['--cwd'], args), args
517 517 (['bar'], ['x', 'y'])
518 518
519 519 >>> args = ['x', '-R', 'foo', 'y']
520 520 >>> _earlygetopt(['-R'], args), args
521 521 (['foo'], ['x', 'y'])
522 522
523 523 >>> args = ['x', '-Rbar', 'y']
524 524 >>> _earlygetopt(['-R'], args), args
525 525 (['bar'], ['x', 'y'])
526 526 """
527 527 try:
528 528 argcount = args.index("--")
529 529 except ValueError:
530 530 argcount = len(args)
531 531 shortopts = [opt for opt in aliases if len(opt) == 2]
532 532 values = []
533 533 pos = 0
534 534 while pos < argcount:
535 535 fullarg = arg = args[pos]
536 536 equals = arg.find('=')
537 537 if equals > -1:
538 538 arg = arg[:equals]
539 539 if arg in aliases:
540 540 del args[pos]
541 541 if equals > -1:
542 542 values.append(fullarg[equals + 1:])
543 543 argcount -= 1
544 544 else:
545 545 if pos + 1 >= argcount:
546 546 # ignore and let getopt report an error if there is no value
547 547 break
548 548 values.append(args.pop(pos))
549 549 argcount -= 2
550 550 elif arg[:2] in shortopts:
551 551 # short option can have no following space, e.g. hg log -Rfoo
552 552 values.append(args.pop(pos)[2:])
553 553 argcount -= 1
554 554 else:
555 555 pos += 1
556 556 return values
557 557
558 558 def runcommand(lui, repo, cmd, fullargs, ui, options, d, cmdpats, cmdoptions):
559 559 # run pre-hook, and abort if it fails
560 560 hook.hook(lui, repo, "pre-%s" % cmd, True, args=" ".join(fullargs),
561 561 pats=cmdpats, opts=cmdoptions)
562 562 try:
563 563 ret = _runcommand(ui, options, cmd, d)
564 564 # run post-hook, passing command result
565 565 hook.hook(lui, repo, "post-%s" % cmd, False, args=" ".join(fullargs),
566 566 result=ret, pats=cmdpats, opts=cmdoptions)
567 567 except Exception:
568 568 # run failure hook and re-raise
569 569 hook.hook(lui, repo, "fail-%s" % cmd, False, args=" ".join(fullargs),
570 570 pats=cmdpats, opts=cmdoptions)
571 571 raise
572 572 return ret
573 573
574 574 def _getlocal(ui, rpath, wd=None):
575 575 """Return (path, local ui object) for the given target path.
576 576
 577 577     Takes paths in [cwd]/.hg/hgrc into account.
578 578 """
579 579 if wd is None:
580 580 try:
581 581 wd = pycompat.getcwd()
582 582 except OSError as e:
583 583 raise error.Abort(_("error getting current working directory: %s") %
584 584 e.strerror)
585 585 path = cmdutil.findrepo(wd) or ""
586 586 if not path:
587 587 lui = ui
588 588 else:
589 589 lui = ui.copy()
590 590 lui.readconfig(os.path.join(path, ".hg", "hgrc"), path)
591 591
592 592 if rpath and rpath[-1]:
593 593 path = lui.expandpath(rpath[-1])
594 594 lui = ui.copy()
595 595 lui.readconfig(os.path.join(path, ".hg", "hgrc"), path)
596 596
597 597 return path, lui
598 598
599 599 def _checkshellalias(lui, ui, args):
600 600 """Return the function to run the shell alias, if it is required"""
601 601 options = {}
602 602
603 603 try:
604 604 args = fancyopts.fancyopts(args, commands.globalopts, options)
605 605 except getopt.GetoptError:
606 606 return
607 607
608 608 if not args:
609 609 return
610 610
611 611 cmdtable = commands.table
612 612
613 613 cmd = args[0]
614 614 try:
615 615 strict = ui.configbool("ui", "strict")
616 616 aliases, entry = cmdutil.findcmd(cmd, cmdtable, strict)
617 617 except (error.AmbiguousCommand, error.UnknownCommand):
618 618 return
619 619
620 620 cmd = aliases[0]
621 621 fn = entry[0]
622 622
623 623 if cmd and util.safehasattr(fn, 'shell'):
624 624 d = lambda: fn(ui, *args[1:])
625 625 return lambda: runcommand(lui, None, cmd, args[:1], ui, options, d,
626 626 [], {})
627 627
628 628 _loaded = set()
629 629
630 630 # list of (objname, loadermod, loadername) tuple:
631 631 # - objname is the name of an object in extension module, from which
632 632 # extra information is loaded
633 633 # - loadermod is the module where loader is placed
634 634 # - loadername is the name of the function, which takes (ui, extensionname,
635 635 # extraobj) arguments
636 636 extraloaders = [
637 637 ('cmdtable', commands, 'loadcmdtable'),
638 638 ('colortable', color, 'loadcolortable'),
639 639 ('filesetpredicate', fileset, 'loadpredicate'),
640 640 ('revsetpredicate', revset, 'loadpredicate'),
641 641 ('templatefilter', templatefilters, 'loadfilter'),
642 642 ('templatefunc', templater, 'loadfunction'),
643 643 ('templatekeyword', templatekw, 'loadkeyword'),
644 644 ]
645 645
646 646 def _dispatch(req):
647 647 args = req.args
648 648 ui = req.ui
649 649
650 650 # check for cwd
651 651 cwd = _earlygetopt(['--cwd'], args)
652 652 if cwd:
653 653 os.chdir(cwd[-1])
654 654
655 655 rpath = _earlygetopt(["-R", "--repository", "--repo"], args)
656 656 path, lui = _getlocal(ui, rpath)
657 657
 658 658     # Side-effect of accessing: the debugcommands module is guaranteed to be
 659 659     # imported and commands.table is populated.
660 660 debugcommands.command
661 661
662 662 uis = set([ui, lui])
663 663
664 664 if req.repo:
665 665 uis.add(req.repo.ui)
666 666
667 667 if '--profile' in args:
668 668 for ui_ in uis:
669 669 ui_.setconfig('profiling', 'enabled', 'true', '--profile')
670 670
671 671 with profiling.maybeprofile(lui):
672 672 # Configure extensions in phases: uisetup, extsetup, cmdtable, and
673 673 # reposetup. Programs like TortoiseHg will call _dispatch several
674 674 # times so we keep track of configured extensions in _loaded.
675 675 extensions.loadall(lui)
676 676 exts = [ext for ext in extensions.extensions() if ext[0] not in _loaded]
677 677 # Propagate any changes to lui.__class__ by extensions
678 678 ui.__class__ = lui.__class__
679 679
680 680 # (uisetup and extsetup are handled in extensions.loadall)
681 681
682 682 for name, module in exts:
683 683 for objname, loadermod, loadername in extraloaders:
684 684 extraobj = getattr(module, objname, None)
685 685 if extraobj is not None:
686 686 getattr(loadermod, loadername)(ui, name, extraobj)
687 687 _loaded.add(name)
688 688
689 689 # (reposetup is handled in hg.repository)
690 690
691 691 addaliases(lui, commands.table)
692 692
693 693 # All aliases and commands are completely defined, now.
694 694 # Check abbreviation/ambiguity of shell alias.
695 695 shellaliasfn = _checkshellalias(lui, ui, args)
696 696 if shellaliasfn:
697 697 return shellaliasfn()
698 698
699 699 # check for fallback encoding
700 700 fallback = lui.config('ui', 'fallbackencoding')
701 701 if fallback:
702 702 encoding.fallbackencoding = fallback
703 703
704 704 fullargs = args
705 705 cmd, func, args, options, cmdoptions = _parse(lui, args)
706 706
707 707 if options["config"]:
708 708 raise error.Abort(_("option --config may not be abbreviated!"))
709 709 if options["cwd"]:
710 710 raise error.Abort(_("option --cwd may not be abbreviated!"))
711 711 if options["repository"]:
712 712 raise error.Abort(_(
713 713 "option -R has to be separated from other options (e.g. not "
714 714 "-qR) and --repository may only be abbreviated as --repo!"))
715 715
716 716 if options["encoding"]:
717 717 encoding.encoding = options["encoding"]
718 718 if options["encodingmode"]:
719 719 encoding.encodingmode = options["encodingmode"]
720 720 if options["time"]:
721 721 def get_times():
722 722 t = os.times()
723 723 if t[4] == 0.0:
724 724 # Windows leaves this as zero, so use time.clock()
725 725 t = (t[0], t[1], t[2], t[3], time.clock())
726 726 return t
727 727 s = get_times()
728 728 def print_time():
729 729 t = get_times()
730 730 ui.warn(
731 731 _("time: real %.3f secs (user %.3f+%.3f sys %.3f+%.3f)\n") %
732 732 (t[4]-s[4], t[0]-s[0], t[2]-s[2], t[1]-s[1], t[3]-s[3]))
733 733 atexit.register(print_time)
734 734
735 735 if options['verbose'] or options['debug'] or options['quiet']:
736 736 for opt in ('verbose', 'debug', 'quiet'):
737 737 val = str(bool(options[opt]))
738 738 for ui_ in uis:
739 739 ui_.setconfig('ui', opt, val, '--' + opt)
740 740
741 741 if options['traceback']:
742 742 for ui_ in uis:
743 743 ui_.setconfig('ui', 'traceback', 'on', '--traceback')
744 744
745 745 if options['noninteractive']:
746 746 for ui_ in uis:
747 747 ui_.setconfig('ui', 'interactive', 'off', '-y')
748 748
749 749 if cmdoptions.get('insecure', False):
750 750 for ui_ in uis:
751 751 ui_.insecureconnections = True
752 752
753 753 if options['version']:
754 754 return commands.version_(ui)
755 755 if options['help']:
756 756 return commands.help_(ui, cmd, command=cmd is not None)
757 757 elif not cmd:
758 758 return commands.help_(ui, 'shortlist')
759 759
760 760 repo = None
761 761 cmdpats = args[:]
762 762 if not func.norepo:
763 763 # use the repo from the request only if we don't have -R
764 764 if not rpath and not cwd:
765 765 repo = req.repo
766 766
767 767 if repo:
768 768 # set the descriptors of the repo ui to those of ui
769 769 repo.ui.fin = ui.fin
770 770 repo.ui.fout = ui.fout
771 771 repo.ui.ferr = ui.ferr
772 772 else:
773 773 try:
774 774 repo = hg.repository(ui, path=path)
775 775 if not repo.local():
776 776 raise error.Abort(_("repository '%s' is not local")
777 777 % path)
778 778 repo.ui.setconfig("bundle", "mainreporoot", repo.root,
779 779 'repo')
780 780 except error.RequirementError:
781 781 raise
782 782 except error.RepoError:
783 783 if rpath and rpath[-1]: # invalid -R path
784 784 raise
785 785 if not func.optionalrepo:
786 786 if func.inferrepo and args and not path:
787 787 # try to infer -R from command args
788 788 repos = map(cmdutil.findrepo, args)
789 789 guess = repos[0]
790 790 if guess and repos.count(guess) == len(repos):
791 791 req.args = ['--repository', guess] + fullargs
792 792 return _dispatch(req)
793 793 if not path:
794 794 raise error.RepoError(_("no repository found in"
795 795 " '%s' (.hg not found)")
796 796 % pycompat.getcwd())
797 797 raise
798 798 if repo:
799 799 ui = repo.ui
800 800 if options['hidden']:
801 801 repo = repo.unfiltered()
802 802 args.insert(0, repo)
803 803 elif rpath:
804 804 ui.warn(_("warning: --repository ignored\n"))
805 805
806 806 msg = ' '.join(' ' in a and repr(a) or a for a in fullargs)
807 807 ui.log("command", '%s\n', msg)
808 808 strcmdopt = pycompat.strkwargs(cmdoptions)
809 809 d = lambda: util.checksignature(func)(ui, *args, **strcmdopt)
810 810 try:
811 811 return runcommand(lui, repo, cmd, fullargs, ui, options, d,
812 812 cmdpats, cmdoptions)
813 813 finally:
814 814 if repo and repo != req.repo:
815 815 repo.close()
816 816
817 817 def _runcommand(ui, options, cmd, cmdfunc):
818 818 """Run a command function, possibly with profiling enabled."""
819 819 try:
820 820 return cmdfunc()
821 821 except error.SignatureError:
822 822 raise error.CommandError(cmd, _('invalid arguments'))
823 823
824 824 def _exceptionwarning(ui):
825 825 """Produce a warning message for the current active exception"""
826 826
827 827 # For compatibility checking, we discard the portion of the hg
828 828 # version after the + on the assumption that if a "normal
829 829 # user" is running a build with a + in it the packager
830 830 # probably built from fairly close to a tag and anyone with a
831 831 # 'make local' copy of hg (where the version number can be out
832 832 # of date) will be clueful enough to notice the implausible
833 833 # version number and try updating.
834 834 ct = util.versiontuple(n=2)
835 835 worst = None, ct, ''
836 836 if ui.config('ui', 'supportcontact', None) is None:
837 837 for name, mod in extensions.extensions():
838 838 testedwith = getattr(mod, 'testedwith', '')
839 839 report = getattr(mod, 'buglink', _('the extension author.'))
840 840 if not testedwith.strip():
841 841 # We found an untested extension. It's likely the culprit.
842 842 worst = name, 'unknown', report
843 843 break
844 844
845 845 # Never blame on extensions bundled with Mercurial.
846 846 if extensions.ismoduleinternal(mod):
847 847 continue
848 848
849 849 tested = [util.versiontuple(t, 2) for t in testedwith.split()]
850 850 if ct in tested:
851 851 continue
852 852
853 853 lower = [t for t in tested if t < ct]
854 854 nearest = max(lower or tested)
855 855 if worst[0] is None or nearest < worst[1]:
856 856 worst = name, nearest, report
857 857 if worst[0] is not None:
858 858 name, testedwith, report = worst
859 859 if not isinstance(testedwith, str):
860 860 testedwith = '.'.join([str(c) for c in testedwith])
861 861 warning = (_('** Unknown exception encountered with '
862 862 'possibly-broken third-party extension %s\n'
863 863 '** which supports versions %s of Mercurial.\n'
864 864 '** Please disable %s and try your action again.\n'
865 865 '** If that fixes the bug please report it to %s\n')
866 866 % (name, testedwith, name, report))
867 867 else:
868 868 bugtracker = ui.config('ui', 'supportcontact', None)
869 869 if bugtracker is None:
870 870 bugtracker = _("https://mercurial-scm.org/wiki/BugTracker")
871 871 warning = (_("** unknown exception encountered, "
872 872 "please report by visiting\n** ") + bugtracker + '\n')
873 873 warning += ((_("** Python %s\n") % sys.version.replace('\n', '')) +
874 874 (_("** Mercurial Distributed SCM (version %s)\n") %
875 875 util.version()) +
876 876 (_("** Extensions loaded: %s\n") %
877 877 ", ".join([x[0] for x in extensions.extensions()])))
878 878 return warning
879 879
880 880 def handlecommandexception(ui):
881 881 """Produce a warning message for broken commands
882 882
883 883 Called when handling an exception; the exception is reraised if
884 884 this function returns False, ignored otherwise.
885 885 """
886 886 warning = _exceptionwarning(ui)
887 887 ui.log("commandexception", "%s\n%s\n", warning, traceback.format_exc())
888 888 ui.warn(warning)
889 889 return False # re-raise the exception
@@ -1,266 +1,265 b''
1 1 # hook.py - hook support for mercurial
2 2 #
3 3 # Copyright 2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import sys
12 import time
13 12
14 13 from .i18n import _
15 14 from . import (
16 15 demandimport,
17 16 error,
18 17 extensions,
19 18 pycompat,
20 19 util,
21 20 )
22 21
23 22 def _pythonhook(ui, repo, name, hname, funcname, args, throw):
24 23 '''call python hook. hook is callable object, looked up as
25 24 name in python module. if callable returns "true", hook
26 25 fails, else passes. if hook raises exception, treated as
27 26 hook failure. exception propagates if throw is "true".
28 27
29 28 reason for "true" meaning "hook failed" is so that
30 29 unmodified commands (e.g. mercurial.commands.update) can
31 30 be run as hooks without wrappers to convert return values.'''
32 31
33 32 if callable(funcname):
34 33 obj = funcname
35 34 funcname = obj.__module__ + "." + obj.__name__
36 35 else:
37 36 d = funcname.rfind('.')
38 37 if d == -1:
39 38 raise error.HookLoadError(
40 39 _('%s hook is invalid: "%s" not in a module')
41 40 % (hname, funcname))
42 41 modname = funcname[:d]
43 42 oldpaths = sys.path
44 43 if util.mainfrozen():
45 44 # binary installs require sys.path manipulation
46 45 modpath, modfile = os.path.split(modname)
47 46 if modpath and modfile:
48 47 sys.path = sys.path[:] + [modpath]
49 48 modname = modfile
50 49 with demandimport.deactivated():
51 50 try:
52 51 obj = __import__(modname)
53 52 except (ImportError, SyntaxError):
54 53 e1 = sys.exc_info()
55 54 try:
56 55 # extensions are loaded with hgext_ prefix
57 56 obj = __import__("hgext_%s" % modname)
58 57 except (ImportError, SyntaxError):
59 58 e2 = sys.exc_info()
60 59 if ui.tracebackflag:
61 60 ui.warn(_('exception from first failed import '
62 61 'attempt:\n'))
63 62 ui.traceback(e1)
64 63 if ui.tracebackflag:
65 64 ui.warn(_('exception from second failed import '
66 65 'attempt:\n'))
67 66 ui.traceback(e2)
68 67
69 68 if not ui.tracebackflag:
70 69 tracebackhint = _(
71 70 'run with --traceback for stack trace')
72 71 else:
73 72 tracebackhint = None
74 73 raise error.HookLoadError(
75 74 _('%s hook is invalid: import of "%s" failed') %
76 75 (hname, modname), hint=tracebackhint)
77 76 sys.path = oldpaths
78 77 try:
79 78 for p in funcname.split('.')[1:]:
80 79 obj = getattr(obj, p)
81 80 except AttributeError:
82 81 raise error.HookLoadError(
83 82 _('%s hook is invalid: "%s" is not defined')
84 83 % (hname, funcname))
85 84 if not callable(obj):
86 85 raise error.HookLoadError(
87 86 _('%s hook is invalid: "%s" is not callable')
88 87 % (hname, funcname))
89 88
90 89 ui.note(_("calling hook %s: %s\n") % (hname, funcname))
91 starttime = time.time()
90 starttime = util.timer()
92 91
93 92 try:
94 93 r = obj(ui=ui, repo=repo, hooktype=name, **args)
95 94 except Exception as exc:
96 95 if isinstance(exc, error.Abort):
97 96 ui.warn(_('error: %s hook failed: %s\n') %
98 97 (hname, exc.args[0]))
99 98 else:
100 99 ui.warn(_('error: %s hook raised an exception: '
101 100 '%s\n') % (hname, exc))
102 101 if throw:
103 102 raise
104 103 if not ui.tracebackflag:
105 104 ui.warn(_('(run with --traceback for stack trace)\n'))
106 105 ui.traceback()
107 106 return True, True
108 107 finally:
109 duration = time.time() - starttime
108 duration = util.timer() - starttime
110 109 ui.log('pythonhook', 'pythonhook-%s: %s finished in %0.2f seconds\n',
111 110 name, funcname, duration)
112 111 if r:
113 112 if throw:
114 113 raise error.HookAbort(_('%s hook failed') % hname)
115 114 ui.warn(_('warning: %s hook failed\n') % hname)
116 115 return r, False
117 116
118 117 def _exthook(ui, repo, name, cmd, args, throw):
119 118 ui.note(_("running hook %s: %s\n") % (name, cmd))
120 119
121 starttime = time.time()
120 starttime = util.timer()
122 121 env = {}
123 122
124 123 # make in-memory changes visible to external process
125 124 if repo is not None:
126 125 tr = repo.currenttransaction()
127 126 repo.dirstate.write(tr)
128 127 if tr and tr.writepending():
129 128 env['HG_PENDING'] = repo.root
130 129
131 130 for k, v in args.iteritems():
132 131 if callable(v):
133 132 v = v()
134 133 if isinstance(v, dict):
135 134 # make the dictionary element order stable across Python
136 135 # implementations
137 136 v = ('{' +
138 137 ', '.join('%r: %r' % i for i in sorted(v.iteritems())) +
139 138 '}')
140 139 env['HG_' + k.upper()] = v
141 140
142 141 if repo:
143 142 cwd = repo.root
144 143 else:
145 144 cwd = pycompat.getcwd()
146 145 r = ui.system(cmd, environ=env, cwd=cwd)
147 146
148 duration = time.time() - starttime
147 duration = util.timer() - starttime
149 148 ui.log('exthook', 'exthook-%s: %s finished in %0.2f seconds\n',
150 149 name, cmd, duration)
151 150 if r:
152 151 desc, r = util.explainexit(r)
153 152 if throw:
154 153 raise error.HookAbort(_('%s hook %s') % (name, desc))
155 154 ui.warn(_('warning: %s hook %s\n') % (name, desc))
156 155 return r
157 156
158 157 # represent an untrusted hook command
159 158 _fromuntrusted = object()
160 159
161 160 def _allhooks(ui):
162 161 """return a list of (hook-id, cmd) pairs sorted by priority"""
163 162 hooks = _hookitems(ui)
164 163 # Be careful in this section, propagating the real commands from untrusted
165 164 # sources would create a security vulnerability, make sure anything altered
166 165 # in that section uses "_fromuntrusted" as its command.
167 166 untrustedhooks = _hookitems(ui, _untrusted=True)
168 167 for name, value in untrustedhooks.items():
169 168 trustedvalue = hooks.get(name, (None, None, name, _fromuntrusted))
170 169 if value != trustedvalue:
171 170 (lp, lo, lk, lv) = trustedvalue
172 171 hooks[name] = (lp, lo, lk, _fromuntrusted)
173 172 # (end of the security sensitive section)
174 173 return [(k, v) for p, o, k, v in sorted(hooks.values())]
175 174
176 175 def _hookitems(ui, _untrusted=False):
177 176 """return all hooks items ready to be sorted"""
178 177 hooks = {}
179 178 for name, cmd in ui.configitems('hooks', untrusted=_untrusted):
180 179 if not name.startswith('priority'):
181 180 priority = ui.configint('hooks', 'priority.%s' % name, 0)
182 181 hooks[name] = (-priority, len(hooks), name, cmd)
183 182 return hooks
184 183
185 184 _redirect = False
186 185 def redirect(state):
187 186 global _redirect
188 187 _redirect = state
189 188
190 189 def hook(ui, repo, name, throw=False, **args):
191 190 if not ui.callhooks:
192 191 return False
193 192
194 193 hooks = []
195 194 for hname, cmd in _allhooks(ui):
196 195 if hname.split('.')[0] == name and cmd:
197 196 hooks.append((hname, cmd))
198 197
199 198 res = runhooks(ui, repo, name, hooks, throw=throw, **args)
200 199 r = False
201 200 for hname, cmd in hooks:
202 201 r = res[hname][0] or r
203 202 return r
204 203
205 204 def runhooks(ui, repo, name, hooks, throw=False, **args):
206 205 res = {}
207 206 oldstdout = -1
208 207
209 208 try:
210 209 for hname, cmd in hooks:
211 210 if oldstdout == -1 and _redirect:
212 211 try:
213 212 stdoutno = util.stdout.fileno()
214 213 stderrno = util.stderr.fileno()
215 214 # temporarily redirect stdout to stderr, if possible
216 215 if stdoutno >= 0 and stderrno >= 0:
217 216 util.stdout.flush()
218 217 oldstdout = os.dup(stdoutno)
219 218 os.dup2(stderrno, stdoutno)
220 219 except (OSError, AttributeError):
221 220 # files seem to be bogus, give up on redirecting (WSGI, etc)
222 221 pass
223 222
224 223 if cmd is _fromuntrusted:
225 224 if throw:
226 225 raise error.HookAbort(
227 226 _('untrusted hook %s not executed') % name,
228 227 hint = _("see 'hg help config.trusted'"))
229 228 ui.warn(_('warning: untrusted hook %s not executed\n') % name)
230 229 r = 1
231 230 raised = False
232 231 elif callable(cmd):
233 232 r, raised = _pythonhook(ui, repo, name, hname, cmd, args, throw)
234 233 elif cmd.startswith('python:'):
235 234 if cmd.count(':') >= 2:
236 235 path, cmd = cmd[7:].rsplit(':', 1)
237 236 path = util.expandpath(path)
238 237 if repo:
239 238 path = os.path.join(repo.root, path)
240 239 try:
241 240 mod = extensions.loadpath(path, 'hghook.%s' % hname)
242 241 except Exception:
243 242 ui.write(_("loading %s hook failed:\n") % hname)
244 243 raise
245 244 hookfn = getattr(mod, cmd)
246 245 else:
247 246 hookfn = cmd[7:].strip()
248 247 r, raised = _pythonhook(ui, repo, name, hname, hookfn, args,
249 248 throw)
250 249 else:
251 250 r = _exthook(ui, repo, hname, cmd, args, throw)
252 251 raised = False
253 252
254 253 res[hname] = r, raised
255 254
256 255 # The stderr is fully buffered on Windows when connected to a pipe.
 257 256         # A forcible flush is required to make small stderr data on the
258 257 # remote side available to the client immediately.
259 258 util.stderr.flush()
260 259 finally:
261 260 if _redirect and oldstdout >= 0:
262 261 util.stdout.flush() # write hook output to stderr fd
263 262 os.dup2(oldstdout, stdoutno)
264 263 os.close(oldstdout)
265 264
266 265 return res
@@ -1,193 +1,192 b''
1 1 # profiling.py - profiling functions
2 2 #
3 3 # Copyright 2016 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import, print_function
9 9
10 10 import contextlib
11 import time
12 11
13 12 from .i18n import _
14 13 from . import (
15 14 encoding,
16 15 error,
17 16 util,
18 17 )
19 18
20 19 @contextlib.contextmanager
21 20 def lsprofile(ui, fp):
22 21 format = ui.config('profiling', 'format', default='text')
23 22 field = ui.config('profiling', 'sort', default='inlinetime')
24 23 limit = ui.configint('profiling', 'limit', default=30)
25 24 climit = ui.configint('profiling', 'nested', default=0)
26 25
27 26 if format not in ['text', 'kcachegrind']:
28 27 ui.warn(_("unrecognized profiling format '%s'"
29 28 " - Ignored\n") % format)
30 29 format = 'text'
31 30
32 31 try:
33 32 from . import lsprof
34 33 except ImportError:
35 34 raise error.Abort(_(
36 35 'lsprof not available - install from '
37 36 'http://codespeak.net/svn/user/arigo/hack/misc/lsprof/'))
38 37 p = lsprof.Profiler()
39 38 p.enable(subcalls=True)
40 39 try:
41 40 yield
42 41 finally:
43 42 p.disable()
44 43
45 44 if format == 'kcachegrind':
46 45 from . import lsprofcalltree
47 46 calltree = lsprofcalltree.KCacheGrind(p)
48 47 calltree.output(fp)
49 48 else:
50 49 # format == 'text'
51 50 stats = lsprof.Stats(p.getstats())
52 51 stats.sort(field)
53 52 stats.pprint(limit=limit, file=fp, climit=climit)
54 53
55 54 @contextlib.contextmanager
56 55 def flameprofile(ui, fp):
57 56 try:
58 57 from flamegraph import flamegraph
59 58 except ImportError:
60 59 raise error.Abort(_(
61 60 'flamegraph not available - install from '
62 61 'https://github.com/evanhempel/python-flamegraph'))
63 62 # developer config: profiling.freq
64 63 freq = ui.configint('profiling', 'freq', default=1000)
65 64 filter_ = None
66 65 collapse_recursion = True
67 66 thread = flamegraph.ProfileThread(fp, 1.0 / freq,
68 67 filter_, collapse_recursion)
69 start_time = time.clock()
68 start_time = util.timer()
70 69 try:
71 70 thread.start()
72 71 yield
73 72 finally:
74 73 thread.stop()
75 74 thread.join()
76 75 print('Collected %d stack frames (%d unique) in %2.2f seconds.' % (
77 time.clock() - start_time, thread.num_frames(),
76 util.timer() - start_time, thread.num_frames(),
78 77 thread.num_frames(unique=True)))
79 78
80 79 @contextlib.contextmanager
81 80 def statprofile(ui, fp):
82 81 from . import statprof
83 82
84 83 freq = ui.configint('profiling', 'freq', default=1000)
85 84 if freq > 0:
86 85 # Cannot reset when profiler is already active. So silently no-op.
87 86 if statprof.state.profile_level == 0:
88 87 statprof.reset(freq)
89 88 else:
90 89 ui.warn(_("invalid sampling frequency '%s' - ignoring\n") % freq)
91 90
92 91 statprof.start(mechanism='thread')
93 92
94 93 try:
95 94 yield
96 95 finally:
97 96 data = statprof.stop()
98 97
99 98 profformat = ui.config('profiling', 'statformat', 'hotpath')
100 99
101 100 formats = {
102 101 'byline': statprof.DisplayFormats.ByLine,
103 102 'bymethod': statprof.DisplayFormats.ByMethod,
104 103 'hotpath': statprof.DisplayFormats.Hotpath,
105 104 'json': statprof.DisplayFormats.Json,
106 105 'chrome': statprof.DisplayFormats.Chrome,
107 106 }
108 107
109 108 if profformat in formats:
110 109 displayformat = formats[profformat]
111 110 else:
112 111 ui.warn(_('unknown profiler output format: %s\n') % profformat)
113 112 displayformat = statprof.DisplayFormats.Hotpath
114 113
115 114 kwargs = {}
116 115
117 116 def fraction(s):
118 117 if s.endswith('%'):
119 118 v = float(s[:-1]) / 100
120 119 else:
121 120 v = float(s)
122 121 if 0 <= v <= 1:
123 122 return v
124 123 raise ValueError(s)
125 124
126 125 if profformat == 'chrome':
127 126 showmin = ui.configwith(fraction, 'profiling', 'showmin', 0.005)
128 127 showmax = ui.configwith(fraction, 'profiling', 'showmax', 0.999)
129 128 kwargs.update(minthreshold=showmin, maxthreshold=showmax)
130 129
131 130 statprof.display(fp, data=data, format=displayformat, **kwargs)
132 131
133 132 @contextlib.contextmanager
134 133 def profile(ui):
135 134 """Start profiling.
136 135
137 136 Profiling is active when the context manager is active. When the context
138 137 manager exits, profiling results will be written to the configured output.
139 138 """
140 139 profiler = encoding.environ.get('HGPROF')
141 140 if profiler is None:
142 141 profiler = ui.config('profiling', 'type', default='stat')
143 142 if profiler not in ('ls', 'stat', 'flame'):
144 143 ui.warn(_("unrecognized profiler '%s' - ignored\n") % profiler)
145 144 profiler = 'stat'
146 145
147 146 output = ui.config('profiling', 'output')
148 147
149 148 if output == 'blackbox':
150 149 fp = util.stringio()
151 150 elif output:
152 151 path = ui.expandpath(output)
153 152 fp = open(path, 'wb')
154 153 else:
155 154 fp = ui.ferr
156 155
157 156 try:
158 157 if profiler == 'ls':
159 158 proffn = lsprofile
160 159 elif profiler == 'flame':
161 160 proffn = flameprofile
162 161 else:
163 162 proffn = statprofile
164 163
165 164 with proffn(ui, fp):
166 165 yield
167 166
168 167 finally:
169 168 if output:
170 169 if output == 'blackbox':
171 170 val = 'Profile:\n%s' % fp.getvalue()
172 171 # ui.log treats the input as a format string,
173 172 # so we need to escape any % signs.
174 173 val = val.replace('%', '%%')
175 174 ui.log('profile', val)
176 175 fp.close()
177 176
178 177 @contextlib.contextmanager
179 178 def maybeprofile(ui):
180 179 """Profile if enabled, else do nothing.
181 180
182 181 This context manager can be used to optionally profile if profiling
183 182 is enabled. Otherwise, it does nothing.
184 183
185 184 The purpose of this context manager is to make calling code simpler:
186 185 just use a single code path for calling into code you may want to profile
187 186 and this function determines whether to start profiling.
188 187 """
189 188 if ui.configbool('profiling', 'enabled'):
190 189 with profile(ui):
191 190 yield
192 191 else:
193 192 yield
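
A brief caller-side sketch of maybeprofile above, mirroring the use in _dispatch shown earlier in this diff; the function and workload are invented, and the sketch assumes a standard Mercurial install:

from mercurial import profiling, ui as uimod

def runreport(ui):
    # hypothetical expensive operation; profiled only when
    # profiling.enabled is set in the configuration
    with profiling.maybeprofile(ui):
        return sum(i * i for i in xrange(100000))

# usage sketch:
#   ui = uimod.ui.load()
#   ui.setconfig('profiling', 'enabled', 'true', 'example')
#   runreport(ui)
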
@@ -1,1102 +1,1101 b''
1 1 # repair.py - functions for repository repair for mercurial
2 2 #
3 3 # Copyright 2005, 2006 Chris Mason <mason@suse.com>
4 4 # Copyright 2007 Matt Mackall
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import errno
12 12 import hashlib
13 13 import stat
14 14 import tempfile
15 import time
16 15
17 16 from .i18n import _
18 17 from .node import short
19 18 from . import (
20 19 bundle2,
21 20 changegroup,
22 21 changelog,
23 22 error,
24 23 exchange,
25 24 manifest,
26 25 obsolete,
27 26 revlog,
28 27 scmutil,
29 28 util,
30 29 )
31 30
32 31 def _bundle(repo, bases, heads, node, suffix, compress=True):
33 32 """create a bundle with the specified revisions as a backup"""
34 33 cgversion = changegroup.safeversion(repo)
35 34
36 35 cg = changegroup.changegroupsubset(repo, bases, heads, 'strip',
37 36 version=cgversion)
38 37 backupdir = "strip-backup"
39 38 vfs = repo.vfs
40 39 if not vfs.isdir(backupdir):
41 40 vfs.mkdir(backupdir)
42 41
43 42 # Include a hash of all the nodes in the filename for uniqueness
44 43 allcommits = repo.set('%ln::%ln', bases, heads)
45 44 allhashes = sorted(c.hex() for c in allcommits)
46 45 totalhash = hashlib.sha1(''.join(allhashes)).hexdigest()
47 46 name = "%s/%s-%s-%s.hg" % (backupdir, short(node), totalhash[:8], suffix)
48 47
49 48 comp = None
50 49 if cgversion != '01':
51 50 bundletype = "HG20"
52 51 if compress:
53 52 comp = 'BZ'
54 53 elif compress:
55 54 bundletype = "HG10BZ"
56 55 else:
57 56 bundletype = "HG10UN"
58 57 return bundle2.writebundle(repo.ui, cg, name, bundletype, vfs,
59 58 compression=comp)
60 59
61 60 def _collectfiles(repo, striprev):
62 61 """find out the filelogs affected by the strip"""
63 62 files = set()
64 63
65 64 for x in xrange(striprev, len(repo)):
66 65 files.update(repo[x].files())
67 66
68 67 return sorted(files)
69 68
70 69 def _collectbrokencsets(repo, files, striprev):
71 70 """return the changesets which will be broken by the truncation"""
72 71 s = set()
73 72 def collectone(revlog):
74 73 _, brokenset = revlog.getstrippoint(striprev)
75 74 s.update([revlog.linkrev(r) for r in brokenset])
76 75
77 76 collectone(repo.manifestlog._revlog)
78 77 for fname in files:
79 78 collectone(repo.file(fname))
80 79
81 80 return s
82 81
83 82 def strip(ui, repo, nodelist, backup=True, topic='backup'):
84 83 # This function operates within a transaction of its own, but does
85 84 # not take any lock on the repo.
86 85 # Simple way to maintain backwards compatibility for this
87 86 # argument.
88 87 if backup in ['none', 'strip']:
89 88 backup = False
90 89
91 90 repo = repo.unfiltered()
92 91 repo.destroying()
93 92
94 93 cl = repo.changelog
95 94 # TODO handle undo of merge sets
96 95 if isinstance(nodelist, str):
97 96 nodelist = [nodelist]
98 97 striplist = [cl.rev(node) for node in nodelist]
99 98 striprev = min(striplist)
100 99
101 100 files = _collectfiles(repo, striprev)
102 101 saverevs = _collectbrokencsets(repo, files, striprev)
103 102
104 103 # Some revisions with rev > striprev may not be descendants of striprev.
105 104 # We have to find these revisions and put them in a bundle, so that
106 105 # we can restore them after the truncations.
107 106 # To create the bundle we use repo.changegroupsubset which requires
108 107 # the list of heads and bases of the set of interesting revisions.
109 108 # (head = revision in the set that has no descendant in the set;
110 109 # base = revision in the set that has no ancestor in the set)
111 110 tostrip = set(striplist)
112 111 saveheads = set(saverevs)
113 112 for r in cl.revs(start=striprev + 1):
114 113 if any(p in tostrip for p in cl.parentrevs(r)):
115 114 tostrip.add(r)
116 115
117 116 if r not in tostrip:
118 117 saverevs.add(r)
119 118 saveheads.difference_update(cl.parentrevs(r))
120 119 saveheads.add(r)
121 120 saveheads = [cl.node(r) for r in saveheads]
122 121
123 122 # compute base nodes
124 123 if saverevs:
125 124 descendants = set(cl.descendants(saverevs))
126 125 saverevs.difference_update(descendants)
127 126 savebases = [cl.node(r) for r in saverevs]
128 127 stripbases = [cl.node(r) for r in tostrip]
129 128
130 129 # For a set s, max(parents(s) - s) is the same as max(heads(::s - s)), but
131 130 # is much faster
132 131 newbmtarget = repo.revs('max(parents(%ld) - (%ld))', tostrip, tostrip)
133 132 if newbmtarget:
134 133 newbmtarget = repo[newbmtarget.first()].node()
135 134 else:
136 135 newbmtarget = '.'
137 136
138 137 bm = repo._bookmarks
139 138 updatebm = []
140 139 for m in bm:
141 140 rev = repo[bm[m]].rev()
142 141 if rev in tostrip:
143 142 updatebm.append(m)
144 143
145 144 # create a changegroup for all the branches we need to keep
146 145 backupfile = None
147 146 vfs = repo.vfs
148 147 node = nodelist[-1]
149 148 if backup:
150 149 backupfile = _bundle(repo, stripbases, cl.heads(), node, topic)
151 150 repo.ui.status(_("saved backup bundle to %s\n") %
152 151 vfs.join(backupfile))
153 152 repo.ui.log("backupbundle", "saved backup bundle to %s\n",
154 153 vfs.join(backupfile))
155 154 tmpbundlefile = None
156 155 if saveheads:
157 156 # do not compress temporary bundle if we remove it from disk later
158 157 tmpbundlefile = _bundle(repo, savebases, saveheads, node, 'temp',
159 158 compress=False)
160 159
161 160 mfst = repo.manifestlog._revlog
162 161
163 162 curtr = repo.currenttransaction()
164 163 if curtr is not None:
165 164 del curtr # avoid carrying reference to transaction for nothing
166 165 msg = _('programming error: cannot strip from inside a transaction')
167 166 raise error.Abort(msg, hint=_('contact your extension maintainer'))
168 167
169 168 try:
170 169 with repo.transaction("strip") as tr:
171 170 offset = len(tr.entries)
172 171
173 172 tr.startgroup()
174 173 cl.strip(striprev, tr)
175 174 mfst.strip(striprev, tr)
176 175 if 'treemanifest' in repo.requirements: # safe but unnecessary
177 176 # otherwise
178 177 for unencoded, encoded, size in repo.store.datafiles():
179 178 if (unencoded.startswith('meta/') and
180 179 unencoded.endswith('00manifest.i')):
181 180 dir = unencoded[5:-12]
182 181 repo.manifestlog._revlog.dirlog(dir).strip(striprev, tr)
183 182 for fn in files:
184 183 repo.file(fn).strip(striprev, tr)
185 184 tr.endgroup()
186 185
187 186 for i in xrange(offset, len(tr.entries)):
188 187 file, troffset, ignore = tr.entries[i]
189 188 with repo.svfs(file, 'a', checkambig=True) as fp:
190 189 fp.truncate(troffset)
191 190 if troffset == 0:
192 191 repo.store.markremoved(file)
193 192
194 193 if tmpbundlefile:
195 194 ui.note(_("adding branch\n"))
196 195 f = vfs.open(tmpbundlefile, "rb")
197 196 gen = exchange.readbundle(ui, f, tmpbundlefile, vfs)
198 197 if not repo.ui.verbose:
199 198 # silence internal shuffling chatter
200 199 repo.ui.pushbuffer()
201 200 if isinstance(gen, bundle2.unbundle20):
202 201 with repo.transaction('strip') as tr:
203 202 tr.hookargs = {'source': 'strip',
204 203 'url': 'bundle:' + vfs.join(tmpbundlefile)}
205 204 bundle2.applybundle(repo, gen, tr, source='strip',
206 205 url='bundle:' + vfs.join(tmpbundlefile))
207 206 else:
208 207 gen.apply(repo, 'strip', 'bundle:' + vfs.join(tmpbundlefile),
209 208 True)
210 209 if not repo.ui.verbose:
211 210 repo.ui.popbuffer()
212 211 f.close()
213 212 repo._phasecache.invalidate()
214 213
215 214 for m in updatebm:
216 215 bm[m] = repo[newbmtarget].node()
217 216 lock = tr = None
218 217 try:
219 218 lock = repo.lock()
220 219 tr = repo.transaction('repair')
221 220 bm.recordchange(tr)
222 221 tr.close()
223 222 finally:
224 223 tr.release()
225 224 lock.release()
226 225
227 226 # remove undo files
228 227 for undovfs, undofile in repo.undofiles():
229 228 try:
230 229 undovfs.unlink(undofile)
231 230 except OSError as e:
232 231 if e.errno != errno.ENOENT:
233 232 ui.warn(_('error removing %s: %s\n') %
234 233 (undovfs.join(undofile), str(e)))
235 234
236 235 except: # re-raises
237 236 if backupfile:
238 237 ui.warn(_("strip failed, backup bundle stored in '%s'\n")
239 238 % vfs.join(backupfile))
240 239 if tmpbundlefile:
241 240 ui.warn(_("strip failed, unrecovered changes stored in '%s'\n")
242 241 % vfs.join(tmpbundlefile))
243 242 ui.warn(_("(fix the problem, then recover the changesets with "
244 243 "\"hg unbundle '%s'\")\n") % vfs.join(tmpbundlefile))
245 244 raise
246 245 else:
247 246 if tmpbundlefile:
248 247 # Remove temporary bundle only if there were no exceptions
249 248 vfs.unlink(tmpbundlefile)
250 249
251 250 repo.destroyed()
252 251 # return the backup file path (or None if 'backup' was False) so
253 252 # extensions can use it
254 253 return backupfile
255 254
256 255 def rebuildfncache(ui, repo):
257 256 """Rebuilds the fncache file from repo history.
258 257
259 258 Missing entries will be added. Extra entries will be removed.
260 259 """
261 260 repo = repo.unfiltered()
262 261
263 262 if 'fncache' not in repo.requirements:
264 263 ui.warn(_('(not rebuilding fncache because repository does not '
265 264 'support fncache)\n'))
266 265 return
267 266
268 267 with repo.lock():
269 268 fnc = repo.store.fncache
270 269 # Trigger load of fncache.
271 270 if 'irrelevant' in fnc:
272 271 pass
273 272
274 273 oldentries = set(fnc.entries)
275 274 newentries = set()
276 275 seenfiles = set()
277 276
278 277 repolen = len(repo)
279 278 for rev in repo:
280 279 ui.progress(_('rebuilding'), rev, total=repolen,
281 280 unit=_('changesets'))
282 281
283 282 ctx = repo[rev]
284 283 for f in ctx.files():
285 284 # This is to minimize I/O.
286 285 if f in seenfiles:
287 286 continue
288 287 seenfiles.add(f)
289 288
290 289 i = 'data/%s.i' % f
291 290 d = 'data/%s.d' % f
292 291
293 292 if repo.store._exists(i):
294 293 newentries.add(i)
295 294 if repo.store._exists(d):
296 295 newentries.add(d)
297 296
298 297 ui.progress(_('rebuilding'), None)
299 298
300 299 if 'treemanifest' in repo.requirements: # safe but unnecessary otherwise
301 300 for dir in util.dirs(seenfiles):
302 301 i = 'meta/%s/00manifest.i' % dir
303 302 d = 'meta/%s/00manifest.d' % dir
304 303
305 304 if repo.store._exists(i):
306 305 newentries.add(i)
307 306 if repo.store._exists(d):
308 307 newentries.add(d)
309 308
310 309 addcount = len(newentries - oldentries)
311 310 removecount = len(oldentries - newentries)
312 311 for p in sorted(oldentries - newentries):
313 312 ui.write(_('removing %s\n') % p)
314 313 for p in sorted(newentries - oldentries):
315 314 ui.write(_('adding %s\n') % p)
316 315
317 316 if addcount or removecount:
318 317 ui.write(_('%d items added, %d removed from fncache\n') %
319 318 (addcount, removecount))
320 319 fnc.entries = newentries
321 320 fnc._dirty = True
322 321
323 322 with repo.transaction('fncache') as tr:
324 323 fnc.write(tr)
325 324 else:
326 325 ui.write(_('fncache already up to date\n'))
327 326
328 327 def stripbmrevset(repo, mark):
329 328 """
330 329 The revset to strip when strip is called with -B mark
331 330
332 331 Needs to live here so extensions can use it and wrap it even when strip is
333 332 not enabled or not present on a box.
334 333 """
335 334 return repo.revs("ancestors(bookmark(%s)) - "
336 335 "ancestors(head() and not bookmark(%s)) - "
337 336 "ancestors(bookmark() and not bookmark(%s))",
338 337 mark, mark, mark)
339 338
340 339 def deleteobsmarkers(obsstore, indices):
341 340 """Delete some obsmarkers from obsstore and return how many were deleted
342 341
343 342 'indices' is a list of ints which are the indices
344 343 of the markers to be deleted.
345 344
346 345 Every invocation of this function completely rewrites the obsstore file,
347 346 skipping the markers we want removed. A new temporary file is
348 347 created, the remaining markers are written to it, and on .close() the
349 348 file is atomically renamed to obsstore, thus guaranteeing consistency."""
350 349 if not indices:
351 350 # we don't want to rewrite the obsstore with the same content
352 351 return
353 352
354 353 left = []
355 354 current = obsstore._all
356 355 n = 0
357 356 for i, m in enumerate(current):
358 357 if i in indices:
359 358 n += 1
360 359 continue
361 360 left.append(m)
362 361
363 362 newobsstorefile = obsstore.svfs('obsstore', 'w', atomictemp=True)
364 363 for bytes in obsolete.encodemarkers(left, True, obsstore._version):
365 364 newobsstorefile.write(bytes)
366 365 newobsstorefile.close()
367 366 return n
368 367
369 368 def upgraderequiredsourcerequirements(repo):
370 369 """Obtain requirements required to be present to upgrade a repo.
371 370
372 371 An upgrade will not be allowed if the repository doesn't have the
373 372 requirements returned by this function.
374 373 """
375 374 return set([
376 375 # Introduced in Mercurial 0.9.2.
377 376 'revlogv1',
378 377 # Introduced in Mercurial 0.9.2.
379 378 'store',
380 379 ])
381 380
382 381 def upgradeblocksourcerequirements(repo):
383 382 """Obtain requirements that will prevent an upgrade from occurring.
384 383
385 384 An upgrade cannot be performed if the source repository contains a
386 385 requirement in the returned set.
387 386 """
388 387 return set([
389 388 # The upgrade code does not yet support these experimental features.
390 389 # This is an artificial limitation.
391 390 'manifestv2',
392 391 'treemanifest',
393 392 # This was a precursor to generaldelta and was never enabled by default.
394 393 # It should (hopefully) not exist in the wild.
395 394 'parentdelta',
396 395 # Upgrade should operate on the actual store, not the shared link.
397 396 'shared',
398 397 ])
399 398
400 399 def upgradesupportremovedrequirements(repo):
401 400 """Obtain requirements that can be removed during an upgrade.
402 401
403 402 If an upgrade were to create a repository that dropped a requirement,
404 403 the dropped requirement must appear in the returned set for the upgrade
405 404 to be allowed.
406 405 """
407 406 return set()
408 407
409 408 def upgradesupporteddestrequirements(repo):
410 409 """Obtain requirements that upgrade supports in the destination.
411 410
412 411 If the result of the upgrade would create requirements not in this set,
413 412 the upgrade is disallowed.
414 413
415 414 Extensions should monkeypatch this to add their custom requirements.
416 415 """
417 416 return set([
418 417 'dotencode',
419 418 'fncache',
420 419 'generaldelta',
421 420 'revlogv1',
422 421 'store',
423 422 ])
424 423
425 424 def upgradeallowednewrequirements(repo):
426 425 """Obtain requirements that can be added to a repository during upgrade.
427 426
428 427 This is used to disallow proposed requirements from being added when
429 428 they weren't present before.
430 429
431 430 We use a list of allowed requirement additions instead of a list of known
432 431 bad additions because the whitelist approach is safer and will prevent
433 432 future, unknown requirements from accidentally being added.
434 433 """
435 434 return set([
436 435 'dotencode',
437 436 'fncache',
438 437 'generaldelta',
439 438 ])
440 439
441 440 deficiency = 'deficiency'
442 441 optimisation = 'optimization'
443 442
444 443 class upgradeimprovement(object):
445 444 """Represents an improvement that can be made as part of an upgrade.
446 445
447 446 The following attributes are defined on each instance:
448 447
449 448 name
450 449 Machine-readable string uniquely identifying this improvement. It
451 450 will be mapped to an action later in the upgrade process.
452 451
453 452 type
454 453 Either ``deficiency`` or ``optimisation``. A deficiency is an obvious
455 454 problem. An optimization is an action (sometimes optional) that
456 455 can be taken to further improve the state of the repository.
457 456
458 457 description
459 458 Message intended for humans explaining the improvement in more detail,
460 459 including the implications of it. For ``deficiency`` types, should be
461 460 worded in the present tense. For ``optimisation`` types, should be
462 461 worded in the future tense.
463 462
464 463 upgrademessage
465 464 Message intended for humans explaining what an upgrade addressing this
466 465 issue will do. Should be worded in the future tense.
467 466
468 467 fromdefault (``deficiency`` types only)
469 468 Boolean indicating whether the current (deficient) state deviates
470 469 from Mercurial's default configuration.
471 470
472 471 fromconfig (``deficiency`` types only)
473 472 Boolean indicating whether the current (deficient) state deviates
474 473 from the current Mercurial configuration.
475 474 """
476 475 def __init__(self, name, type, description, upgrademessage, **kwargs):
477 476 self.name = name
478 477 self.type = type
479 478 self.description = description
480 479 self.upgrademessage = upgrademessage
481 480
482 481 for k, v in kwargs.items():
483 482 setattr(self, k, v)
484 483
485 484 def upgradefindimprovements(repo):
486 485 """Determine improvements that can be made to the repo during upgrade.
487 486
488 487 Returns a list of ``upgradeimprovement`` describing repository deficiencies
489 488 and optimizations.
490 489 """
491 490 # Avoid cycle: cmdutil -> repair -> localrepo -> cmdutil
492 491 from . import localrepo
493 492
494 493 newreporeqs = localrepo.newreporequirements(repo)
495 494
496 495 improvements = []
497 496
498 497 # We could detect lack of revlogv1 and store here, but they were added
499 498 # in 0.9.2 and we don't support upgrading repos without these
500 499 # requirements, so let's not bother.
501 500
502 501 if 'fncache' not in repo.requirements:
503 502 improvements.append(upgradeimprovement(
504 503 name='fncache',
505 504 type=deficiency,
506 505 description=_('long and reserved filenames may not work correctly; '
507 506 'repository performance is sub-optimal'),
508 507 upgrademessage=_('repository will be more resilient to storing '
509 508 'certain paths and performance of certain '
510 509 'operations should be improved'),
511 510 fromdefault=True,
512 511 fromconfig='fncache' in newreporeqs))
513 512
514 513 if 'dotencode' not in repo.requirements:
515 514 improvements.append(upgradeimprovement(
516 515 name='dotencode',
517 516 type=deficiency,
518 517 description=_('storage of filenames beginning with a period or '
519 518 'space may not work correctly'),
520 519 upgrademessage=_('repository will be better able to store files '
521 520 'beginning with a space or period'),
522 521 fromdefault=True,
523 522 fromconfig='dotencode' in newreporeqs))
524 523
525 524 if 'generaldelta' not in repo.requirements:
526 525 improvements.append(upgradeimprovement(
527 526 name='generaldelta',
528 527 type=deficiency,
529 528 description=_('deltas within internal storage are unable to '
530 529 'choose optimal revisions; repository is larger and '
531 530 'slower than it could be; interaction with other '
532 531 'repositories may require extra network and CPU '
533 532 'resources, making "hg push" and "hg pull" slower'),
534 533 upgrademessage=_('repository storage will be able to create '
535 534 'optimal deltas; new repository data will be '
536 535 'smaller and read times should decrease; '
537 536 'interacting with other repositories using this '
538 537 'storage model should require less network and '
539 538 'CPU resources, making "hg push" and "hg pull" '
540 539 'faster'),
541 540 fromdefault=True,
542 541 fromconfig='generaldelta' in newreporeqs))
543 542
544 543 # Mercurial 4.0 changed changelogs to not use delta chains. Search for
545 544 # changelogs with deltas.
546 545 cl = repo.changelog
547 546 for rev in cl:
548 547 chainbase = cl.chainbase(rev)
549 548 if chainbase != rev:
550 549 improvements.append(upgradeimprovement(
551 550 name='removecldeltachain',
552 551 type=deficiency,
553 552 description=_('changelog storage is using deltas instead of '
554 553 'raw entries; changelog reading and any '
555 554 'operation relying on changelog data are slower '
556 555 'than they could be'),
557 556 upgrademessage=_('changelog storage will be reformatted to '
558 557 'store raw entries; changelog reading will be '
559 558 'faster; changelog size may be reduced'),
560 559 fromdefault=True,
561 560 fromconfig=True))
562 561 break
563 562
564 563 # Now for the optimizations.
565 564
566 565 # These are unconditionally added. There is logic later that figures out
567 566 # which ones to apply.
568 567
569 568 improvements.append(upgradeimprovement(
570 569 name='redeltaparent',
571 570 type=optimisation,
572 571 description=_('deltas within internal storage will be recalculated to '
573 572 'choose an optimal base revision where this was not '
574 573 'already done; the size of the repository may shrink and '
575 574 'various operations may become faster; the first time '
576 575 'this optimization is performed could slow down upgrade '
577 576 'execution considerably; subsequent invocations should '
578 577 'not run noticeably slower'),
579 578 upgrademessage=_('deltas within internal storage will choose a new '
580 579 'base revision if needed')))
581 580
582 581 improvements.append(upgradeimprovement(
583 582 name='redeltamultibase',
584 583 type=optimisation,
585 584 description=_('deltas within internal storage will be recalculated '
586 585 'against multiple base revision and the smallest '
587 586 'difference will be used; the size of the repository may '
588 587 'shrink significantly when there are many merges; this '
589 588 'optimization will slow down execution in proportion to '
590 589 'the number of merges in the repository and the number '
591 590 'of files in the repository; this slowdown should not '
592 591 'be significant unless there are tens of thousands of '
593 592 'files and thousands of merges'),
594 593 upgrademessage=_('deltas within internal storage will choose an '
595 594 'optimal delta by computing deltas against multiple '
596 595 'parents; may slow down execution time '
597 596 'significantly')))
598 597
599 598 improvements.append(upgradeimprovement(
600 599 name='redeltaall',
601 600 type=optimisation,
602 601 description=_('deltas within internal storage will always be '
603 602 'recalculated without reusing prior deltas; this will '
604 603 'likely make execution run several times slower; this '
605 604 'optimization is typically not needed'),
606 605 upgrademessage=_('deltas within internal storage will be fully '
607 606 'recomputed; this will likely drastically slow down '
608 607 'execution time')))
609 608
610 609 return improvements
611 610
612 611 def upgradedetermineactions(repo, improvements, sourcereqs, destreqs,
613 612 optimize):
614 613 """Determine upgrade actions that will be performed.
615 614
616 615 Given a list of improvements as returned by ``upgradefindimprovements``,
617 616 determine the list of upgrade actions that will be performed.
618 617
619 618 The role of this function is to filter improvements if needed, apply
620 619 recommended optimizations from the improvements list that make sense,
621 620 etc.
622 621
623 622 Returns a list of action names.
624 623 """
625 624 newactions = []
626 625
627 626 knownreqs = upgradesupporteddestrequirements(repo)
628 627
629 628 for i in improvements:
630 629 name = i.name
631 630
632 631 # If the action is a requirement that doesn't show up in the
633 632 # destination requirements, prune the action.
634 633 if name in knownreqs and name not in destreqs:
635 634 continue
636 635
637 636 if i.type == deficiency:
638 637 newactions.append(name)
639 638
640 639 newactions.extend(o for o in sorted(optimize) if o not in newactions)
641 640
642 641 # FUTURE consider adding some optimizations here for certain transitions.
643 642 # e.g. adding generaldelta could schedule parent redeltas.
644 643
645 644 return newactions
646 645
647 646 def _revlogfrompath(repo, path):
648 647 """Obtain a revlog from a repo path.
649 648
650 649 An instance of the appropriate class is returned.
651 650 """
652 651 if path == '00changelog.i':
653 652 return changelog.changelog(repo.svfs)
654 653 elif path.endswith('00manifest.i'):
655 654 mandir = path[:-len('00manifest.i')]
656 655 return manifest.manifestrevlog(repo.svfs, dir=mandir)
657 656 else:
658 657 # Filelogs don't do anything special with settings. So we can use a
659 658 # vanilla revlog.
660 659 return revlog.revlog(repo.svfs, path)
661 660
662 661 def _copyrevlogs(ui, srcrepo, dstrepo, tr, deltareuse, aggressivemergedeltas):
663 662 """Copy revlogs between 2 repos."""
664 663 revcount = 0
665 664 srcsize = 0
666 665 srcrawsize = 0
667 666 dstsize = 0
668 667 fcount = 0
669 668 frevcount = 0
670 669 fsrcsize = 0
671 670 frawsize = 0
672 671 fdstsize = 0
673 672 mcount = 0
674 673 mrevcount = 0
675 674 msrcsize = 0
676 675 mrawsize = 0
677 676 mdstsize = 0
678 677 crevcount = 0
679 678 csrcsize = 0
680 679 crawsize = 0
681 680 cdstsize = 0
682 681
683 682 # Perform a pass to collect metadata. This validates we can open all
684 683 # source files and allows a unified progress bar to be displayed.
685 684 for unencoded, encoded, size in srcrepo.store.walk():
686 685 if unencoded.endswith('.d'):
687 686 continue
688 687
689 688 rl = _revlogfrompath(srcrepo, unencoded)
690 689 revcount += len(rl)
691 690
692 691 datasize = 0
693 692 rawsize = 0
694 693 idx = rl.index
695 694 for rev in rl:
696 695 e = idx[rev]
697 696 datasize += e[1]
698 697 rawsize += e[2]
699 698
700 699 srcsize += datasize
701 700 srcrawsize += rawsize
702 701
703 702 # This is for the separate progress bars.
704 703 if isinstance(rl, changelog.changelog):
705 704 crevcount += len(rl)
706 705 csrcsize += datasize
707 706 crawsize += rawsize
708 707 elif isinstance(rl, manifest.manifestrevlog):
709 708 mcount += 1
710 709 mrevcount += len(rl)
711 710 msrcsize += datasize
712 711 mrawsize += rawsize
713 712 elif isinstance(rl, revlog.revlog):
714 713 fcount += 1
715 714 frevcount += len(rl)
716 715 fsrcsize += datasize
717 716 frawsize += rawsize
718 717
719 718 if not revcount:
720 719 return
721 720
722 721 ui.write(_('migrating %d total revisions (%d in filelogs, %d in manifests, '
723 722 '%d in changelog)\n') %
724 723 (revcount, frevcount, mrevcount, crevcount))
725 724 ui.write(_('migrating %s in store; %s tracked data\n') % (
726 725 (util.bytecount(srcsize), util.bytecount(srcrawsize))))
727 726
728 727 # Used to keep track of progress.
729 728 progress = []
730 729 def oncopiedrevision(rl, rev, node):
731 730 progress[1] += 1
732 731 srcrepo.ui.progress(progress[0], progress[1], total=progress[2])
733 732
734 733 # Do the actual copying.
735 734 # FUTURE this operation can be farmed off to worker processes.
736 735 seen = set()
737 736 for unencoded, encoded, size in srcrepo.store.walk():
738 737 if unencoded.endswith('.d'):
739 738 continue
740 739
741 740 oldrl = _revlogfrompath(srcrepo, unencoded)
742 741 newrl = _revlogfrompath(dstrepo, unencoded)
743 742
744 743 if isinstance(oldrl, changelog.changelog) and 'c' not in seen:
745 744 ui.write(_('finished migrating %d manifest revisions across %d '
746 745 'manifests; change in size: %s\n') %
747 746 (mrevcount, mcount, util.bytecount(mdstsize - msrcsize)))
748 747
749 748 ui.write(_('migrating changelog containing %d revisions '
750 749 '(%s in store; %s tracked data)\n') %
751 750 (crevcount, util.bytecount(csrcsize),
752 751 util.bytecount(crawsize)))
753 752 seen.add('c')
754 753 progress[:] = [_('changelog revisions'), 0, crevcount]
755 754 elif isinstance(oldrl, manifest.manifestrevlog) and 'm' not in seen:
756 755 ui.write(_('finished migrating %d filelog revisions across %d '
757 756 'filelogs; change in size: %s\n') %
758 757 (frevcount, fcount, util.bytecount(fdstsize - fsrcsize)))
759 758
760 759 ui.write(_('migrating %d manifests containing %d revisions '
761 760 '(%s in store; %s tracked data)\n') %
762 761 (mcount, mrevcount, util.bytecount(msrcsize),
763 762 util.bytecount(mrawsize)))
764 763 seen.add('m')
765 764 progress[:] = [_('manifest revisions'), 0, mrevcount]
766 765 elif 'f' not in seen:
767 766 ui.write(_('migrating %d filelogs containing %d revisions '
768 767 '(%s in store; %s tracked data)\n') %
769 768 (fcount, frevcount, util.bytecount(fsrcsize),
770 769 util.bytecount(frawsize)))
771 770 seen.add('f')
772 771 progress[:] = [_('file revisions'), 0, frevcount]
773 772
774 773 ui.progress(progress[0], progress[1], total=progress[2])
775 774
776 775 ui.note(_('cloning %d revisions from %s\n') % (len(oldrl), unencoded))
777 776 oldrl.clone(tr, newrl, addrevisioncb=oncopiedrevision,
778 777 deltareuse=deltareuse,
779 778 aggressivemergedeltas=aggressivemergedeltas)
780 779
781 780 datasize = 0
782 781 idx = newrl.index
783 782 for rev in newrl:
784 783 datasize += idx[rev][1]
785 784
786 785 dstsize += datasize
787 786
788 787 if isinstance(newrl, changelog.changelog):
789 788 cdstsize += datasize
790 789 elif isinstance(newrl, manifest.manifestrevlog):
791 790 mdstsize += datasize
792 791 else:
793 792 fdstsize += datasize
794 793
795 794 ui.progress(progress[0], None)
796 795
797 796 ui.write(_('finished migrating %d changelog revisions; change in size: '
798 797 '%s\n') % (crevcount, util.bytecount(cdstsize - csrcsize)))
799 798
800 799 ui.write(_('finished migrating %d total revisions; total change in store '
801 800 'size: %s\n') % (revcount, util.bytecount(dstsize - srcsize)))
802 801
803 802 def _upgradefilterstorefile(srcrepo, dstrepo, requirements, path, mode, st):
804 803 """Determine whether to copy a store file during upgrade.
805 804
806 805 This function is called when migrating store files from ``srcrepo`` to
807 806 ``dstrepo`` as part of upgrading a repository.
808 807
809 808 Args:
810 809 srcrepo: repo we are copying from
811 810 dstrepo: repo we are copying to
812 811 requirements: set of requirements for ``dstrepo``
813 812 path: store file being examined
814 813 mode: the ``ST_MODE`` file type of ``path``
815 814 st: ``stat`` data structure for ``path``
816 815
817 816 Function should return ``True`` if the file is to be copied.
818 817 """
819 818 # Skip revlogs.
820 819 if path.endswith(('.i', '.d')):
821 820 return False
822 821 # Skip transaction related files.
823 822 if path.startswith('undo'):
824 823 return False
825 824 # Only copy regular files.
826 825 if mode != stat.S_IFREG:
827 826 return False
828 827 # Skip other skipped files.
829 828 if path in ('lock', 'fncache'):
830 829 return False
831 830
832 831 return True
833 832
834 833 def _upgradefinishdatamigration(ui, srcrepo, dstrepo, requirements):
835 834 """Hook point for extensions to perform additional actions during upgrade.
836 835
837 836 This function is called after revlogs and store files have been copied but
838 837 before the new store is swapped into the original location.
839 838 """
840 839
841 840 def _upgraderepo(ui, srcrepo, dstrepo, requirements, actions):
842 841 """Do the low-level work of upgrading a repository.
843 842
844 843 The upgrade is effectively performed as a copy between a source
845 844 repository and a temporary destination repository.
846 845
847 846 The source repository is unmodified for as long as possible so the
848 847 upgrade can abort at any time without causing loss of service for
849 848 readers and without corrupting the source repository.
850 849 """
851 850 assert srcrepo.currentwlock()
852 851 assert dstrepo.currentwlock()
853 852
854 853 ui.write(_('(it is safe to interrupt this process any time before '
855 854 'data migration completes)\n'))
856 855
857 856 if 'redeltaall' in actions:
858 857 deltareuse = revlog.revlog.DELTAREUSENEVER
859 858 elif 'redeltaparent' in actions:
860 859 deltareuse = revlog.revlog.DELTAREUSESAMEREVS
861 860 elif 'redeltamultibase' in actions:
862 861 deltareuse = revlog.revlog.DELTAREUSESAMEREVS
863 862 else:
864 863 deltareuse = revlog.revlog.DELTAREUSEALWAYS
865 864
866 865 with dstrepo.transaction('upgrade') as tr:
867 866 _copyrevlogs(ui, srcrepo, dstrepo, tr, deltareuse,
868 867 'redeltamultibase' in actions)
869 868
870 869 # Now copy other files in the store directory.
871 870 for p, kind, st in srcrepo.store.vfs.readdir('', stat=True):
872 871 if not _upgradefilterstorefile(srcrepo, dstrepo, requirements,
873 872 p, kind, st):
874 873 continue
875 874
876 875 srcrepo.ui.write(_('copying %s\n') % p)
877 876 src = srcrepo.store.vfs.join(p)
878 877 dst = dstrepo.store.vfs.join(p)
879 878 util.copyfile(src, dst, copystat=True)
880 879
881 880 _upgradefinishdatamigration(ui, srcrepo, dstrepo, requirements)
882 881
883 882 ui.write(_('data fully migrated to temporary repository\n'))
884 883
885 884 backuppath = tempfile.mkdtemp(prefix='upgradebackup.', dir=srcrepo.path)
886 885 backupvfs = scmutil.vfs(backuppath)
887 886
888 887 # Make a backup of requires file first, as it is the first to be modified.
889 888 util.copyfile(srcrepo.join('requires'), backupvfs.join('requires'))
890 889
891 890 # We install an arbitrary requirement that clients must not support
892 891 # as a mechanism to lock out new clients during the data swap. This is
893 892 # better than allowing a client to continue while the repository is in
894 893 # an inconsistent state.
895 894 ui.write(_('marking source repository as being upgraded; clients will be '
896 895 'unable to read from repository\n'))
897 896 scmutil.writerequires(srcrepo.vfs,
898 897 srcrepo.requirements | set(['upgradeinprogress']))
899 898
900 899 ui.write(_('starting in-place swap of repository data\n'))
901 900 ui.write(_('replaced files will be backed up at %s\n') %
902 901 backuppath)
903 902
904 903 # Now swap in the new store directory. Doing it as a rename should make
905 904 # the operation nearly instantaneous and atomic (at least in well-behaved
906 905 # environments).
907 906 ui.write(_('replacing store...\n'))
908 tstart = time.time()
907 tstart = util.timer()
909 908 util.rename(srcrepo.spath, backupvfs.join('store'))
910 909 util.rename(dstrepo.spath, srcrepo.spath)
911 elapsed = time.time() - tstart
910 elapsed = util.timer() - tstart
912 911 ui.write(_('store replacement complete; repository was inconsistent for '
913 912 '%0.1fs\n') % elapsed)
914 913
915 914 # We first write the requirements file. Any new requirements will lock
916 915 # out legacy clients.
917 916 ui.write(_('finalizing requirements file and making repository readable '
918 917 'again\n'))
919 918 scmutil.writerequires(srcrepo.vfs, requirements)
920 919
921 920 # The lock file from the old store won't be removed because nothing has a
922 921 # reference to its new location. So clean it up manually. Alternatively, we
923 922 # could update srcrepo.svfs and other variables to point to the new
924 923 # location. This is simpler.
925 924 backupvfs.unlink('store/lock')
926 925
927 926 return backuppath
928 927
929 928 def upgraderepo(ui, repo, run=False, optimize=None):
930 929 """Upgrade a repository in place."""
931 930 # Avoid cycle: cmdutil -> repair -> localrepo -> cmdutil
932 931 from . import localrepo
933 932
934 933 optimize = set(optimize or [])
935 934 repo = repo.unfiltered()
936 935
937 936 # Ensure the repository can be upgraded.
938 937 missingreqs = upgraderequiredsourcerequirements(repo) - repo.requirements
939 938 if missingreqs:
940 939 raise error.Abort(_('cannot upgrade repository; requirement '
941 940 'missing: %s') % _(', ').join(sorted(missingreqs)))
942 941
943 942 blockedreqs = upgradeblocksourcerequirements(repo) & repo.requirements
944 943 if blockedreqs:
945 944 raise error.Abort(_('cannot upgrade repository; unsupported source '
946 945 'requirement: %s') %
947 946 _(', ').join(sorted(blockedreqs)))
948 947
949 948 # FUTURE there is potentially a need to control the wanted requirements via
950 949 # command arguments or via an extension hook point.
951 950 newreqs = localrepo.newreporequirements(repo)
952 951
953 952 noremovereqs = (repo.requirements - newreqs -
954 953 upgradesupportremovedrequirements(repo))
955 954 if noremovereqs:
956 955 raise error.Abort(_('cannot upgrade repository; requirement would be '
957 956 'removed: %s') % _(', ').join(sorted(noremovereqs)))
958 957
959 958 noaddreqs = (newreqs - repo.requirements -
960 959 upgradeallowednewrequirements(repo))
961 960 if noaddreqs:
962 961 raise error.Abort(_('cannot upgrade repository; do not support adding '
963 962 'requirement: %s') %
964 963 _(', ').join(sorted(noaddreqs)))
965 964
966 965 unsupportedreqs = newreqs - upgradesupporteddestrequirements(repo)
967 966 if unsupportedreqs:
968 967 raise error.Abort(_('cannot upgrade repository; do not support '
969 968 'destination requirement: %s') %
970 969 _(', ').join(sorted(unsupportedreqs)))
971 970
972 971 # Find and validate all improvements that can be made.
973 972 improvements = upgradefindimprovements(repo)
974 973 for i in improvements:
975 974 if i.type not in (deficiency, optimisation):
976 975 raise error.Abort(_('unexpected improvement type %s for %s') % (
977 976 i.type, i.name))
978 977
979 978 # Validate arguments.
980 979 unknownoptimize = optimize - set(i.name for i in improvements
981 980 if i.type == optimisation)
982 981 if unknownoptimize:
983 982 raise error.Abort(_('unknown optimization action requested: %s') %
984 983 ', '.join(sorted(unknownoptimize)),
985 984 hint=_('run without arguments to see valid '
986 985 'optimizations'))
987 986
988 987 actions = upgradedetermineactions(repo, improvements, repo.requirements,
989 988 newreqs, optimize)
990 989
991 990 def printrequirements():
992 991 ui.write(_('requirements\n'))
993 992 ui.write(_(' preserved: %s\n') %
994 993 _(', ').join(sorted(newreqs & repo.requirements)))
995 994
996 995 if repo.requirements - newreqs:
997 996 ui.write(_(' removed: %s\n') %
998 997 _(', ').join(sorted(repo.requirements - newreqs)))
999 998
1000 999 if newreqs - repo.requirements:
1001 1000 ui.write(_(' added: %s\n') %
1002 1001 _(', ').join(sorted(newreqs - repo.requirements)))
1003 1002
1004 1003 ui.write('\n')
1005 1004
1006 1005 def printupgradeactions():
1007 1006 for action in actions:
1008 1007 for i in improvements:
1009 1008 if i.name == action:
1010 1009 ui.write('%s\n %s\n\n' %
1011 1010 (i.name, i.upgrademessage))
1012 1011
1013 1012 if not run:
1014 1013 fromdefault = []
1015 1014 fromconfig = []
1016 1015 optimizations = []
1017 1016
1018 1017 for i in improvements:
1019 1018 assert i.type in (deficiency, optimisation)
1020 1019 if i.type == deficiency:
1021 1020 if i.fromdefault:
1022 1021 fromdefault.append(i)
1023 1022 if i.fromconfig:
1024 1023 fromconfig.append(i)
1025 1024 else:
1026 1025 optimizations.append(i)
1027 1026
1028 1027 if fromdefault or fromconfig:
1029 1028 fromconfignames = set(x.name for x in fromconfig)
1030 1029 onlydefault = [i for i in fromdefault
1031 1030 if i.name not in fromconfignames]
1032 1031
1033 1032 if fromconfig:
1034 1033 ui.write(_('repository lacks features recommended by '
1035 1034 'current config options:\n\n'))
1036 1035 for i in fromconfig:
1037 1036 ui.write('%s\n %s\n\n' % (i.name, i.description))
1038 1037
1039 1038 if onlydefault:
1040 1039 ui.write(_('repository lacks features used by the default '
1041 1040 'config options:\n\n'))
1042 1041 for i in onlydefault:
1043 1042 ui.write('%s\n %s\n\n' % (i.name, i.description))
1044 1043
1045 1044 ui.write('\n')
1046 1045 else:
1047 1046 ui.write(_('(no feature deficiencies found in existing '
1048 1047 'repository)\n'))
1049 1048
1050 1049 ui.write(_('performing an upgrade with "--run" will make the following '
1051 1050 'changes:\n\n'))
1052 1051
1053 1052 printrequirements()
1054 1053 printupgradeactions()
1055 1054
1056 1055 unusedoptimize = [i for i in improvements
1057 1056 if i.name not in actions and i.type == optimisation]
1058 1057 if unusedoptimize:
1059 1058 ui.write(_('additional optimizations are available by specifying '
1060 1059 '"--optimize <name>":\n\n'))
1061 1060 for i in unusedoptimize:
1062 1061 ui.write(_('%s\n %s\n\n') % (i.name, i.description))
1063 1062 return
1064 1063
1065 1064 # Else we're in the run=true case.
1066 1065 ui.write(_('upgrade will perform the following actions:\n\n'))
1067 1066 printrequirements()
1068 1067 printupgradeactions()
1069 1068
1070 1069 ui.write(_('beginning upgrade...\n'))
1071 1070 with repo.wlock():
1072 1071 with repo.lock():
1073 1072 ui.write(_('repository locked and read-only\n'))
1074 1073 # Our strategy for upgrading the repository is to create a new,
1075 1074 # temporary repository, write data to it, then do a swap of the
1076 1075 # data. There are less heavyweight ways to do this, but it is easier
1077 1076 # to create a new repo object than to instantiate all the components
1078 1077 # (like the store) separately.
1079 1078 tmppath = tempfile.mkdtemp(prefix='upgrade.', dir=repo.path)
1080 1079 backuppath = None
1081 1080 try:
1082 1081 ui.write(_('creating temporary repository to stage migrated '
1083 1082 'data: %s\n') % tmppath)
1084 1083 dstrepo = localrepo.localrepository(repo.baseui,
1085 1084 path=tmppath,
1086 1085 create=True)
1087 1086
1088 1087 with dstrepo.wlock():
1089 1088 with dstrepo.lock():
1090 1089 backuppath = _upgraderepo(ui, repo, dstrepo, newreqs,
1091 1090 actions)
1092 1091
1093 1092 finally:
1094 1093 ui.write(_('removing temporary repository %s\n') % tmppath)
1095 1094 repo.vfs.rmtree(tmppath, forcibly=True)
1096 1095
1097 1096 if backuppath:
1098 1097 ui.warn(_('copy of old repository backed up at %s\n') %
1099 1098 backuppath)
1100 1099 ui.warn(_('the old repository will not be deleted; remove '
1101 1100 'it to free up disk space once the upgraded '
1102 1101 'repository is verified\n'))
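The store-swap measurement above illustrates the pattern this whole change standardizes on: read util.timer() before and after the critical section and report only the difference. A minimal sketch of that idiom, assuming mercurial.util.timer is importable as the hunks above already require; the wrapped callable is illustrative:

    from mercurial import util

    def timedcall(func, *args, **kwargs):
        # Take an interval-clock reading before and after the work; only the
        # difference between the two readings is meaningful.
        start = util.timer()
        result = func(*args, **kwargs)
        elapsed = util.timer() - start
        return result, elapsed

    # e.g. result, seconds = timedcall(sum, xrange(1000000))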
@@ -1,398 +1,397 b''
1 1 # streamclone.py - producing and consuming streaming repository data
2 2 #
3 3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import struct
11 import time
12 11
13 12 from .i18n import _
14 13 from . import (
15 14 branchmap,
16 15 error,
17 16 store,
18 17 util,
19 18 )
20 19
21 20 def canperformstreamclone(pullop, bailifbundle2supported=False):
22 21 """Whether it is possible to perform a streaming clone as part of pull.
23 22
24 23 ``bailifbundle2supported`` will cause the function to return False if
25 24 bundle2 stream clones are supported. It should only be called by the
26 25 legacy stream clone code path.
27 26
28 27 Returns a tuple of (supported, requirements). ``supported`` is True if
29 28 streaming clone is supported and False otherwise. ``requirements`` is
30 29 a set of repo requirements from the remote, or ``None`` if stream clone
31 30 isn't supported.
32 31 """
33 32 repo = pullop.repo
34 33 remote = pullop.remote
35 34
36 35 bundle2supported = False
37 36 if pullop.canusebundle2:
38 37 if 'v1' in pullop.remotebundle2caps.get('stream', []):
39 38 bundle2supported = True
40 39 # else
41 40 # Server doesn't support bundle2 stream clone or doesn't support
42 41 # the versions we support. Fall back and possibly allow legacy.
43 42
44 43 # Ensures legacy code path uses available bundle2.
45 44 if bailifbundle2supported and bundle2supported:
46 45 return False, None
47 46 # Ensures bundle2 doesn't try to do a stream clone if it isn't supported.
48 47 #elif not bailifbundle2supported and not bundle2supported:
49 48 # return False, None
50 49
51 50 # Streaming clone only works on empty repositories.
52 51 if len(repo):
53 52 return False, None
54 53
55 54 # Streaming clone only works if all data is being requested.
56 55 if pullop.heads:
57 56 return False, None
58 57
59 58 streamrequested = pullop.streamclonerequested
60 59
61 60 # If we don't have a preference, let the server decide for us. This
62 61 # likely only comes into play in LANs.
63 62 if streamrequested is None:
64 63 # The server can advertise whether to prefer streaming clone.
65 64 streamrequested = remote.capable('stream-preferred')
66 65
67 66 if not streamrequested:
68 67 return False, None
69 68
70 69 # In order for stream clone to work, the client has to support all the
71 70 # requirements advertised by the server.
72 71 #
73 72 # The server advertises its requirements via the "stream" and "streamreqs"
74 73 # capability. "stream" (a value-less capability) is advertised if and only
75 74 # if the only requirement is "revlogv1." Else, the "streamreqs" capability
76 75 # is advertised and contains a comma-delimited list of requirements.
77 76 requirements = set()
78 77 if remote.capable('stream'):
79 78 requirements.add('revlogv1')
80 79 else:
81 80 streamreqs = remote.capable('streamreqs')
82 81 # This is weird and shouldn't happen with modern servers.
83 82 if not streamreqs:
84 83 return False, None
85 84
86 85 streamreqs = set(streamreqs.split(','))
87 86 # Server requires something we don't support. Bail.
88 87 if streamreqs - repo.supportedformats:
89 88 return False, None
90 89 requirements = streamreqs
91 90
92 91 return True, requirements
93 92
94 93 def maybeperformlegacystreamclone(pullop):
95 94 """Possibly perform a legacy stream clone operation.
96 95
97 96 Legacy stream clones are performed as part of pull but before all other
98 97 operations.
99 98
100 99 A legacy stream clone will not be performed if a bundle2 stream clone is
101 100 supported.
102 101 """
103 102 supported, requirements = canperformstreamclone(pullop)
104 103
105 104 if not supported:
106 105 return
107 106
108 107 repo = pullop.repo
109 108 remote = pullop.remote
110 109
111 110 # Save remote branchmap. We will use it later to speed up branchcache
112 111 # creation.
113 112 rbranchmap = None
114 113 if remote.capable('branchmap'):
115 114 rbranchmap = remote.branchmap()
116 115
117 116 repo.ui.status(_('streaming all changes\n'))
118 117
119 118 fp = remote.stream_out()
120 119 l = fp.readline()
121 120 try:
122 121 resp = int(l)
123 122 except ValueError:
124 123 raise error.ResponseError(
125 124 _('unexpected response from remote server:'), l)
126 125 if resp == 1:
127 126 raise error.Abort(_('operation forbidden by server'))
128 127 elif resp == 2:
129 128 raise error.Abort(_('locking the remote repository failed'))
130 129 elif resp != 0:
131 130 raise error.Abort(_('the server sent an unknown error code'))
132 131
133 132 l = fp.readline()
134 133 try:
135 134 filecount, bytecount = map(int, l.split(' ', 1))
136 135 except (ValueError, TypeError):
137 136 raise error.ResponseError(
138 137 _('unexpected response from remote server:'), l)
139 138
140 139 with repo.lock():
141 140 consumev1(repo, fp, filecount, bytecount)
142 141
143 142 # new requirements = old non-format requirements +
144 143 # new format-related remote requirements
145 144 # requirements from the streamed-in repository
146 145 repo.requirements = requirements | (
147 146 repo.requirements - repo.supportedformats)
148 147 repo._applyopenerreqs()
149 148 repo._writerequirements()
150 149
151 150 if rbranchmap:
152 151 branchmap.replacecache(repo, rbranchmap)
153 152
154 153 repo.invalidate()
155 154
156 155 def allowservergeneration(ui):
157 156 """Whether streaming clones are allowed from the server."""
158 157 return ui.configbool('server', 'uncompressed', True, untrusted=True)
159 158
160 159 # This is its own function so extensions can override it.
161 160 def _walkstreamfiles(repo):
162 161 return repo.store.walk()
163 162
164 163 def generatev1(repo):
165 164 """Emit content for version 1 of a streaming clone.
166 165
167 166 This returns a 3-tuple of (file count, byte size, data iterator).
168 167
169 168 The data iterator consists of N entries for each file being transferred.
170 169 Each file entry starts as a line with the file name and integer size
171 170 delimited by a null byte.
172 171
173 172 The raw file data follows. Following the raw file data is the next file
174 173 entry, or EOF.
175 174
176 175 When used on the wire protocol, an additional line indicating protocol
177 176 success will be prepended to the stream. This function is not responsible
178 177 for adding it.
179 178
180 179 This function will obtain a repository lock to ensure a consistent view of
181 180 the store is captured. It therefore may raise LockError.
182 181 """
183 182 entries = []
184 183 total_bytes = 0
185 184 # Get consistent snapshot of repo, lock during scan.
186 185 with repo.lock():
187 186 repo.ui.debug('scanning\n')
188 187 for name, ename, size in _walkstreamfiles(repo):
189 188 if size:
190 189 entries.append((name, size))
191 190 total_bytes += size
192 191
193 192 repo.ui.debug('%d files, %d bytes to transfer\n' %
194 193 (len(entries), total_bytes))
195 194
196 195 svfs = repo.svfs
197 196 oldaudit = svfs.mustaudit
198 197 debugflag = repo.ui.debugflag
199 198 svfs.mustaudit = False
200 199
201 200 def emitrevlogdata():
202 201 try:
203 202 for name, size in entries:
204 203 if debugflag:
205 204 repo.ui.debug('sending %s (%d bytes)\n' % (name, size))
206 205 # partially encode name over the wire for backwards compat
207 206 yield '%s\0%d\n' % (store.encodedir(name), size)
208 207 if size <= 65536:
209 208 with svfs(name, 'rb') as fp:
210 209 yield fp.read(size)
211 210 else:
212 211 for chunk in util.filechunkiter(svfs(name), limit=size):
213 212 yield chunk
214 213 finally:
215 214 svfs.mustaudit = oldaudit
216 215
217 216 return len(entries), total_bytes, emitrevlogdata()
218 217
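A tiny worked example of the per-file framing emitted above: each entry is "<name>\0<size>\n" followed by exactly <size> bytes of raw revlog data. The name and payload here are made up for illustration; real consumers read the header line with readline() and then exactly <size> bytes rather than splitting on newlines:

    entry = 'data/foo.i\x00%d\n' % 11 + 'hello world'
    headerline, payload = entry.split('\n', 1)
    name, size = headerline.split('\x00', 1)
    assert name == 'data/foo.i'
    assert payload[:int(size)] == 'hello world'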
219 218 def generatev1wireproto(repo):
220 219 """Emit content for version 1 of streaming clone suitable for the wire.
221 220
222 221 This is the data output from ``generatev1()`` with a header line
223 222 indicating file count and byte size.
224 223 """
225 224 filecount, bytecount, it = generatev1(repo)
226 225 yield '%d %d\n' % (filecount, bytecount)
227 226 for chunk in it:
228 227 yield chunk
229 228
230 229 def generatebundlev1(repo, compression='UN'):
231 230 """Emit content for version 1 of a stream clone bundle.
232 231
233 232 The first 4 bytes of the output ("HGS1") denote this as stream clone
234 233 bundle version 1.
235 234
236 235 The next 2 bytes indicate the compression type. Only "UN" is currently
237 236 supported.
238 237
239 238 The next 16 bytes are two 64-bit big endian unsigned integers indicating
240 239 file count and byte count, respectively.
241 240
242 241 The next 2 bytes are a 16-bit big endian unsigned short declaring the length
243 242 of the requirements string, including a trailing \0. The following N bytes
244 243 are the requirements string, which is ASCII containing a comma-delimited
245 244 list of repo requirements that are needed to support the data.
246 245
247 246 The remaining content is the output of ``generatev1()`` (which may be
248 247 compressed in the future).
249 248
250 249 Returns a tuple of (requirements, data generator).
251 250 """
252 251 if compression != 'UN':
253 252 raise ValueError('we do not support the compression argument yet')
254 253
255 254 requirements = repo.requirements & repo.supportedformats
256 255 requires = ','.join(sorted(requirements))
257 256
258 257 def gen():
259 258 yield 'HGS1'
260 259 yield compression
261 260
262 261 filecount, bytecount, it = generatev1(repo)
263 262 repo.ui.status(_('writing %d bytes for %d files\n') %
264 263 (bytecount, filecount))
265 264
266 265 yield struct.pack('>QQ', filecount, bytecount)
267 266 yield struct.pack('>H', len(requires) + 1)
268 267 yield requires + '\0'
269 268
270 269 # This is where we'll add compression in the future.
271 270 assert compression == 'UN'
272 271
273 272 seen = 0
274 273 repo.ui.progress(_('bundle'), 0, total=bytecount, unit=_('bytes'))
275 274
276 275 for chunk in it:
277 276 seen += len(chunk)
278 277 repo.ui.progress(_('bundle'), seen, total=bytecount,
279 278 unit=_('bytes'))
280 279 yield chunk
281 280
282 281 repo.ui.progress(_('bundle'), None)
283 282
284 283 return requirements, gen()
285 284
286 285 def consumev1(repo, fp, filecount, bytecount):
287 286 """Apply the contents from version 1 of a streaming clone file handle.
288 287
289 288 This takes the output from "stream_out" and applies it to the specified
290 289 repository.
291 290
292 291 Like "stream_out," the status line added by the wire protocol is not
293 292 handled by this function.
294 293 """
295 294 with repo.lock():
296 295 repo.ui.status(_('%d files to transfer, %s of data\n') %
297 296 (filecount, util.bytecount(bytecount)))
298 297 handled_bytes = 0
299 298 repo.ui.progress(_('clone'), 0, total=bytecount, unit=_('bytes'))
300 start = time.time()
299 start = util.timer()
301 300
302 301 # TODO: get rid of (potential) inconsistency
303 302 #
304 303 # If transaction is started and any @filecache property is
305 304 # changed at this point, it causes inconsistency between
306 305 # in-memory cached property and streamclone-ed file on the
307 306 # disk. Nested transaction prevents transaction scope "clone"
308 307 # below from writing in-memory changes out at the end of it,
309 308 # even though in-memory changes are discarded at the end of it
310 309 # regardless of transaction nesting.
311 310 #
312 311 # But transaction nesting can't be simply prohibited, because
313 312 # nesting occurs also in ordinary case (e.g. enabling
314 313 # clonebundles).
315 314
316 315 with repo.transaction('clone'):
317 316 with repo.svfs.backgroundclosing(repo.ui, expectedcount=filecount):
318 317 for i in xrange(filecount):
319 318 # XXX doesn't support '\n' or '\r' in filenames
320 319 l = fp.readline()
321 320 try:
322 321 name, size = l.split('\0', 1)
323 322 size = int(size)
324 323 except (ValueError, TypeError):
325 324 raise error.ResponseError(
326 325 _('unexpected response from remote server:'), l)
327 326 if repo.ui.debugflag:
328 327 repo.ui.debug('adding %s (%s)\n' %
329 328 (name, util.bytecount(size)))
330 329 # for backwards compat, name was partially encoded
331 330 path = store.decodedir(name)
332 331 with repo.svfs(path, 'w', backgroundclose=True) as ofp:
333 332 for chunk in util.filechunkiter(fp, limit=size):
334 333 handled_bytes += len(chunk)
335 334 repo.ui.progress(_('clone'), handled_bytes,
336 335 total=bytecount, unit=_('bytes'))
337 336 ofp.write(chunk)
338 337
339 338 # force @filecache properties to be reloaded from
340 339 # streamclone-ed file at next access
341 340 repo.invalidate(clearfilecache=True)
342 341
343 elapsed = time.time() - start
342 elapsed = util.timer() - start
344 343 if elapsed <= 0:
345 344 elapsed = 0.001
346 345 repo.ui.progress(_('clone'), None)
347 346 repo.ui.status(_('transferred %s in %.1f seconds (%s/sec)\n') %
348 347 (util.bytecount(bytecount), elapsed,
349 348 util.bytecount(bytecount / elapsed)))
350 349
351 350 def readbundle1header(fp):
352 351 compression = fp.read(2)
353 352 if compression != 'UN':
354 353 raise error.Abort(_('only uncompressed stream clone bundles are '
355 354 'supported; got %s') % compression)
356 355
357 356 filecount, bytecount = struct.unpack('>QQ', fp.read(16))
358 357 requireslen = struct.unpack('>H', fp.read(2))[0]
359 358 requires = fp.read(requireslen)
360 359
361 360 if not requires.endswith('\0'):
362 361 raise error.Abort(_('malformed stream clone bundle: '
363 362 'requirements not properly encoded'))
364 363
365 364 requirements = set(requires.rstrip('\0').split(','))
366 365
367 366 return filecount, bytecount, requirements
368 367
369 368 def applybundlev1(repo, fp):
370 369 """Apply the content from a stream clone bundle version 1.
371 370
372 371 We assume the 4 byte header has been read and validated and the file handle
373 372 is at the 2 byte compression identifier.
374 373 """
375 374 if len(repo):
376 375 raise error.Abort(_('cannot apply stream clone bundle on non-empty '
377 376 'repo'))
378 377
379 378 filecount, bytecount, requirements = readbundle1header(fp)
380 379 missingreqs = requirements - repo.supportedformats
381 380 if missingreqs:
382 381 raise error.Abort(_('unable to apply stream clone: '
383 382 'unsupported format: %s') %
384 383 ', '.join(sorted(missingreqs)))
385 384
386 385 consumev1(repo, fp, filecount, bytecount)
387 386
388 387 class streamcloneapplier(object):
389 388 """Class to manage applying streaming clone bundles.
390 389
391 390 We need to wrap ``applybundlev1()`` in a dedicated type to enable bundle
392 391 readers to perform bundle type-specific functionality.
393 392 """
394 393 def __init__(self, fh):
395 394 self._fh = fh
396 395
397 396 def apply(self, repo):
398 397 return applybundlev1(repo, self._fh)
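For reference, a self-contained sketch of the stream clone bundle v1 header layout described in generatev1()/generatebundlev1() and parsed by readbundle1header() above; the counts and requirements are made up for illustration:

    import struct

    requires = 'revlogv1,generaldelta'
    filecount, bytecount = 3, 123456

    header = ''.join([
        'HGS1',                                    # 4-byte magic
        'UN',                                      # 2-byte compression type
        struct.pack('>QQ', filecount, bytecount),  # file count, byte count
        struct.pack('>H', len(requires) + 1),      # requirements length (+ NUL)
        requires + '\0',
    ])

    # Reading it back mirrors readbundle1header(), after the 4-byte magic.
    assert header[:4] == 'HGS1' and header[4:6] == 'UN'
    fc, bc = struct.unpack('>QQ', header[6:22])
    reqlen = struct.unpack('>H', header[22:24])[0]
    reqs = set(header[24:24 + reqlen].rstrip('\0').split(','))
    assert (fc, bc) == (filecount, bytecount) and 'revlogv1' in reqs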
@@ -1,571 +1,570 b''
1 1 # tags.py - read tag info from local repository
2 2 #
3 3 # Copyright 2009 Matt Mackall <mpm@selenic.com>
4 4 # Copyright 2009 Greg Ward <greg@gerg.ca>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 # Currently this module only deals with reading and caching tags.
10 10 # Eventually, it could take care of updating (adding/removing/moving)
11 11 # tags too.
12 12
13 13 from __future__ import absolute_import
14 14
15 15 import array
16 16 import errno
17 import time
18 17
19 18 from .node import (
20 19 bin,
21 20 hex,
22 21 nullid,
23 22 short,
24 23 )
25 24 from . import (
26 25 encoding,
27 26 error,
28 27 util,
29 28 )
30 29
31 30 array = array.array
32 31
33 32 # Tags computation can be expensive and caches exist to make it fast in
34 33 # the common case.
35 34 #
36 35 # The "hgtagsfnodes1" cache file caches the .hgtags filenode values for
37 36 # each revision in the repository. The file is effectively an array of
38 37 # fixed length records. Read the docs for "hgtagsfnodescache" for technical
39 38 # details.
40 39 #
41 40 # The .hgtags filenode cache grows in proportion to the length of the
42 41 # changelog. The file is truncated when the changelog is stripped.
43 42 #
44 43 # The purpose of the filenode cache is to avoid the most expensive part
45 44 # of finding global tags, which is looking up the .hgtags filenode in the
46 45 # manifest for each head. This can take dozens or over 100ms for
47 46 # repositories with very large manifests. Multiplied by dozens or even
48 47 # hundreds of heads and there is a significant performance concern.
49 48 #
50 49 # There also exists a separate cache file for each repository filter.
51 50 # These "tags-*" files store information about the history of tags.
52 51 #
53 52 # The tags cache files consist of a cache validation line followed by
54 53 # a history of tags.
55 54 #
56 55 # The cache validation line has the format:
57 56 #
58 57 # <tiprev> <tipnode> [<filteredhash>]
59 58 #
60 59 # <tiprev> is an integer revision and <tipnode> is a 40 character hex
61 60 # node for that changeset. These redundantly identify the repository
62 61 # tip from the time the cache was written. In addition, <filteredhash>,
63 62 # if present, is a 40 character hex hash of the contents of the filtered
64 63 # revisions for this filter. If the set of filtered revs changes, the
65 64 # hash will change and invalidate the cache.
66 65 #
67 66 # The history part of the tags cache consists of lines of the form:
68 67 #
69 68 # <node> <tag>
70 69 #
71 70 # (This format is identical to that of .hgtags files.)
72 71 #
73 72 # <tag> is the tag name and <node> is the 40 character hex changeset
74 73 # the tag is associated with.
75 74 #
76 75 # Tags are written sorted by tag name.
77 76 #
78 77 # Tags associated with multiple changesets have an entry for each changeset.
79 78 # The most recent changeset (in terms of revlog ordering for the head
80 79 # setting it) for each tag is last.
81 80
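To make the cache validation line concrete, here is a minimal parsing sketch mirroring what `_readtagcache` below does (`parsevalidline` is a hypothetical helper, illustrative only):

from mercurial.node import bin

def parsevalidline(line):
    # "<tiprev> <tipnode> [<filteredhash>]" -- node and hash are 40-char hex
    fields = line.split()
    tiprev = int(fields[0])
    tipnode = bin(fields[1])
    filteredhash = bin(fields[2]) if len(fields) > 2 else None
    return tiprev, tipnode, filteredhash
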
82 81 def findglobaltags(ui, repo, alltags, tagtypes):
83 82 '''Find global tags in a repo.
84 83
85 84 "alltags" maps tag name to (node, hist) 2-tuples.
86 85
87 86 "tagtypes" maps tag name to tag type. Global tags always have the
88 87 "global" tag type.
89 88
90 89 The "alltags" and "tagtypes" dicts are updated in place. Empty dicts
91 90 should be passed in.
92 91
93 92 The tags cache is read and updated as a side-effect of calling.
94 93 '''
95 94 # This is so we can be lazy and assume alltags contains only global
96 95 # tags when we pass it to _writetagcache().
97 96 assert len(alltags) == len(tagtypes) == 0, \
98 97 "findglobaltags() should be called first"
99 98
100 99 (heads, tagfnode, valid, cachetags, shouldwrite) = _readtagcache(ui, repo)
101 100 if cachetags is not None:
102 101 assert not shouldwrite
103 102 # XXX is this really 100% correct? are there oddball special
104 103 # cases where a global tag should outrank a local tag but won't,
105 104 # because cachetags does not contain rank info?
106 105 _updatetags(cachetags, 'global', alltags, tagtypes)
107 106 return
108 107
109 108 seen = set() # set of fnode
110 109 fctx = None
111 110 for head in reversed(heads): # oldest to newest
112 111 assert head in repo.changelog.nodemap, \
113 112 "tag cache returned bogus head %s" % short(head)
114 113
115 114 fnode = tagfnode.get(head)
116 115 if fnode and fnode not in seen:
117 116 seen.add(fnode)
118 117 if not fctx:
119 118 fctx = repo.filectx('.hgtags', fileid=fnode)
120 119 else:
121 120 fctx = fctx.filectx(fnode)
122 121
123 122 filetags = _readtags(ui, repo, fctx.data().splitlines(), fctx)
124 123 _updatetags(filetags, 'global', alltags, tagtypes)
125 124
126 125 # and update the cache (if necessary)
127 126 if shouldwrite:
128 127 _writetagcache(ui, repo, valid, alltags)
129 128
130 129 def readlocaltags(ui, repo, alltags, tagtypes):
131 130 '''Read local tags in repo. Update alltags and tagtypes.'''
132 131 try:
133 132 data = repo.vfs.read("localtags")
134 133 except IOError as inst:
135 134 if inst.errno != errno.ENOENT:
136 135 raise
137 136 return
138 137
139 138 # localtags is in the local encoding; re-encode to UTF-8 on
140 139 # input for consistency with the rest of this module.
141 140 filetags = _readtags(
142 141 ui, repo, data.splitlines(), "localtags",
143 142 recode=encoding.fromlocal)
144 143
145 144 # remove tags pointing to invalid nodes
146 145 cl = repo.changelog
147 146 for t in filetags.keys():
148 147 try:
149 148 cl.rev(filetags[t][0])
150 149 except (LookupError, ValueError):
151 150 del filetags[t]
152 151
153 152 _updatetags(filetags, "local", alltags, tagtypes)
154 153
155 154 def _readtaghist(ui, repo, lines, fn, recode=None, calcnodelines=False):
156 155 '''Read tag definitions from a file (or any source of lines).
157 156
158 157 This function returns two sortdicts with similar information:
159 158
160 159 - the first dict, bintaghist, contains the tag information as expected by
161 160 the _readtags function, i.e. a mapping from tag name to (node, hist):
162 161 - node is the node id from the last line read for that name,
163 162 - hist is the list of node ids previously associated with it (in file
164 163 order). All node ids are binary, not hex.
165 164
166 165 - the second dict, hextaglines, is a mapping from tag name to a list of
167 166 [hexnode, line number] pairs, ordered from the oldest to the newest node.
168 167
169 168 When calcnodelines is False the hextaglines dict is not calculated (an
170 169 empty dict is returned). This is done to improve this function's
171 170 performance in cases where the line numbers are not needed.
172 171 '''
173 172
174 173 bintaghist = util.sortdict()
175 174 hextaglines = util.sortdict()
176 175 count = 0
177 176
178 177 def dbg(msg):
179 178 ui.debug("%s, line %s: %s\n" % (fn, count, msg))
180 179
181 180 for nline, line in enumerate(lines):
182 181 count += 1
183 182 if not line:
184 183 continue
185 184 try:
186 185 (nodehex, name) = line.split(" ", 1)
187 186 except ValueError:
188 187 dbg("cannot parse entry")
189 188 continue
190 189 name = name.strip()
191 190 if recode:
192 191 name = recode(name)
193 192 try:
194 193 nodebin = bin(nodehex)
195 194 except TypeError:
196 195 dbg("node '%s' is not well formed" % nodehex)
197 196 continue
198 197
199 198 # update filetags
200 199 if calcnodelines:
201 200 # map tag name to a list of line numbers
202 201 if name not in hextaglines:
203 202 hextaglines[name] = []
204 203 hextaglines[name].append([nodehex, nline])
205 204 continue
206 205 # map tag name to (node, hist)
207 206 if name not in bintaghist:
208 207 bintaghist[name] = []
209 208 bintaghist[name].append(nodebin)
210 209 return bintaghist, hextaglines
211 210
212 211 def _readtags(ui, repo, lines, fn, recode=None, calcnodelines=False):
213 212 '''Read tag definitions from a file (or any source of lines).
214 213
215 214 Returns a mapping from tag name to (node, hist).
216 215
217 216 "node" is the node id from the last line read for that name. "hist"
218 217 is the list of node ids previously associated with it (in file order).
219 218 All node ids are binary, not hex.
220 219 '''
221 220 filetags, nodelines = _readtaghist(ui, repo, lines, fn, recode=recode,
222 221 calcnodelines=calcnodelines)
223 222 # util.sortdict().__setitem__ is much slower at replacing than inserting
224 223 # new entries. The difference can matter if there are thousands of tags.
225 224 # Create a new sortdict to avoid the performance penalty.
226 225 newtags = util.sortdict()
227 226 for tag, taghist in filetags.items():
228 227 newtags[tag] = (taghist[-1], taghist[:-1])
229 228 return newtags
230 229
231 230 def _updatetags(filetags, tagtype, alltags, tagtypes):
232 231 '''Incorporate the tag info read from one file into the two
233 232 dictionaries, alltags and tagtypes, that contain all tag
234 233 info (global across all heads plus local).'''
235 234
236 235 for name, nodehist in filetags.iteritems():
237 236 if name not in alltags:
238 237 alltags[name] = nodehist
239 238 tagtypes[name] = tagtype
240 239 continue
241 240
242 241 # we prefer alltags[name] if:
243 242 # it supersedes us OR
244 243 # mutual supersedes and it has a higher rank
245 244 # otherwise we win because we're tip-most
246 245 anode, ahist = nodehist
247 246 bnode, bhist = alltags[name]
248 247 if (bnode != anode and anode in bhist and
249 248 (bnode not in ahist or len(bhist) > len(ahist))):
250 249 anode = bnode
251 250 else:
252 251 tagtypes[name] = tagtype
253 252 ahist.extend([n for n in bhist if n not in ahist])
254 253 alltags[name] = anode, ahist
255 254
256 255 def _filename(repo):
257 256 """name of a tagcache file for a given repo or repoview"""
258 257 filename = 'cache/tags2'
259 258 if repo.filtername:
260 259 filename = '%s-%s' % (filename, repo.filtername)
261 260 return filename
262 261
263 262 def _readtagcache(ui, repo):
264 263 '''Read the tag cache.
265 264
266 265 Returns a tuple (heads, fnodes, validinfo, cachetags, shouldwrite).
267 266
268 267 If the cache is completely up-to-date, "cachetags" is a dict of the
269 268 form returned by _readtags() and "heads", "fnodes", and "validinfo" are
270 269 None and "shouldwrite" is False.
271 270
272 271 If the cache is not up to date, "cachetags" is None. "heads" is a list
273 272 of all heads currently in the repository, ordered from tip to oldest.
274 273 "validinfo" is a tuple describing cache validation info. This is used
275 274 when writing the tags cache. "fnodes" is a mapping from head to .hgtags
276 275 filenode. "shouldwrite" is True.
277 276
278 277 If the cache is not up to date, the caller is responsible for reading tag
279 278 info from each returned head. (See findglobaltags().)
280 279 '''
281 280 from . import scmutil # avoid cycle
282 281
283 282 try:
284 283 cachefile = repo.vfs(_filename(repo), 'r')
285 284 # force reading the file for static-http
286 285 cachelines = iter(cachefile)
287 286 except IOError:
288 287 cachefile = None
289 288
290 289 cacherev = None
291 290 cachenode = None
292 291 cachehash = None
293 292 if cachefile:
294 293 try:
295 294 validline = next(cachelines)
296 295 validline = validline.split()
297 296 cacherev = int(validline[0])
298 297 cachenode = bin(validline[1])
299 298 if len(validline) > 2:
300 299 cachehash = bin(validline[2])
301 300 except Exception:
302 301 # corruption of the cache, just recompute it.
303 302 pass
304 303
305 304 tipnode = repo.changelog.tip()
306 305 tiprev = len(repo.changelog) - 1
307 306
308 307 # Case 1 (common): tip is the same, so nothing has changed.
309 308 # (Unchanged tip trivially means no changesets have been added.
310 309 # But, thanks to localrepository.destroyed(), it also means none
311 310 # have been destroyed by strip or rollback.)
312 311 if (cacherev == tiprev
313 312 and cachenode == tipnode
314 313 and cachehash == scmutil.filteredhash(repo, tiprev)):
315 314 tags = _readtags(ui, repo, cachelines, cachefile.name)
316 315 cachefile.close()
317 316 return (None, None, None, tags, False)
318 317 if cachefile:
319 318 cachefile.close() # ignore rest of file
320 319
321 320 valid = (tiprev, tipnode, scmutil.filteredhash(repo, tiprev))
322 321
323 322 repoheads = repo.heads()
324 323 # Case 2 (uncommon): empty repo; get out quickly and don't bother
325 324 # writing an empty cache.
326 325 if repoheads == [nullid]:
327 326 return ([], {}, valid, {}, False)
328 327
329 328 # Case 3 (uncommon): cache file missing or empty.
330 329
331 330 # Case 4 (uncommon): tip rev decreased. This should only happen
332 331 # when we're called from localrepository.destroyed(). Refresh the
333 332 # cache so future invocations will not see disappeared heads in the
334 333 # cache.
335 334
336 335 # Case 5 (common): tip has changed, so we've added/replaced heads.
337 336
338 337 # As it happens, the code to handle cases 3, 4, 5 is the same.
339 338
340 339 # N.B. in case 4 (nodes destroyed), "new head" really means "newly
341 340 # exposed".
342 341 if not len(repo.file('.hgtags')):
343 342 # No tags have ever been committed, so we can avoid a
344 343 # potentially expensive search.
345 344 return ([], {}, valid, None, True)
346 345
347 starttime = time.time()
346 starttime = util.timer()
348 347
349 348 # Now we have to lookup the .hgtags filenode for every new head.
350 349 # This is the most expensive part of finding tags, so performance
351 350 # depends primarily on the size of newheads. Worst case: no cache
352 351 # file, so newheads == repoheads.
353 352 fnodescache = hgtagsfnodescache(repo.unfiltered())
354 353 cachefnode = {}
355 354 for head in reversed(repoheads):
356 355 fnode = fnodescache.getfnode(head)
357 356 if fnode != nullid:
358 357 cachefnode[head] = fnode
359 358
360 359 fnodescache.write()
361 360
362 duration = time.time() - starttime
361 duration = util.timer() - starttime
363 362 ui.log('tagscache',
364 363 '%d/%d cache hits/lookups in %0.4f '
365 364 'seconds\n',
366 365 fnodescache.hitcount, fnodescache.lookupcount, duration)
367 366
368 367 # Caller has to iterate over all heads, but can use the filenodes in
369 368 # cachefnode to get to each .hgtags revision quickly.
370 369 return (repoheads, cachefnode, valid, None, True)
371 370
372 371 def _writetagcache(ui, repo, valid, cachetags):
373 372 filename = _filename(repo)
374 373 try:
375 374 cachefile = repo.vfs(filename, 'w', atomictemp=True)
376 375 except (OSError, IOError):
377 376 return
378 377
379 378 ui.log('tagscache', 'writing .hg/%s with %d tags\n',
380 379 filename, len(cachetags))
381 380
382 381 if valid[2]:
383 382 cachefile.write('%d %s %s\n' % (valid[0], hex(valid[1]), hex(valid[2])))
384 383 else:
385 384 cachefile.write('%d %s\n' % (valid[0], hex(valid[1])))
386 385
387 386 # Tag names in the cache are in UTF-8 -- which is the whole reason
388 387 # we keep them in UTF-8 throughout this module. If we converted
389 388 # them to local encoding on input, we would lose info writing them to
390 389 # the cache.
391 390 for (name, (node, hist)) in sorted(cachetags.iteritems()):
392 391 for n in hist:
393 392 cachefile.write("%s %s\n" % (hex(n), name))
394 393 cachefile.write("%s %s\n" % (hex(node), name))
395 394
396 395 try:
397 396 cachefile.close()
398 397 except (OSError, IOError):
399 398 pass
400 399
401 400 _fnodescachefile = 'cache/hgtagsfnodes1'
402 401 _fnodesrecsize = 4 + 20 # changeset fragment + filenode
403 402 _fnodesmissingrec = '\xff' * 24
404 403
405 404 class hgtagsfnodescache(object):
406 405 """Persistent cache mapping revisions to .hgtags filenodes.
407 406
408 407 The cache is an array of records. Each item in the array corresponds to
409 408 a changelog revision. Values in the array contain the first 4 bytes of
410 409 the node hash and the 20 bytes .hgtags filenode for that revision.
411 410
412 411 The first 4 bytes are present as a form of verification. Repository
413 412 stripping and rewriting may change the node at a numeric revision in the
414 413 changelog. The changeset fragment serves as a verifier to detect
415 414 rewriting. This logic is shared with the rev branch cache (see
416 415 branchmap.py).
417 416
418 417 The instance holds in memory the full cache content but entries are
419 418 only parsed on read.
420 419
421 420 Instances behave like lists. ``c[i]`` works where i is a rev or
422 421 changeset node. Missing indexes are populated automatically on access.
423 422 """
424 423 def __init__(self, repo):
425 424 assert repo.filtername is None
426 425
427 426 self._repo = repo
428 427
429 428 # Only for reporting purposes.
430 429 self.lookupcount = 0
431 430 self.hitcount = 0
432 431
433 432 self._raw = array('c')
434 433
435 434 try:
436 435 data = repo.vfs.read(_fnodescachefile)
437 436 except (OSError, IOError):
438 437 data = ""
439 438 self._raw.fromstring(data)
440 439
441 440 # The end state of self._raw is an array that is of the exact length
442 441 # required to hold a record for every revision in the repository.
443 442 # We truncate or extend the array as necessary. self._dirtyoffset is
444 443 # defined to be the start offset at which we need to write the output
445 444 # file. This offset is also adjusted when new entries are calculated
446 445 # for array members.
447 446 cllen = len(repo.changelog)
448 447 wantedlen = cllen * _fnodesrecsize
449 448 rawlen = len(self._raw)
450 449
451 450 self._dirtyoffset = None
452 451
453 452 if rawlen < wantedlen:
454 453 self._dirtyoffset = rawlen
455 454 self._raw.extend('\xff' * (wantedlen - rawlen))
456 455 elif rawlen > wantedlen:
457 456 # There's no easy way to truncate array instances. This seems
458 457 # slightly less evil than copying a potentially large array slice.
459 458 for i in range(rawlen - wantedlen):
460 459 self._raw.pop()
461 460 self._dirtyoffset = len(self._raw)
462 461
463 462 def getfnode(self, node, computemissing=True):
464 463 """Obtain the filenode of the .hgtags file at a specified revision.
465 464
466 465 If the value is in the cache, the entry will be validated and returned.
467 466 Otherwise, the filenode will be computed and returned unless
468 467 "computemissing" is False, in which case None will be returned without
469 468 any potentially expensive computation being performed.
470 469
471 470 If an .hgtags does not exist at the specified revision, nullid is
472 471 returned.
473 472 """
474 473 ctx = self._repo[node]
475 474 rev = ctx.rev()
476 475
477 476 self.lookupcount += 1
478 477
479 478 offset = rev * _fnodesrecsize
480 479 record = self._raw[offset:offset + _fnodesrecsize].tostring()
481 480 properprefix = node[0:4]
482 481
483 482 # Validate and return existing entry.
484 483 if record != _fnodesmissingrec:
485 484 fileprefix = record[0:4]
486 485
487 486 if fileprefix == properprefix:
488 487 self.hitcount += 1
489 488 return record[4:]
490 489
491 490 # Fall through.
492 491
493 492 # If we get here, the entry is either missing or invalid.
494 493
495 494 if not computemissing:
496 495 return None
497 496
498 497 # Populate missing entry.
499 498 try:
500 499 fnode = ctx.filenode('.hgtags')
501 500 except error.LookupError:
502 501 # No .hgtags file on this revision.
503 502 fnode = nullid
504 503
505 504 self._writeentry(offset, properprefix, fnode)
506 505 return fnode
507 506
508 507 def setfnode(self, node, fnode):
509 508 """Set the .hgtags filenode for a given changeset."""
510 509 assert len(fnode) == 20
511 510 ctx = self._repo[node]
512 511
513 512 # Do a lookup first to avoid writing if nothing has changed.
514 513 if self.getfnode(ctx.node(), computemissing=False) == fnode:
515 514 return
516 515
517 516 self._writeentry(ctx.rev() * _fnodesrecsize, node[0:4], fnode)
518 517
519 518 def _writeentry(self, offset, prefix, fnode):
520 519 # Slices on array instances only accept other array.
521 520 entry = array('c', prefix + fnode)
522 521 self._raw[offset:offset + _fnodesrecsize] = entry
523 522 # self._dirtyoffset could be None.
524 523 self._dirtyoffset = min(self._dirtyoffset, offset) or 0
525 524
526 525 def write(self):
527 526 """Perform all necessary writes to cache file.
528 527
529 528 This may no-op if no writes are needed or if a write lock could
530 529 not be obtained.
531 530 """
532 531 if self._dirtyoffset is None:
533 532 return
534 533
535 534 data = self._raw[self._dirtyoffset:]
536 535 if not data:
537 536 return
538 537
539 538 repo = self._repo
540 539
541 540 try:
542 541 lock = repo.wlock(wait=False)
543 542 except error.LockError:
544 543 repo.ui.log('tagscache',
545 544 'not writing .hg/%s because lock cannot be acquired\n' %
546 545 (_fnodescachefile))
547 546 return
548 547
549 548 try:
550 549 f = repo.vfs.open(_fnodescachefile, 'ab')
551 550 try:
552 551 # if the file has been truncated
553 552 actualoffset = f.tell()
554 553 if actualoffset < self._dirtyoffset:
555 554 self._dirtyoffset = actualoffset
556 555 data = self._raw[self._dirtyoffset:]
557 556 f.seek(self._dirtyoffset)
558 557 f.truncate()
559 558 repo.ui.log('tagscache',
560 559 'writing %d bytes to %s\n' % (
561 560 len(data), _fnodescachefile))
562 561 f.write(data)
563 562 self._dirtyoffset = None
564 563 finally:
565 564 f.close()
566 565 except (IOError, OSError) as inst:
567 566 repo.ui.log('tagscache',
568 567 "couldn't write %s: %s\n" % (
569 568 _fnodescachefile, inst))
570 569 finally:
571 570 lock.release()
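A minimal sketch of the fixed-size record layout behind `hgtagsfnodescache` above: one 24-byte record per changelog revision, a 4-byte changeset-node prefix followed by the 20-byte .hgtags filenode (`readrecord` is a hypothetical helper, illustrative only):

_fnodesrecsize = 4 + 20            # changeset fragment + filenode
_fnodesmissingrec = '\xff' * 24

def readrecord(raw, rev, node):
    offset = rev * _fnodesrecsize
    record = raw[offset:offset + _fnodesrecsize]
    if record == _fnodesmissingrec or record[0:4] != node[0:4]:
        return None                # missing, or stale after a strip/rewrite
    return record[4:]              # the cached .hgtags filenode
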
@@ -1,3556 +1,3556 b''
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import
17 17
18 18 import bz2
19 19 import calendar
20 20 import collections
21 21 import datetime
22 22 import errno
23 23 import gc
24 24 import hashlib
25 25 import imp
26 26 import os
27 27 import platform as pyplatform
28 28 import re as remod
29 29 import shutil
30 30 import signal
31 31 import socket
32 32 import stat
33 33 import string
34 34 import subprocess
35 35 import sys
36 36 import tempfile
37 37 import textwrap
38 38 import time
39 39 import traceback
40 40 import zlib
41 41
42 42 from . import (
43 43 encoding,
44 44 error,
45 45 i18n,
46 46 osutil,
47 47 parsers,
48 48 pycompat,
49 49 )
50 50
51 51 empty = pycompat.empty
52 52 httplib = pycompat.httplib
53 53 httpserver = pycompat.httpserver
54 54 pickle = pycompat.pickle
55 55 queue = pycompat.queue
56 56 socketserver = pycompat.socketserver
57 57 stderr = pycompat.stderr
58 58 stdin = pycompat.stdin
59 59 stdout = pycompat.stdout
60 60 stringio = pycompat.stringio
61 61 urlerr = pycompat.urlerr
62 62 urlparse = pycompat.urlparse
63 63 urlreq = pycompat.urlreq
64 64 xmlrpclib = pycompat.xmlrpclib
65 65
66 66 def isatty(fp):
67 67 try:
68 68 return fp.isatty()
69 69 except AttributeError:
70 70 return False
71 71
72 72 # glibc determines buffering on first write to stdout - if we replace a TTY
73 73 # destined stdout with a pipe destined stdout (e.g. pager), we want line
74 74 # buffering
75 75 if isatty(stdout):
76 76 stdout = os.fdopen(stdout.fileno(), 'wb', 1)
77 77
78 78 if pycompat.osname == 'nt':
79 79 from . import windows as platform
80 80 stdout = platform.winstdout(stdout)
81 81 else:
82 82 from . import posix as platform
83 83
84 84 _ = i18n._
85 85
86 86 bindunixsocket = platform.bindunixsocket
87 87 cachestat = platform.cachestat
88 88 checkexec = platform.checkexec
89 89 checklink = platform.checklink
90 90 copymode = platform.copymode
91 91 executablepath = platform.executablepath
92 92 expandglobs = platform.expandglobs
93 93 explainexit = platform.explainexit
94 94 findexe = platform.findexe
95 95 gethgcmd = platform.gethgcmd
96 96 getuser = platform.getuser
97 97 getpid = os.getpid
98 98 groupmembers = platform.groupmembers
99 99 groupname = platform.groupname
100 100 hidewindow = platform.hidewindow
101 101 isexec = platform.isexec
102 102 isowner = platform.isowner
103 103 localpath = platform.localpath
104 104 lookupreg = platform.lookupreg
105 105 makedir = platform.makedir
106 106 nlinks = platform.nlinks
107 107 normpath = platform.normpath
108 108 normcase = platform.normcase
109 109 normcasespec = platform.normcasespec
110 110 normcasefallback = platform.normcasefallback
111 111 openhardlinks = platform.openhardlinks
112 112 oslink = platform.oslink
113 113 parsepatchoutput = platform.parsepatchoutput
114 114 pconvert = platform.pconvert
115 115 poll = platform.poll
116 116 popen = platform.popen
117 117 posixfile = platform.posixfile
118 118 quotecommand = platform.quotecommand
119 119 readpipe = platform.readpipe
120 120 rename = platform.rename
121 121 removedirs = platform.removedirs
122 122 samedevice = platform.samedevice
123 123 samefile = platform.samefile
124 124 samestat = platform.samestat
125 125 setbinary = platform.setbinary
126 126 setflags = platform.setflags
127 127 setsignalhandler = platform.setsignalhandler
128 128 shellquote = platform.shellquote
129 129 spawndetached = platform.spawndetached
130 130 split = platform.split
131 131 sshargs = platform.sshargs
132 132 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
133 133 statisexec = platform.statisexec
134 134 statislink = platform.statislink
135 135 testpid = platform.testpid
136 136 umask = platform.umask
137 137 unlink = platform.unlink
138 138 unlinkpath = platform.unlinkpath
139 139 username = platform.username
140 140
141 141 # Python compatibility
142 142
143 143 _notset = object()
144 144
145 145 # disable Python's problematic floating point timestamps (issue4836)
146 146 # (Python hypocritically says you shouldn't change this behavior in
147 147 # libraries, and sure enough Mercurial is not a library.)
148 148 os.stat_float_times(False)
149 149
150 150 def safehasattr(thing, attr):
151 151 return getattr(thing, attr, _notset) is not _notset
152 152
153 153 def bitsfrom(container):
154 154 bits = 0
155 155 for bit in container:
156 156 bits |= bit
157 157 return bits
158 158
159 159 DIGESTS = {
160 160 'md5': hashlib.md5,
161 161 'sha1': hashlib.sha1,
162 162 'sha512': hashlib.sha512,
163 163 }
164 164 # List of digest types from strongest to weakest
165 165 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
166 166
167 167 for k in DIGESTS_BY_STRENGTH:
168 168 assert k in DIGESTS
169 169
170 170 class digester(object):
171 171 """helper to compute digests.
172 172
173 173 This helper can be used to compute one or more digests given their name.
174 174
175 175 >>> d = digester(['md5', 'sha1'])
176 176 >>> d.update('foo')
177 177 >>> [k for k in sorted(d)]
178 178 ['md5', 'sha1']
179 179 >>> d['md5']
180 180 'acbd18db4cc2f85cedef654fccc4a4d8'
181 181 >>> d['sha1']
182 182 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
183 183 >>> digester.preferred(['md5', 'sha1'])
184 184 'sha1'
185 185 """
186 186
187 187 def __init__(self, digests, s=''):
188 188 self._hashes = {}
189 189 for k in digests:
190 190 if k not in DIGESTS:
191 191 raise Abort(_('unknown digest type: %s') % k)
192 192 self._hashes[k] = DIGESTS[k]()
193 193 if s:
194 194 self.update(s)
195 195
196 196 def update(self, data):
197 197 for h in self._hashes.values():
198 198 h.update(data)
199 199
200 200 def __getitem__(self, key):
201 201 if key not in DIGESTS:
202 202 raise Abort(_('unknown digest type: %s') % key)
203 203 return self._hashes[key].hexdigest()
204 204
205 205 def __iter__(self):
206 206 return iter(self._hashes)
207 207
208 208 @staticmethod
209 209 def preferred(supported):
210 210 """returns the strongest digest type in both supported and DIGESTS."""
211 211
212 212 for k in DIGESTS_BY_STRENGTH:
213 213 if k in supported:
214 214 return k
215 215 return None
216 216
217 217 class digestchecker(object):
218 218 """file handle wrapper that additionally checks content against a given
219 219 size and digests.
220 220
221 221 d = digestchecker(fh, size, {'md5': '...'})
222 222
223 223 When multiple digests are given, all of them are validated.
224 224 """
225 225
226 226 def __init__(self, fh, size, digests):
227 227 self._fh = fh
228 228 self._size = size
229 229 self._got = 0
230 230 self._digests = dict(digests)
231 231 self._digester = digester(self._digests.keys())
232 232
233 233 def read(self, length=-1):
234 234 content = self._fh.read(length)
235 235 self._digester.update(content)
236 236 self._got += len(content)
237 237 return content
238 238
239 239 def validate(self):
240 240 if self._size != self._got:
241 241 raise Abort(_('size mismatch: expected %d, got %d') %
242 242 (self._size, self._got))
243 243 for k, v in self._digests.items():
244 244 if v != self._digester[k]:
245 245 # i18n: first parameter is a digest name
246 246 raise Abort(_('%s mismatch: expected %s, got %s') %
247 247 (k, v, self._digester[k]))
248 248
249 249 try:
250 250 buffer = buffer
251 251 except NameError:
252 252 if not pycompat.ispy3:
253 253 def buffer(sliceable, offset=0, length=None):
254 254 if length is not None:
255 255 return sliceable[offset:offset + length]
256 256 return sliceable[offset:]
257 257 else:
258 258 def buffer(sliceable, offset=0, length=None):
259 259 if length is not None:
260 260 return memoryview(sliceable)[offset:offset + length]
261 261 return memoryview(sliceable)[offset:]
262 262
263 263 closefds = pycompat.osname == 'posix'
264 264
265 265 _chunksize = 4096
266 266
267 267 class bufferedinputpipe(object):
268 268 """a manually buffered input pipe
269 269
270 270 Python will not let us use buffered IO and lazy reading with 'polling' at
271 271 the same time. We cannot probe the buffer state and select will not detect
272 272 that data are ready to read if they are already buffered.
273 273
274 274 This class lets us work around that by implementing its own buffering
275 275 (allowing efficient readline) while offering a way to know if the buffer is
276 276 empty from the output (allowing collaboration of the buffer with polling).
277 277
278 278 This class lives in the 'util' module because it makes use of the 'os'
279 279 module from the python stdlib.
280 280 """
281 281
282 282 def __init__(self, input):
283 283 self._input = input
284 284 self._buffer = []
285 285 self._eof = False
286 286 self._lenbuf = 0
287 287
288 288 @property
289 289 def hasbuffer(self):
290 290 """True is any data is currently buffered
291 291
292 292 This will be used externally as a pre-step for polling IO. If there is
293 293 already data then no polling should be set in place."""
294 294 return bool(self._buffer)
295 295
296 296 @property
297 297 def closed(self):
298 298 return self._input.closed
299 299
300 300 def fileno(self):
301 301 return self._input.fileno()
302 302
303 303 def close(self):
304 304 return self._input.close()
305 305
306 306 def read(self, size):
307 307 while (not self._eof) and (self._lenbuf < size):
308 308 self._fillbuffer()
309 309 return self._frombuffer(size)
310 310
311 311 def readline(self, *args, **kwargs):
312 312 if 1 < len(self._buffer):
313 313 # this should not happen because both read and readline end with a
314 314 # _frombuffer call that collapse it.
315 315 self._buffer = [''.join(self._buffer)]
316 316 self._lenbuf = len(self._buffer[0])
317 317 lfi = -1
318 318 if self._buffer:
319 319 lfi = self._buffer[-1].find('\n')
320 320 while (not self._eof) and lfi < 0:
321 321 self._fillbuffer()
322 322 if self._buffer:
323 323 lfi = self._buffer[-1].find('\n')
324 324 size = lfi + 1
325 325 if lfi < 0: # end of file
326 326 size = self._lenbuf
327 327 elif 1 < len(self._buffer):
328 328 # we need to take previous chunks into account
329 329 size += self._lenbuf - len(self._buffer[-1])
330 330 return self._frombuffer(size)
331 331
332 332 def _frombuffer(self, size):
333 333 """return at most 'size' data from the buffer
334 334
335 335 The data are removed from the buffer."""
336 336 if size == 0 or not self._buffer:
337 337 return ''
338 338 buf = self._buffer[0]
339 339 if 1 < len(self._buffer):
340 340 buf = ''.join(self._buffer)
341 341
342 342 data = buf[:size]
343 343 buf = buf[len(data):]
344 344 if buf:
345 345 self._buffer = [buf]
346 346 self._lenbuf = len(buf)
347 347 else:
348 348 self._buffer = []
349 349 self._lenbuf = 0
350 350 return data
351 351
352 352 def _fillbuffer(self):
353 353 """read data to the buffer"""
354 354 data = os.read(self._input.fileno(), _chunksize)
355 355 if not data:
356 356 self._eof = True
357 357 else:
358 358 self._lenbuf += len(data)
359 359 self._buffer.append(data)
360 360
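A rough usage sketch for `bufferedinputpipe`: consult `hasbuffer` before blocking in `select`, since data already sitting in the local buffer would not wake the poll (assumes a pipe from `popen2` below; the command is hypothetical):

import select

_in, _out = popen2('some-command')
pipe = bufferedinputpipe(_out)
while True:
    if not pipe.hasbuffer:
        # nothing buffered locally, so it is safe to wait on the fd
        select.select([pipe.fileno()], [], [])
    line = pipe.readline()
    if not line:
        break
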
361 361 def popen2(cmd, env=None, newlines=False):
362 362 # Setting bufsize to -1 lets the system decide the buffer size.
363 363 # The default for bufsize is 0, meaning unbuffered. This leads to
364 364 # poor performance on Mac OS X: http://bugs.python.org/issue4194
365 365 p = subprocess.Popen(cmd, shell=True, bufsize=-1,
366 366 close_fds=closefds,
367 367 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
368 368 universal_newlines=newlines,
369 369 env=env)
370 370 return p.stdin, p.stdout
371 371
372 372 def popen3(cmd, env=None, newlines=False):
373 373 stdin, stdout, stderr, p = popen4(cmd, env, newlines)
374 374 return stdin, stdout, stderr
375 375
376 376 def popen4(cmd, env=None, newlines=False, bufsize=-1):
377 377 p = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
378 378 close_fds=closefds,
379 379 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
380 380 stderr=subprocess.PIPE,
381 381 universal_newlines=newlines,
382 382 env=env)
383 383 return p.stdin, p.stdout, p.stderr, p
384 384
385 385 def version():
386 386 """Return version information if available."""
387 387 try:
388 388 from . import __version__
389 389 return __version__.version
390 390 except ImportError:
391 391 return 'unknown'
392 392
393 393 def versiontuple(v=None, n=4):
394 394 """Parses a Mercurial version string into an N-tuple.
395 395
396 396 The version string to be parsed is specified with the ``v`` argument.
397 397 If it isn't defined, the current Mercurial version string will be parsed.
398 398
399 399 ``n`` can be 2, 3, or 4. Here is how some version strings map to
400 400 returned values:
401 401
402 402 >>> v = '3.6.1+190-df9b73d2d444'
403 403 >>> versiontuple(v, 2)
404 404 (3, 6)
405 405 >>> versiontuple(v, 3)
406 406 (3, 6, 1)
407 407 >>> versiontuple(v, 4)
408 408 (3, 6, 1, '190-df9b73d2d444')
409 409
410 410 >>> versiontuple('3.6.1+190-df9b73d2d444+20151118')
411 411 (3, 6, 1, '190-df9b73d2d444+20151118')
412 412
413 413 >>> v = '3.6'
414 414 >>> versiontuple(v, 2)
415 415 (3, 6)
416 416 >>> versiontuple(v, 3)
417 417 (3, 6, None)
418 418 >>> versiontuple(v, 4)
419 419 (3, 6, None, None)
420 420
421 421 >>> v = '3.9-rc'
422 422 >>> versiontuple(v, 2)
423 423 (3, 9)
424 424 >>> versiontuple(v, 3)
425 425 (3, 9, None)
426 426 >>> versiontuple(v, 4)
427 427 (3, 9, None, 'rc')
428 428
429 429 >>> v = '3.9-rc+2-02a8fea4289b'
430 430 >>> versiontuple(v, 2)
431 431 (3, 9)
432 432 >>> versiontuple(v, 3)
433 433 (3, 9, None)
434 434 >>> versiontuple(v, 4)
435 435 (3, 9, None, 'rc+2-02a8fea4289b')
436 436 """
437 437 if not v:
438 438 v = version()
439 439 parts = remod.split('[\+-]', v, 1)
440 440 if len(parts) == 1:
441 441 vparts, extra = parts[0], None
442 442 else:
443 443 vparts, extra = parts
444 444
445 445 vints = []
446 446 for i in vparts.split('.'):
447 447 try:
448 448 vints.append(int(i))
449 449 except ValueError:
450 450 break
451 451 # (3, 6) -> (3, 6, None)
452 452 while len(vints) < 3:
453 453 vints.append(None)
454 454
455 455 if n == 2:
456 456 return (vints[0], vints[1])
457 457 if n == 3:
458 458 return (vints[0], vints[1], vints[2])
459 459 if n == 4:
460 460 return (vints[0], vints[1], vints[2], extra)
461 461
462 462 # used by parsedate
463 463 defaultdateformats = (
464 464 '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
465 465 '%Y-%m-%dT%H:%M', # without seconds
466 466 '%Y-%m-%dT%H%M%S', # another awful but legal variant without :
467 467 '%Y-%m-%dT%H%M', # without seconds
468 468 '%Y-%m-%d %H:%M:%S', # our common legal variant
469 469 '%Y-%m-%d %H:%M', # without seconds
470 470 '%Y-%m-%d %H%M%S', # without :
471 471 '%Y-%m-%d %H%M', # without seconds
472 472 '%Y-%m-%d %I:%M:%S%p',
473 473 '%Y-%m-%d %H:%M',
474 474 '%Y-%m-%d %I:%M%p',
475 475 '%Y-%m-%d',
476 476 '%m-%d',
477 477 '%m/%d',
478 478 '%m/%d/%y',
479 479 '%m/%d/%Y',
480 480 '%a %b %d %H:%M:%S %Y',
481 481 '%a %b %d %I:%M:%S%p %Y',
482 482 '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
483 483 '%b %d %H:%M:%S %Y',
484 484 '%b %d %I:%M:%S%p %Y',
485 485 '%b %d %H:%M:%S',
486 486 '%b %d %I:%M:%S%p',
487 487 '%b %d %H:%M',
488 488 '%b %d %I:%M%p',
489 489 '%b %d %Y',
490 490 '%b %d',
491 491 '%H:%M:%S',
492 492 '%I:%M:%S%p',
493 493 '%H:%M',
494 494 '%I:%M%p',
495 495 )
496 496
497 497 extendeddateformats = defaultdateformats + (
498 498 "%Y",
499 499 "%Y-%m",
500 500 "%b",
501 501 "%b %Y",
502 502 )
503 503
504 504 def cachefunc(func):
505 505 '''cache the result of function calls'''
506 506 # XXX doesn't handle keywords args
507 507 if func.__code__.co_argcount == 0:
508 508 cache = []
509 509 def f():
510 510 if len(cache) == 0:
511 511 cache.append(func())
512 512 return cache[0]
513 513 return f
514 514 cache = {}
515 515 if func.__code__.co_argcount == 1:
516 516 # we gain a small amount of time because
517 517 # we don't need to pack/unpack the list
518 518 def f(arg):
519 519 if arg not in cache:
520 520 cache[arg] = func(arg)
521 521 return cache[arg]
522 522 else:
523 523 def f(*args):
524 524 if args not in cache:
525 525 cache[args] = func(*args)
526 526 return cache[args]
527 527
528 528 return f
529 529
530 530 class sortdict(dict):
531 531 '''a simple sorted dictionary'''
532 532 def __init__(self, data=None):
533 533 self._list = []
534 534 if data:
535 535 self.update(data)
536 536 def copy(self):
537 537 return sortdict(self)
538 538 def __setitem__(self, key, val):
539 539 if key in self:
540 540 self._list.remove(key)
541 541 self._list.append(key)
542 542 dict.__setitem__(self, key, val)
543 543 def __iter__(self):
544 544 return self._list.__iter__()
545 545 def update(self, src):
546 546 if isinstance(src, dict):
547 547 src = src.iteritems()
548 548 for k, v in src:
549 549 self[k] = v
550 550 def clear(self):
551 551 dict.clear(self)
552 552 self._list = []
553 553 def items(self):
554 554 return [(k, self[k]) for k in self._list]
555 555 def __delitem__(self, key):
556 556 dict.__delitem__(self, key)
557 557 self._list.remove(key)
558 558 def pop(self, key, *args, **kwargs):
559 559 dict.pop(self, key, *args, **kwargs)
560 560 try:
561 561 self._list.remove(key)
562 562 except ValueError:
563 563 pass
564 564 def keys(self):
565 565 return self._list[:]
566 566 def iterkeys(self):
567 567 return self._list.__iter__()
568 568 def iteritems(self):
569 569 for k in self._list:
570 570 yield k, self[k]
571 571 def insert(self, index, key, val):
572 572 self._list.insert(index, key)
573 573 dict.__setitem__(self, key, val)
574 574 def __repr__(self):
575 575 if not self:
576 576 return '%s()' % self.__class__.__name__
577 577 return '%s(%r)' % (self.__class__.__name__, self.items())
578 578
579 579 class _lrucachenode(object):
580 580 """A node in a doubly linked list.
581 581
582 582 Holds a reference to nodes on either side as well as a key-value
583 583 pair for the dictionary entry.
584 584 """
585 585 __slots__ = (u'next', u'prev', u'key', u'value')
586 586
587 587 def __init__(self):
588 588 self.next = None
589 589 self.prev = None
590 590
591 591 self.key = _notset
592 592 self.value = None
593 593
594 594 def markempty(self):
595 595 """Mark the node as emptied."""
596 596 self.key = _notset
597 597
598 598 class lrucachedict(object):
599 599 """Dict that caches most recent accesses and sets.
600 600
601 601 The dict consists of an actual backing dict - indexed by original
602 602 key - and a doubly linked circular list defining the order of entries in
603 603 the cache.
604 604
605 605 The head node is the newest entry in the cache. If the cache is full,
606 606 we recycle head.prev and make it the new head. Cache accesses result in
607 607 the node being moved to before the existing head and being marked as the
608 608 new head node.
609 609 """
610 610 def __init__(self, max):
611 611 self._cache = {}
612 612
613 613 self._head = head = _lrucachenode()
614 614 head.prev = head
615 615 head.next = head
616 616 self._size = 1
617 617 self._capacity = max
618 618
619 619 def __len__(self):
620 620 return len(self._cache)
621 621
622 622 def __contains__(self, k):
623 623 return k in self._cache
624 624
625 625 def __iter__(self):
626 626 # We don't have to iterate in cache order, but why not.
627 627 n = self._head
628 628 for i in range(len(self._cache)):
629 629 yield n.key
630 630 n = n.next
631 631
632 632 def __getitem__(self, k):
633 633 node = self._cache[k]
634 634 self._movetohead(node)
635 635 return node.value
636 636
637 637 def __setitem__(self, k, v):
638 638 node = self._cache.get(k)
639 639 # Replace existing value and mark as newest.
640 640 if node is not None:
641 641 node.value = v
642 642 self._movetohead(node)
643 643 return
644 644
645 645 if self._size < self._capacity:
646 646 node = self._addcapacity()
647 647 else:
648 648 # Grab the last/oldest item.
649 649 node = self._head.prev
650 650
651 651 # At capacity. Kill the old entry.
652 652 if node.key is not _notset:
653 653 del self._cache[node.key]
654 654
655 655 node.key = k
656 656 node.value = v
657 657 self._cache[k] = node
658 658 # And mark it as newest entry. No need to adjust order since it
659 659 # is already self._head.prev.
660 660 self._head = node
661 661
662 662 def __delitem__(self, k):
663 663 node = self._cache.pop(k)
664 664 node.markempty()
665 665
666 666 # Temporarily mark as newest item before re-adjusting head to make
667 667 # this node the oldest item.
668 668 self._movetohead(node)
669 669 self._head = node.next
670 670
671 671 # Additional dict methods.
672 672
673 673 def get(self, k, default=None):
674 674 try:
675 675 return self._cache[k].value
676 676 except KeyError:
677 677 return default
678 678
679 679 def clear(self):
680 680 n = self._head
681 681 while n.key is not _notset:
682 682 n.markempty()
683 683 n = n.next
684 684
685 685 self._cache.clear()
686 686
687 687 def copy(self):
688 688 result = lrucachedict(self._capacity)
689 689 n = self._head.prev
690 690 # Iterate in oldest-to-newest order, so the copy has the right ordering
691 691 for i in range(len(self._cache)):
692 692 result[n.key] = n.value
693 693 n = n.prev
694 694 return result
695 695
696 696 def _movetohead(self, node):
697 697 """Mark a node as the newest, making it the new head.
698 698
699 699 When a node is accessed, it becomes the freshest entry in the LRU
700 700 list, which is denoted by self._head.
701 701
702 702 Visually, let's make ``N`` the new head node (* denotes head):
703 703
704 704 previous/oldest <-> head <-> next/next newest
705 705
706 706 ----<->--- A* ---<->-----
707 707 | |
708 708 E <-> D <-> N <-> C <-> B
709 709
710 710 To:
711 711
712 712 ----<->--- N* ---<->-----
713 713 | |
714 714 E <-> D <-> C <-> B <-> A
715 715
716 716 This requires the following moves:
717 717
718 718 C.next = D (node.prev.next = node.next)
719 719 D.prev = C (node.next.prev = node.prev)
720 720 E.next = N (head.prev.next = node)
721 721 N.prev = E (node.prev = head.prev)
722 722 N.next = A (node.next = head)
723 723 A.prev = N (head.prev = node)
724 724 """
725 725 head = self._head
726 726 # C.next = D
727 727 node.prev.next = node.next
728 728 # D.prev = C
729 729 node.next.prev = node.prev
730 730 # N.prev = E
731 731 node.prev = head.prev
732 732 # N.next = A
733 733 # It is tempting to do just "head" here, however if node is
734 734 # adjacent to head, this will do bad things.
735 735 node.next = head.prev.next
736 736 # E.next = N
737 737 node.next.prev = node
738 738 # A.prev = N
739 739 node.prev.next = node
740 740
741 741 self._head = node
742 742
743 743 def _addcapacity(self):
744 744 """Add a node to the circular linked list.
745 745
746 746 The new node is inserted before the head node.
747 747 """
748 748 head = self._head
749 749 node = _lrucachenode()
750 750 head.prev.next = node
751 751 node.prev = head.prev
752 752 node.next = head
753 753 head.prev = node
754 754 self._size += 1
755 755 return node
756 756
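A short usage sketch for `lrucachedict` (illustrative values): accessing an entry marks it newest, and inserting past capacity recycles the oldest node.

d = lrucachedict(2)   # keep at most the 2 most recently used entries
d['a'] = 1
d['b'] = 2
d['a']                # access: 'a' becomes the newest entry
d['c'] = 3            # at capacity: the oldest entry ('b') is recycled
assert 'a' in d and 'c' in d and 'b' not in d
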
757 757 def lrucachefunc(func):
758 758 '''cache most recent results of function calls'''
759 759 cache = {}
760 760 order = collections.deque()
761 761 if func.__code__.co_argcount == 1:
762 762 def f(arg):
763 763 if arg not in cache:
764 764 if len(cache) > 20:
765 765 del cache[order.popleft()]
766 766 cache[arg] = func(arg)
767 767 else:
768 768 order.remove(arg)
769 769 order.append(arg)
770 770 return cache[arg]
771 771 else:
772 772 def f(*args):
773 773 if args not in cache:
774 774 if len(cache) > 20:
775 775 del cache[order.popleft()]
776 776 cache[args] = func(*args)
777 777 else:
778 778 order.remove(args)
779 779 order.append(args)
780 780 return cache[args]
781 781
782 782 return f
783 783
784 784 class propertycache(object):
785 785 def __init__(self, func):
786 786 self.func = func
787 787 self.name = func.__name__
788 788 def __get__(self, obj, type=None):
789 789 result = self.func(obj)
790 790 self.cachevalue(obj, result)
791 791 return result
792 792
793 793 def cachevalue(self, obj, value):
794 794 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
795 795 obj.__dict__[self.name] = value
796 796
797 797 def pipefilter(s, cmd):
798 798 '''filter string S through command CMD, returning its output'''
799 799 p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
800 800 stdin=subprocess.PIPE, stdout=subprocess.PIPE)
801 801 pout, perr = p.communicate(s)
802 802 return pout
803 803
804 804 def tempfilter(s, cmd):
805 805 '''filter string S through a pair of temporary files with CMD.
806 806 CMD is used as a template to create the real command to be run,
807 807 with the strings INFILE and OUTFILE replaced by the real names of
808 808 the temporary files generated.'''
809 809 inname, outname = None, None
810 810 try:
811 811 infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
812 812 fp = os.fdopen(infd, pycompat.sysstr('wb'))
813 813 fp.write(s)
814 814 fp.close()
815 815 outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
816 816 os.close(outfd)
817 817 cmd = cmd.replace('INFILE', inname)
818 818 cmd = cmd.replace('OUTFILE', outname)
819 819 code = os.system(cmd)
820 820 if pycompat.sysplatform == 'OpenVMS' and code & 1:
821 821 code = 0
822 822 if code:
823 823 raise Abort(_("command '%s' failed: %s") %
824 824 (cmd, explainexit(code)))
825 825 return readfile(outname)
826 826 finally:
827 827 try:
828 828 if inname:
829 829 os.unlink(inname)
830 830 except OSError:
831 831 pass
832 832 try:
833 833 if outname:
834 834 os.unlink(outname)
835 835 except OSError:
836 836 pass
837 837
838 838 filtertable = {
839 839 'tempfile:': tempfilter,
840 840 'pipe:': pipefilter,
841 841 }
842 842
843 843 def filter(s, cmd):
844 844 "filter a string through a command that transforms its input to its output"
845 845 for name, fn in filtertable.iteritems():
846 846 if cmd.startswith(name):
847 847 return fn(s, cmd[len(name):].lstrip())
848 848 return pipefilter(s, cmd)
849 849
850 850 def binary(s):
851 851 """return true if a string is binary data"""
852 852 return bool(s and '\0' in s)
853 853
854 854 def increasingchunks(source, min=1024, max=65536):
855 855 '''return no less than min bytes per chunk while data remains,
856 856 doubling min after each chunk until it reaches max'''
857 857 def log2(x):
858 858 if not x:
859 859 return 0
860 860 i = 0
861 861 while x:
862 862 x >>= 1
863 863 i += 1
864 864 return i - 1
865 865
866 866 buf = []
867 867 blen = 0
868 868 for chunk in source:
869 869 buf.append(chunk)
870 870 blen += len(chunk)
871 871 if blen >= min:
872 872 if min < max:
873 873 min = min << 1
874 874 nmin = 1 << log2(blen)
875 875 if nmin > min:
876 876 min = nmin
877 877 if min > max:
878 878 min = max
879 879 yield ''.join(buf)
880 880 blen = 0
881 881 buf = []
882 882 if buf:
883 883 yield ''.join(buf)
884 884
885 885 Abort = error.Abort
886 886
887 887 def always(fn):
888 888 return True
889 889
890 890 def never(fn):
891 891 return False
892 892
893 893 def nogc(func):
894 894 """disable garbage collector
895 895
896 896 Python's garbage collector triggers a GC each time a certain number of
897 897 container objects (the number being defined by gc.get_threshold()) are
898 898 allocated even when marked not to be tracked by the collector. Tracking has
899 899 no effect on when GCs are triggered, only on what objects the GC looks
900 900 into. As a workaround, disable GC while building complex (huge)
901 901 containers.
902 902
903 903 This garbage collector issue has been fixed in 2.7.
904 904 """
905 905 if sys.version_info >= (2, 7):
906 906 return func
907 907 def wrapper(*args, **kwargs):
908 908 gcenabled = gc.isenabled()
909 909 gc.disable()
910 910 try:
911 911 return func(*args, **kwargs)
912 912 finally:
913 913 if gcenabled:
914 914 gc.enable()
915 915 return wrapper
916 916
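A small usage sketch for `nogc` (the decorated function name is hypothetical; on Python >= 2.7 the decorator is a no-op):

@nogc
def buildindex(pairs):
    # building a huge dict allocates many container objects; skip GC while doing so
    return dict(pairs)
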
917 917 def pathto(root, n1, n2):
918 918 '''return the relative path from one place to another.
919 919 root should use os.sep to separate directories
920 920 n1 should use os.sep to separate directories
921 921 n2 should use "/" to separate directories
922 922 returns an os.sep-separated path.
923 923
924 924 If n1 is a relative path, it's assumed it's
925 925 relative to root.
926 926 n2 should always be relative to root.
927 927 '''
928 928 if not n1:
929 929 return localpath(n2)
930 930 if os.path.isabs(n1):
931 931 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
932 932 return os.path.join(root, localpath(n2))
933 933 n2 = '/'.join((pconvert(root), n2))
934 934 a, b = splitpath(n1), n2.split('/')
935 935 a.reverse()
936 936 b.reverse()
937 937 while a and b and a[-1] == b[-1]:
938 938 a.pop()
939 939 b.pop()
940 940 b.reverse()
941 941 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
942 942
943 943 def mainfrozen():
944 944 """return True if we are a frozen executable.
945 945
946 946 The code supports py2exe (most common, Windows only) and tools/freeze
947 947 (portable, not much used).
948 948 """
949 949 return (safehasattr(sys, "frozen") or # new py2exe
950 950 safehasattr(sys, "importers") or # old py2exe
951 951 imp.is_frozen(u"__main__")) # tools/freeze
952 952
953 953 # the location of data files matching the source code
954 954 if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
955 955 # executable version (py2exe) doesn't support __file__
956 956 datapath = os.path.dirname(pycompat.sysexecutable)
957 957 else:
958 958 datapath = os.path.dirname(__file__)
959 959
960 960 if not isinstance(datapath, bytes):
961 961 datapath = pycompat.fsencode(datapath)
962 962
963 963 i18n.setdatapath(datapath)
964 964
965 965 _hgexecutable = None
966 966
967 967 def hgexecutable():
968 968 """return location of the 'hg' executable.
969 969
970 970 Defaults to $HG or 'hg' in the search path.
971 971 """
972 972 if _hgexecutable is None:
973 973 hg = encoding.environ.get('HG')
974 974 mainmod = sys.modules['__main__']
975 975 if hg:
976 976 _sethgexecutable(hg)
977 977 elif mainfrozen():
978 978 if getattr(sys, 'frozen', None) == 'macosx_app':
979 979 # Env variable set by py2app
980 980 _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
981 981 else:
982 982 _sethgexecutable(pycompat.sysexecutable)
983 983 elif os.path.basename(getattr(mainmod, '__file__', '')) == 'hg':
984 984 _sethgexecutable(mainmod.__file__)
985 985 else:
986 986 exe = findexe('hg') or os.path.basename(sys.argv[0])
987 987 _sethgexecutable(exe)
988 988 return _hgexecutable
989 989
990 990 def _sethgexecutable(path):
991 991 """set location of the 'hg' executable"""
992 992 global _hgexecutable
993 993 _hgexecutable = path
994 994
995 995 def _isstdout(f):
996 996 fileno = getattr(f, 'fileno', None)
997 997 return fileno and fileno() == sys.__stdout__.fileno()
998 998
999 999 def shellenviron(environ=None):
1000 1000 """return environ with optional override, useful for shelling out"""
1001 1001 def py2shell(val):
1002 1002 'convert python object into string that is useful to shell'
1003 1003 if val is None or val is False:
1004 1004 return '0'
1005 1005 if val is True:
1006 1006 return '1'
1007 1007 return str(val)
1008 1008 env = dict(encoding.environ)
1009 1009 if environ:
1010 1010 env.update((k, py2shell(v)) for k, v in environ.iteritems())
1011 1011 env['HG'] = hgexecutable()
1012 1012 return env
1013 1013
1014 1014 def system(cmd, environ=None, cwd=None, onerr=None, errprefix=None, out=None):
1015 1015 '''enhanced shell command execution.
1016 1016 run with environment maybe modified, maybe in different dir.
1017 1017
1018 1018 if command fails and onerr is None, return status, else raise onerr
1019 1019 object as exception.
1020 1020
1021 1021 if out is specified, it is assumed to be a file-like object that has a
1022 1022 write() method. stdout and stderr will be redirected to out.'''
1023 1023 try:
1024 1024 stdout.flush()
1025 1025 except Exception:
1026 1026 pass
1027 1027 origcmd = cmd
1028 1028 cmd = quotecommand(cmd)
1029 1029 if pycompat.sysplatform == 'plan9' and (sys.version_info[0] == 2
1030 1030 and sys.version_info[1] < 7):
1031 1031 # subprocess kludge to work around issues in half-baked Python
1032 1032 # ports, notably bichued/python:
1033 1033 if not cwd is None:
1034 1034 os.chdir(cwd)
1035 1035 rc = os.system(cmd)
1036 1036 else:
1037 1037 env = shellenviron(environ)
1038 1038 if out is None or _isstdout(out):
1039 1039 rc = subprocess.call(cmd, shell=True, close_fds=closefds,
1040 1040 env=env, cwd=cwd)
1041 1041 else:
1042 1042 proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1043 1043 env=env, cwd=cwd, stdout=subprocess.PIPE,
1044 1044 stderr=subprocess.STDOUT)
1045 1045 for line in iter(proc.stdout.readline, ''):
1046 1046 out.write(line)
1047 1047 proc.wait()
1048 1048 rc = proc.returncode
1049 1049 if pycompat.sysplatform == 'OpenVMS' and rc & 1:
1050 1050 rc = 0
1051 1051 if rc and onerr:
1052 1052 errmsg = '%s %s' % (os.path.basename(origcmd.split(None, 1)[0]),
1053 1053 explainexit(rc)[0])
1054 1054 if errprefix:
1055 1055 errmsg = '%s: %s' % (errprefix, errmsg)
1056 1056 raise onerr(errmsg)
1057 1057 return rc
1058 1058
1059 1059 def checksignature(func):
1060 1060 '''wrap a function with code to check for calling errors'''
1061 1061 def check(*args, **kwargs):
1062 1062 try:
1063 1063 return func(*args, **kwargs)
1064 1064 except TypeError:
1065 1065 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1066 1066 raise error.SignatureError
1067 1067 raise
1068 1068
1069 1069 return check
1070 1070
1071 1071 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1072 1072 '''copy a file, preserving mode and optionally other stat info like
1073 1073 atime/mtime
1074 1074
1075 1075 checkambig argument is used with filestat, and is useful only if
1076 1076 destination file is guarded by any lock (e.g. repo.lock or
1077 1077 repo.wlock).
1078 1078
1079 1079 copystat and checkambig should be exclusive.
1080 1080 '''
1081 1081 assert not (copystat and checkambig)
1082 1082 oldstat = None
1083 1083 if os.path.lexists(dest):
1084 1084 if checkambig:
1085 1085 oldstat = checkambig and filestat(dest)
1086 1086 unlink(dest)
1087 1087 # hardlinks are problematic on CIFS, quietly ignore this flag
1088 1088 # until we find a way to work around it cleanly (issue4546)
1089 1089 if False and hardlink:
1090 1090 try:
1091 1091 oslink(src, dest)
1092 1092 return
1093 1093 except (IOError, OSError):
1094 1094 pass # fall back to normal copy
1095 1095 if os.path.islink(src):
1096 1096 os.symlink(os.readlink(src), dest)
1097 1097 # copytime is ignored for symlinks, but in general copytime isn't needed
1098 1098 # for them anyway
1099 1099 else:
1100 1100 try:
1101 1101 shutil.copyfile(src, dest)
1102 1102 if copystat:
1103 1103 # copystat also copies mode
1104 1104 shutil.copystat(src, dest)
1105 1105 else:
1106 1106 shutil.copymode(src, dest)
1107 1107 if oldstat and oldstat.stat:
1108 1108 newstat = filestat(dest)
1109 1109 if newstat.isambig(oldstat):
1110 1110 # stat of copied file is ambiguous to original one
1111 1111 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1112 1112 os.utime(dest, (advanced, advanced))
1113 1113 except shutil.Error as inst:
1114 1114 raise Abort(str(inst))
1115 1115
1116 1116 def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
1117 1117 """Copy a directory tree using hardlinks if possible."""
1118 1118 num = 0
1119 1119
1120 1120 if hardlink is None:
1121 1121 hardlink = (os.stat(src).st_dev ==
1122 1122 os.stat(os.path.dirname(dst)).st_dev)
1123 1123 if hardlink:
1124 1124 topic = _('linking')
1125 1125 else:
1126 1126 topic = _('copying')
1127 1127
1128 1128 if os.path.isdir(src):
1129 1129 os.mkdir(dst)
1130 1130 for name, kind in osutil.listdir(src):
1131 1131 srcname = os.path.join(src, name)
1132 1132 dstname = os.path.join(dst, name)
1133 1133 def nprog(t, pos):
1134 1134 if pos is not None:
1135 1135 return progress(t, pos + num)
1136 1136 hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
1137 1137 num += n
1138 1138 else:
1139 1139 if hardlink:
1140 1140 try:
1141 1141 oslink(src, dst)
1142 1142 except (IOError, OSError):
1143 1143 hardlink = False
1144 1144 shutil.copy(src, dst)
1145 1145 else:
1146 1146 shutil.copy(src, dst)
1147 1147 num += 1
1148 1148 progress(topic, num)
1149 1149 progress(topic, None)
1150 1150
1151 1151 return hardlink, num
1152 1152
1153 1153 _winreservednames = '''con prn aux nul
1154 1154 com1 com2 com3 com4 com5 com6 com7 com8 com9
1155 1155 lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
1156 1156 _winreservedchars = ':*?"<>|'
1157 1157 def checkwinfilename(path):
1158 1158 r'''Check that the base-relative path is a valid filename on Windows.
1159 1159 Returns None if the path is ok, or a UI string describing the problem.
1160 1160
1161 1161 >>> checkwinfilename("just/a/normal/path")
1162 1162 >>> checkwinfilename("foo/bar/con.xml")
1163 1163 "filename contains 'con', which is reserved on Windows"
1164 1164 >>> checkwinfilename("foo/con.xml/bar")
1165 1165 "filename contains 'con', which is reserved on Windows"
1166 1166 >>> checkwinfilename("foo/bar/xml.con")
1167 1167 >>> checkwinfilename("foo/bar/AUX/bla.txt")
1168 1168 "filename contains 'AUX', which is reserved on Windows"
1169 1169 >>> checkwinfilename("foo/bar/bla:.txt")
1170 1170 "filename contains ':', which is reserved on Windows"
1171 1171 >>> checkwinfilename("foo/bar/b\07la.txt")
1172 1172 "filename contains '\\x07', which is invalid on Windows"
1173 1173 >>> checkwinfilename("foo/bar/bla ")
1174 1174 "filename ends with ' ', which is not allowed on Windows"
1175 1175 >>> checkwinfilename("../bar")
1176 1176 >>> checkwinfilename("foo\\")
1177 1177 "filename ends with '\\', which is invalid on Windows"
1178 1178 >>> checkwinfilename("foo\\/bar")
1179 1179 "directory name ends with '\\', which is invalid on Windows"
1180 1180 '''
1181 1181 if path.endswith('\\'):
1182 1182 return _("filename ends with '\\', which is invalid on Windows")
1183 1183 if '\\/' in path:
1184 1184 return _("directory name ends with '\\', which is invalid on Windows")
1185 1185 for n in path.replace('\\', '/').split('/'):
1186 1186 if not n:
1187 1187 continue
1188 1188 for c in n:
1189 1189 if c in _winreservedchars:
1190 1190 return _("filename contains '%s', which is reserved "
1191 1191 "on Windows") % c
1192 1192 if ord(c) <= 31:
1193 1193 return _("filename contains %r, which is invalid "
1194 1194 "on Windows") % c
1195 1195 base = n.split('.')[0]
1196 1196 if base and base.lower() in _winreservednames:
1197 1197 return _("filename contains '%s', which is reserved "
1198 1198 "on Windows") % base
1199 1199 t = n[-1]
1200 1200 if t in '. ' and n not in '..':
1201 1201 return _("filename ends with '%s', which is not allowed "
1202 1202 "on Windows") % t
1203 1203
1204 1204 if pycompat.osname == 'nt':
1205 1205 checkosfilename = checkwinfilename
1206 1206 timer = time.clock
1207 1207 else:
1208 1208 checkosfilename = platform.checkosfilename
1209 1209 timer = time.time
1210 1210
1211 1211 if safehasattr(time, "perf_counter"):
1212 1212 timer = time.perf_counter
1213 1213
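# Illustrative usage sketch (not from the original module): util.timer is for
# measuring intervals, not wall-clock dates, so only the difference between
# two calls is meaningful. expensive_operation is a hypothetical callable.
#
#   start = timer()
#   expensive_operation()
#   elapsed = timer() - start   # seconds, as a float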
1214 1214 def makelock(info, pathname):
1215 1215 try:
1216 1216 return os.symlink(info, pathname)
1217 1217 except OSError as why:
1218 1218 if why.errno == errno.EEXIST:
1219 1219 raise
1220 1220 except AttributeError: # no symlink in os
1221 1221 pass
1222 1222
1223 1223 ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
1224 1224 os.write(ld, info)
1225 1225 os.close(ld)
1226 1226
1227 1227 def readlock(pathname):
1228 1228 try:
1229 1229 return os.readlink(pathname)
1230 1230 except OSError as why:
1231 1231 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1232 1232 raise
1233 1233 except AttributeError: # no symlink in os
1234 1234 pass
1235 1235 fp = posixfile(pathname)
1236 1236 r = fp.read()
1237 1237 fp.close()
1238 1238 return r
1239 1239
1240 1240 def fstat(fp):
1241 1241 '''stat file object that may not have fileno method.'''
1242 1242 try:
1243 1243 return os.fstat(fp.fileno())
1244 1244 except AttributeError:
1245 1245 return os.stat(fp.name)
1246 1246
1247 1247 # File system features
1248 1248
1249 1249 def fscasesensitive(path):
1250 1250 """
1251 1251 Return true if the given path is on a case-sensitive filesystem
1252 1252
1253 1253 Requires a path (like /foo/.hg) ending with a foldable final
1254 1254 directory component.
1255 1255 """
1256 1256 s1 = os.lstat(path)
1257 1257 d, b = os.path.split(path)
1258 1258 b2 = b.upper()
1259 1259 if b == b2:
1260 1260 b2 = b.lower()
1261 1261 if b == b2:
1262 1262 return True # no evidence against case sensitivity
1263 1263 p2 = os.path.join(d, b2)
1264 1264 try:
1265 1265 s2 = os.lstat(p2)
1266 1266 if s2 == s1:
1267 1267 return False
1268 1268 return True
1269 1269 except OSError:
1270 1270 return True
1271 1271
1272 1272 try:
1273 1273 import re2
1274 1274 _re2 = None
1275 1275 except ImportError:
1276 1276 _re2 = False
1277 1277
1278 1278 class _re(object):
1279 1279 def _checkre2(self):
1280 1280 global _re2
1281 1281 try:
1282 1282 # check if match works, see issue3964
1283 1283 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1284 1284 except ImportError:
1285 1285 _re2 = False
1286 1286
1287 1287 def compile(self, pat, flags=0):
1288 1288 '''Compile a regular expression, using re2 if possible
1289 1289
1290 1290 For best performance, use only re2-compatible regexp features. The
1291 1291 only flags from the re module that are re2-compatible are
1292 1292 IGNORECASE and MULTILINE.'''
1293 1293 if _re2 is None:
1294 1294 self._checkre2()
1295 1295 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1296 1296 if flags & remod.IGNORECASE:
1297 1297 pat = '(?i)' + pat
1298 1298 if flags & remod.MULTILINE:
1299 1299 pat = '(?m)' + pat
1300 1300 try:
1301 1301 return re2.compile(pat)
1302 1302 except re2.error:
1303 1303 pass
1304 1304 return remod.compile(pat, flags)
1305 1305
1306 1306 @propertycache
1307 1307 def escape(self):
1308 1308 '''Return the version of escape corresponding to self.compile.
1309 1309
1310 1310 This is imperfect because whether re2 or re is used for a particular
1311 1311 function depends on the flags, etc, but it's the best we can do.
1312 1312 '''
1313 1313 global _re2
1314 1314 if _re2 is None:
1315 1315 self._checkre2()
1316 1316 if _re2:
1317 1317 return re2.escape
1318 1318 else:
1319 1319 return remod.escape
1320 1320
1321 1321 re = _re()
1322 1322
1323 1323 _fspathcache = {}
1324 1324 def fspath(name, root):
1325 1325 '''Get name in the case stored in the filesystem
1326 1326
1327 1327 The name should be relative to root, and be normcase-ed for efficiency.
1328 1328
1329 1329 Note that this function is unnecessary, and should not be
1330 1330 called, for case-sensitive filesystems (simply because it's expensive).
1331 1331
1332 1332 The root should be normcase-ed, too.
1333 1333 '''
1334 1334 def _makefspathcacheentry(dir):
1335 1335 return dict((normcase(n), n) for n in os.listdir(dir))
1336 1336
1337 1337 seps = pycompat.ossep
1338 1338 if pycompat.osaltsep:
1339 1339 seps = seps + pycompat.osaltsep
1340 1340 # Protect backslashes. This gets silly very quickly.
1341 1341 seps = seps.replace('\\', '\\\\')
1342 1342 pattern = remod.compile(r'([^%s]+)|([%s]+)' % (seps, seps))
1343 1343 dir = os.path.normpath(root)
1344 1344 result = []
1345 1345 for part, sep in pattern.findall(name):
1346 1346 if sep:
1347 1347 result.append(sep)
1348 1348 continue
1349 1349
1350 1350 if dir not in _fspathcache:
1351 1351 _fspathcache[dir] = _makefspathcacheentry(dir)
1352 1352 contents = _fspathcache[dir]
1353 1353
1354 1354 found = contents.get(part)
1355 1355 if not found:
1356 1356 # retry "once per directory" per "dirstate.walk" which
1357 1357 # may take place for each patch of "hg qpush", for example
1358 1358 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1359 1359 found = contents.get(part)
1360 1360
1361 1361 result.append(found or part)
1362 1362 dir = os.path.join(dir, part)
1363 1363
1364 1364 return ''.join(result)
1365 1365
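# Illustrative usage sketch (not from the original module; the repository
# path is hypothetical). Both arguments are expected to be normcase()-ed,
# and the result is spelled with the case actually stored on disk.
#
#   fspath(normcase('DirA/File.TXT'), normcase('/repo'))
#   # -> e.g. 'DirA/File.txt', matching the on-disk spelling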
1366 1366 def checknlink(testfile):
1367 1367 '''check whether hardlink count reporting works properly'''
1368 1368
1369 1369 # testfile may be open, so we need a separate file for checking to
1370 1370 # work around issue2543 (or testfile may get lost on Samba shares)
1371 1371 f1 = testfile + ".hgtmp1"
1372 1372 if os.path.lexists(f1):
1373 1373 return False
1374 1374 try:
1375 1375 posixfile(f1, 'w').close()
1376 1376 except IOError:
1377 1377 try:
1378 1378 os.unlink(f1)
1379 1379 except OSError:
1380 1380 pass
1381 1381 return False
1382 1382
1383 1383 f2 = testfile + ".hgtmp2"
1384 1384 fd = None
1385 1385 try:
1386 1386 oslink(f1, f2)
1387 1387 # nlinks() may behave differently for files on Windows shares if
1388 1388 # the file is open.
1389 1389 fd = posixfile(f2)
1390 1390 return nlinks(f2) > 1
1391 1391 except OSError:
1392 1392 return False
1393 1393 finally:
1394 1394 if fd is not None:
1395 1395 fd.close()
1396 1396 for f in (f1, f2):
1397 1397 try:
1398 1398 os.unlink(f)
1399 1399 except OSError:
1400 1400 pass
1401 1401
1402 1402 def endswithsep(path):
1403 1403 '''Check path ends with os.sep or os.altsep.'''
1404 1404 return (path.endswith(pycompat.ossep)
1405 1405 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1406 1406
1407 1407 def splitpath(path):
1408 1408 '''Split path by os.sep.
1409 1409 Note that this function does not use os.altsep because it is
1410 1410 an alternative to a simple "xxx.split(os.sep)".
1411 1411 It is recommended to use os.path.normpath() before using this
1412 1412 function if needed.'''
1413 1413 return path.split(pycompat.ossep)
1414 1414
1415 1415 def gui():
1416 1416 '''Are we running in a GUI?'''
1417 1417 if pycompat.sysplatform == 'darwin':
1418 1418 if 'SSH_CONNECTION' in encoding.environ:
1419 1419 # handle SSH access to a box where the user is logged in
1420 1420 return False
1421 1421 elif getattr(osutil, 'isgui', None):
1422 1422 # check if a CoreGraphics session is available
1423 1423 return osutil.isgui()
1424 1424 else:
1425 1425 # pure build; use a safe default
1426 1426 return True
1427 1427 else:
1428 1428 return pycompat.osname == "nt" or encoding.environ.get("DISPLAY")
1429 1429
1430 1430 def mktempcopy(name, emptyok=False, createmode=None):
1431 1431 """Create a temporary file with the same contents from name
1432 1432
1433 1433 The permission bits are copied from the original file.
1434 1434
1435 1435 If the temporary file is going to be truncated immediately, you
1436 1436 can use emptyok=True as an optimization.
1437 1437
1438 1438 Returns the name of the temporary file.
1439 1439 """
1440 1440 d, fn = os.path.split(name)
1441 1441 fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, dir=d)
1442 1442 os.close(fd)
1443 1443 # Temporary files are created with mode 0600, which is usually not
1444 1444 # what we want. If the original file already exists, just copy
1445 1445 # its mode. Otherwise, manually obey umask.
1446 1446 copymode(name, temp, createmode)
1447 1447 if emptyok:
1448 1448 return temp
1449 1449 try:
1450 1450 try:
1451 1451 ifp = posixfile(name, "rb")
1452 1452 except IOError as inst:
1453 1453 if inst.errno == errno.ENOENT:
1454 1454 return temp
1455 1455 if not getattr(inst, 'filename', None):
1456 1456 inst.filename = name
1457 1457 raise
1458 1458 ofp = posixfile(temp, "wb")
1459 1459 for chunk in filechunkiter(ifp):
1460 1460 ofp.write(chunk)
1461 1461 ifp.close()
1462 1462 ofp.close()
1463 1463 except: # re-raises
1464 1464 try: os.unlink(temp)
1465 1465 except OSError: pass
1466 1466 raise
1467 1467 return temp
1468 1468
1469 1469 class filestat(object):
1470 1470 """help to exactly detect change of a file
1471 1471
1472 1472 The 'stat' attribute is the result of 'os.stat()' if the specified
1473 1473 'path' exists; otherwise it is None. This lets callers of this class
1474 1474 avoid a preparatory 'exists()' check.
1475 1475 """
1476 1476 def __init__(self, path):
1477 1477 try:
1478 1478 self.stat = os.stat(path)
1479 1479 except OSError as err:
1480 1480 if err.errno != errno.ENOENT:
1481 1481 raise
1482 1482 self.stat = None
1483 1483
1484 1484 __hash__ = object.__hash__
1485 1485
1486 1486 def __eq__(self, old):
1487 1487 try:
1488 1488 # if ambiguity between stat of new and old file is
1489 1489 # avoided, comparison of size, ctime and mtime is enough
1490 1490 # to exactly detect change of a file regardless of platform
1491 1491 return (self.stat.st_size == old.stat.st_size and
1492 1492 self.stat.st_ctime == old.stat.st_ctime and
1493 1493 self.stat.st_mtime == old.stat.st_mtime)
1494 1494 except AttributeError:
1495 1495 return False
1496 1496
1497 1497 def isambig(self, old):
1498 1498 """Examine whether new (= self) stat is ambiguous against old one
1499 1499
1500 1500 "S[N]" below means stat of a file at N-th change:
1501 1501
1502 1502 - S[n-1].ctime < S[n].ctime: can detect change of a file
1503 1503 - S[n-1].ctime == S[n].ctime
1504 1504 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
1505 1505 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
1506 1506 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
1507 1507 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
1508 1508
1509 1509 Case (*2) above means that a file was changed twice or more
1510 1510 within the same second (= S[n-1].ctime), so comparison of
1511 1511 timestamps is ambiguous.
1512 1512
1513 1513 The basic idea for avoiding such ambiguity is to "advance mtime
1514 1514 by 1 sec, if the timestamp is ambiguous".
1515 1515
1516 1516 But advancing mtime only in case (*2) doesn't work as
1517 1517 expected, because the naturally advanced S[n].mtime in case (*1)
1518 1518 might be equal to a manually advanced S[n-1 or earlier].mtime.
1519 1519
1520 1520 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
1521 1521 treated as ambiguous regardless of mtime, to avoid missing
1522 1522 changes due to collisions between such mtimes.
1523 1523
1524 1524 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
1525 1525 S[n].mtime", even if the size of the file hasn't changed.
1526 1526 """
1527 1527 try:
1528 1528 return (self.stat.st_ctime == old.stat.st_ctime)
1529 1529 except AttributeError:
1530 1530 return False
1531 1531
1532 1532 def avoidambig(self, path, old):
1533 1533 """Change file stat of specified path to avoid ambiguity
1534 1534
1535 1535 'old' should be previous filestat of 'path'.
1536 1536
1537 1537 This skips avoiding ambiguity, if a process doesn't have
1538 1538 appropriate privileges for 'path'.
1539 1539 """
1540 1540 advanced = (old.stat.st_mtime + 1) & 0x7fffffff
1541 1541 try:
1542 1542 os.utime(path, (advanced, advanced))
1543 1543 except OSError as inst:
1544 1544 if inst.errno == errno.EPERM:
1545 1545 # utime() on the file created by another user causes EPERM,
1546 1546 # if a process doesn't have appropriate privileges
1547 1547 return
1548 1548 raise
1549 1549
1550 1550 def __ne__(self, other):
1551 1551 return not self == other
1552 1552
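# Illustrative usage sketch (not from the original module): snapshot the stat
# before rewriting a file, then nudge mtime if the rewrite would be
# indistinguishable by (size, ctime, mtime). 'somefile' and rewrite() are
# hypothetical, and the file is assumed to be guarded by a lock.
#
#   old = filestat('somefile')
#   rewrite('somefile')
#   new = filestat('somefile')
#   if new.isambig(old):
#       new.avoidambig('somefile', old)   # advances mtime by 1 sec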
1553 1553 class atomictempfile(object):
1554 1554 '''writable file object that atomically updates a file
1555 1555
1556 1556 All writes will go to a temporary copy of the original file. Call
1557 1557 close() when you are done writing, and atomictempfile will rename
1558 1558 the temporary copy to the original name, making the changes
1559 1559 visible. If the object is destroyed without being closed, all your
1560 1560 writes are discarded.
1561 1561
1562 1562 The checkambig argument of the constructor is used with filestat, and
1563 1563 is useful only if the target file is guarded by a lock (e.g. repo.lock
1564 1564 or repo.wlock).
1565 1565 '''
1566 1566 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
1567 1567 self.__name = name # permanent name
1568 1568 self._tempname = mktempcopy(name, emptyok=('w' in mode),
1569 1569 createmode=createmode)
1570 1570 self._fp = posixfile(self._tempname, mode)
1571 1571 self._checkambig = checkambig
1572 1572
1573 1573 # delegated methods
1574 1574 self.read = self._fp.read
1575 1575 self.write = self._fp.write
1576 1576 self.seek = self._fp.seek
1577 1577 self.tell = self._fp.tell
1578 1578 self.fileno = self._fp.fileno
1579 1579
1580 1580 def close(self):
1581 1581 if not self._fp.closed:
1582 1582 self._fp.close()
1583 1583 filename = localpath(self.__name)
1584 1584 oldstat = self._checkambig and filestat(filename)
1585 1585 if oldstat and oldstat.stat:
1586 1586 rename(self._tempname, filename)
1587 1587 newstat = filestat(filename)
1588 1588 if newstat.isambig(oldstat):
1589 1589 # stat of changed file is ambiguous to original one
1590 1590 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1591 1591 os.utime(filename, (advanced, advanced))
1592 1592 else:
1593 1593 rename(self._tempname, filename)
1594 1594
1595 1595 def discard(self):
1596 1596 if not self._fp.closed:
1597 1597 try:
1598 1598 os.unlink(self._tempname)
1599 1599 except OSError:
1600 1600 pass
1601 1601 self._fp.close()
1602 1602
1603 1603 def __del__(self):
1604 1604 if safehasattr(self, '_fp'): # constructor actually did something
1605 1605 self.discard()
1606 1606
1607 1607 def __enter__(self):
1608 1608 return self
1609 1609
1610 1610 def __exit__(self, exctype, excvalue, traceback):
1611 1611 if exctype is not None:
1612 1612 self.discard()
1613 1613 else:
1614 1614 self.close()
1615 1615
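# Illustrative usage sketch (not from the original module; 'myfile' is
# hypothetical). atomictempfile is a context manager: the temporary copy is
# renamed into place on a clean exit and discarded if the block raises, so
# readers never observe a partially written file.
#
#   with atomictempfile('myfile', 'wb') as fp:
#       fp.write('all or nothing\n')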
1616 1616 def makedirs(name, mode=None, notindexed=False):
1617 1617 """recursive directory creation with parent mode inheritance
1618 1618
1619 1619 Newly created directories are marked as "not to be indexed by
1620 1620 the content indexing service", if ``notindexed`` is specified
1621 1621 for "write" mode access.
1622 1622 """
1623 1623 try:
1624 1624 makedir(name, notindexed)
1625 1625 except OSError as err:
1626 1626 if err.errno == errno.EEXIST:
1627 1627 return
1628 1628 if err.errno != errno.ENOENT or not name:
1629 1629 raise
1630 1630 parent = os.path.dirname(os.path.abspath(name))
1631 1631 if parent == name:
1632 1632 raise
1633 1633 makedirs(parent, mode, notindexed)
1634 1634 try:
1635 1635 makedir(name, notindexed)
1636 1636 except OSError as err:
1637 1637 # Catch EEXIST to handle races
1638 1638 if err.errno == errno.EEXIST:
1639 1639 return
1640 1640 raise
1641 1641 if mode is not None:
1642 1642 os.chmod(name, mode)
1643 1643
1644 1644 def readfile(path):
1645 1645 with open(path, 'rb') as fp:
1646 1646 return fp.read()
1647 1647
1648 1648 def writefile(path, text):
1649 1649 with open(path, 'wb') as fp:
1650 1650 fp.write(text)
1651 1651
1652 1652 def appendfile(path, text):
1653 1653 with open(path, 'ab') as fp:
1654 1654 fp.write(text)
1655 1655
1656 1656 class chunkbuffer(object):
1657 1657 """Allow arbitrary sized chunks of data to be efficiently read from an
1658 1658 iterator over chunks of arbitrary size."""
1659 1659
1660 1660 def __init__(self, in_iter):
1661 1661 """in_iter is the iterator that's iterating over the input chunks.
1662 1662 targetsize is how big a buffer to try to maintain."""
1663 1663 def splitbig(chunks):
1664 1664 for chunk in chunks:
1665 1665 if len(chunk) > 2**20:
1666 1666 pos = 0
1667 1667 while pos < len(chunk):
1668 1668 end = pos + 2 ** 18
1669 1669 yield chunk[pos:end]
1670 1670 pos = end
1671 1671 else:
1672 1672 yield chunk
1673 1673 self.iter = splitbig(in_iter)
1674 1674 self._queue = collections.deque()
1675 1675 self._chunkoffset = 0
1676 1676
1677 1677 def read(self, l=None):
1678 1678 """Read L bytes of data from the iterator of chunks of data.
1679 1679 Returns less than L bytes if the iterator runs dry.
1680 1680
1681 1681 If the l parameter is omitted, read everything"""
1682 1682 if l is None:
1683 1683 return ''.join(self.iter)
1684 1684
1685 1685 left = l
1686 1686 buf = []
1687 1687 queue = self._queue
1688 1688 while left > 0:
1689 1689 # refill the queue
1690 1690 if not queue:
1691 1691 target = 2**18
1692 1692 for chunk in self.iter:
1693 1693 queue.append(chunk)
1694 1694 target -= len(chunk)
1695 1695 if target <= 0:
1696 1696 break
1697 1697 if not queue:
1698 1698 break
1699 1699
1700 1700 # The easy way to do this would be to queue.popleft(), modify the
1701 1701 # chunk (if necessary), then queue.appendleft(). However, for cases
1702 1702 # where we read partial chunk content, this incurs 2 dequeue
1703 1703 # mutations and creates a new str for the remaining chunk in the
1704 1704 # queue. Our code below avoids this overhead.
1705 1705
1706 1706 chunk = queue[0]
1707 1707 chunkl = len(chunk)
1708 1708 offset = self._chunkoffset
1709 1709
1710 1710 # Use full chunk.
1711 1711 if offset == 0 and left >= chunkl:
1712 1712 left -= chunkl
1713 1713 queue.popleft()
1714 1714 buf.append(chunk)
1715 1715 # self._chunkoffset remains at 0.
1716 1716 continue
1717 1717
1718 1718 chunkremaining = chunkl - offset
1719 1719
1720 1720 # Use all of unconsumed part of chunk.
1721 1721 if left >= chunkremaining:
1722 1722 left -= chunkremaining
1723 1723 queue.popleft()
1724 1724 # offset == 0 is enabled by block above, so this won't merely
1725 1725 # copy via ``chunk[0:]``.
1726 1726 buf.append(chunk[offset:])
1727 1727 self._chunkoffset = 0
1728 1728
1729 1729 # Partial chunk needed.
1730 1730 else:
1731 1731 buf.append(chunk[offset:offset + left])
1732 1732 self._chunkoffset += left
1733 1733 left -= chunkremaining
1734 1734
1735 1735 return ''.join(buf)
1736 1736
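# Illustrative usage sketch (not from the original module): chunkbuffer lets
# a caller read fixed-size slices from an iterator that yields chunks of
# arbitrary, uneven sizes.
#
#   buf = chunkbuffer(iter(['abc', 'defgh', 'ij']))
#   buf.read(4)   # -> 'abcd'
#   buf.read(4)   # -> 'efgh'
#   buf.read(4)   # -> 'ij' (the iterator ran dry)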
1737 1737 def filechunkiter(f, size=131072, limit=None):
1738 1738 """Create a generator that produces the data in the file size
1739 1739 (default 131072) bytes at a time, up to optional limit (default is
1740 1740 to read all data). Chunks may be less than size bytes if the
1741 1741 chunk is the last chunk in the file, or the file is a socket or
1742 1742 some other type of file that sometimes reads less data than is
1743 1743 requested."""
1744 1744 assert size >= 0
1745 1745 assert limit is None or limit >= 0
1746 1746 while True:
1747 1747 if limit is None:
1748 1748 nbytes = size
1749 1749 else:
1750 1750 nbytes = min(limit, size)
1751 1751 s = nbytes and f.read(nbytes)
1752 1752 if not s:
1753 1753 break
1754 1754 if limit:
1755 1755 limit -= len(s)
1756 1756 yield s
1757 1757
1758 1758 def makedate(timestamp=None):
1759 1759 '''Return a unix timestamp (or the current time) as a (unixtime,
1760 1760 offset) tuple based off the local timezone.'''
1761 1761 if timestamp is None:
1762 1762 timestamp = time.time()
1763 1763 if timestamp < 0:
1764 1764 hint = _("check your clock")
1765 1765 raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
1766 1766 delta = (datetime.datetime.utcfromtimestamp(timestamp) -
1767 1767 datetime.datetime.fromtimestamp(timestamp))
1768 1768 tz = delta.days * 86400 + delta.seconds
1769 1769 return timestamp, tz
1770 1770
1771 1771 def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
1772 1772 """represent a (unixtime, offset) tuple as a localized time.
1773 1773 unixtime is seconds since the epoch, and offset is the time zone's
1774 1774 number of seconds away from UTC.
1775 1775
1776 1776 >>> datestr((0, 0))
1777 1777 'Thu Jan 01 00:00:00 1970 +0000'
1778 1778 >>> datestr((42, 0))
1779 1779 'Thu Jan 01 00:00:42 1970 +0000'
1780 1780 >>> datestr((-42, 0))
1781 1781 'Wed Dec 31 23:59:18 1969 +0000'
1782 1782 >>> datestr((0x7fffffff, 0))
1783 1783 'Tue Jan 19 03:14:07 2038 +0000'
1784 1784 >>> datestr((-0x80000000, 0))
1785 1785 'Fri Dec 13 20:45:52 1901 +0000'
1786 1786 """
1787 1787 t, tz = date or makedate()
1788 1788 if "%1" in format or "%2" in format or "%z" in format:
1789 1789 sign = (tz > 0) and "-" or "+"
1790 1790 minutes = abs(tz) // 60
1791 1791 q, r = divmod(minutes, 60)
1792 1792 format = format.replace("%z", "%1%2")
1793 1793 format = format.replace("%1", "%c%02d" % (sign, q))
1794 1794 format = format.replace("%2", "%02d" % r)
1795 1795 d = t - tz
1796 1796 if d > 0x7fffffff:
1797 1797 d = 0x7fffffff
1798 1798 elif d < -0x80000000:
1799 1799 d = -0x80000000
1800 1800 # Never use time.gmtime() and datetime.datetime.fromtimestamp()
1801 1801 # because they use the gmtime() system call which is buggy on Windows
1802 1802 # for negative values.
1803 1803 t = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=d)
1804 1804 s = t.strftime(format)
1805 1805 return s
1806 1806
1807 1807 def shortdate(date=None):
1808 1808 """turn (timestamp, tzoff) tuple into iso 8631 date."""
1809 1809 return datestr(date, format='%Y-%m-%d')
1810 1810
1811 1811 def parsetimezone(s):
1812 1812 """find a trailing timezone, if any, in string, and return a
1813 1813 (offset, remainder) pair"""
1814 1814
1815 1815 if s.endswith("GMT") or s.endswith("UTC"):
1816 1816 return 0, s[:-3].rstrip()
1817 1817
1818 1818 # Unix-style timezones [+-]hhmm
1819 1819 if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
1820 1820 sign = (s[-5] == "+") and 1 or -1
1821 1821 hours = int(s[-4:-2])
1822 1822 minutes = int(s[-2:])
1823 1823 return -sign * (hours * 60 + minutes) * 60, s[:-5].rstrip()
1824 1824
1825 1825 # ISO8601 trailing Z
1826 1826 if s.endswith("Z") and s[-2:-1].isdigit():
1827 1827 return 0, s[:-1]
1828 1828
1829 1829 # ISO8601-style [+-]hh:mm
1830 1830 if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
1831 1831 s[-5:-3].isdigit() and s[-2:].isdigit()):
1832 1832 sign = (s[-6] == "+") and 1 or -1
1833 1833 hours = int(s[-5:-3])
1834 1834 minutes = int(s[-2:])
1835 1835 return -sign * (hours * 60 + minutes) * 60, s[:-6]
1836 1836
1837 1837 return None, s
1838 1838
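# Illustrative usage sketch (not from the original module): the offset is
# returned in Mercurial's convention of seconds west of UTC, so timezones
# east of UTC come back negative.
#
#   parsetimezone('2017-02-01 13:00 +0530')   # -> (-19800, '2017-02-01 13:00')
#   parsetimezone('2017-02-01 13:00 UTC')     # -> (0, '2017-02-01 13:00')
#   parsetimezone('no timezone here')         # -> (None, 'no timezone here')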
1839 1839 def strdate(string, format, defaults=[]):
1840 1840 """parse a localized time string and return a (unixtime, offset) tuple.
1841 1841 if the string cannot be parsed, ValueError is raised."""
1842 1842 # NOTE: unixtime = localunixtime + offset
1843 1843 offset, date = parsetimezone(string)
1844 1844
1845 1845 # add missing elements from defaults
1846 1846 usenow = False # default to using biased defaults
1847 1847 for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
1848 1848 found = [True for p in part if ("%"+p) in format]
1849 1849 if not found:
1850 1850 date += "@" + defaults[part][usenow]
1851 1851 format += "@%" + part[0]
1852 1852 else:
1853 1853 # We've found a specific time element, less specific time
1854 1854 # elements are relative to today
1855 1855 usenow = True
1856 1856
1857 1857 timetuple = time.strptime(date, format)
1858 1858 localunixtime = int(calendar.timegm(timetuple))
1859 1859 if offset is None:
1860 1860 # local timezone
1861 1861 unixtime = int(time.mktime(timetuple))
1862 1862 offset = unixtime - localunixtime
1863 1863 else:
1864 1864 unixtime = localunixtime + offset
1865 1865 return unixtime, offset
1866 1866
1867 1867 def parsedate(date, formats=None, bias=None):
1868 1868 """parse a localized date/time and return a (unixtime, offset) tuple.
1869 1869
1870 1870 The date may be a "unixtime offset" string or in one of the specified
1871 1871 formats. If the date already is a (unixtime, offset) tuple, it is returned.
1872 1872
1873 1873 >>> parsedate(' today ') == parsedate(\
1874 1874 datetime.date.today().strftime('%b %d'))
1875 1875 True
1876 1876 >>> parsedate( 'yesterday ') == parsedate((datetime.date.today() -\
1877 1877 datetime.timedelta(days=1)\
1878 1878 ).strftime('%b %d'))
1879 1879 True
1880 1880 >>> now, tz = makedate()
1881 1881 >>> strnow, strtz = parsedate('now')
1882 1882 >>> (strnow - now) < 1
1883 1883 True
1884 1884 >>> tz == strtz
1885 1885 True
1886 1886 """
1887 1887 if bias is None:
1888 1888 bias = {}
1889 1889 if not date:
1890 1890 return 0, 0
1891 1891 if isinstance(date, tuple) and len(date) == 2:
1892 1892 return date
1893 1893 if not formats:
1894 1894 formats = defaultdateformats
1895 1895 date = date.strip()
1896 1896
1897 1897 if date == 'now' or date == _('now'):
1898 1898 return makedate()
1899 1899 if date == 'today' or date == _('today'):
1900 1900 date = datetime.date.today().strftime('%b %d')
1901 1901 elif date == 'yesterday' or date == _('yesterday'):
1902 1902 date = (datetime.date.today() -
1903 1903 datetime.timedelta(days=1)).strftime('%b %d')
1904 1904
1905 1905 try:
1906 1906 when, offset = map(int, date.split(' '))
1907 1907 except ValueError:
1908 1908 # fill out defaults
1909 1909 now = makedate()
1910 1910 defaults = {}
1911 1911 for part in ("d", "mb", "yY", "HI", "M", "S"):
1912 1912 # this piece is for rounding the specific end of unknowns
1913 1913 b = bias.get(part)
1914 1914 if b is None:
1915 1915 if part[0] in "HMS":
1916 1916 b = "00"
1917 1917 else:
1918 1918 b = "0"
1919 1919
1920 1920 # this piece is for matching the generic end to today's date
1921 1921 n = datestr(now, "%" + part[0])
1922 1922
1923 1923 defaults[part] = (b, n)
1924 1924
1925 1925 for format in formats:
1926 1926 try:
1927 1927 when, offset = strdate(date, format, defaults)
1928 1928 except (ValueError, OverflowError):
1929 1929 pass
1930 1930 else:
1931 1931 break
1932 1932 else:
1933 1933 raise Abort(_('invalid date: %r') % date)
1934 1934 # validate explicit (probably user-specified) date and
1935 1935 # time zone offset. values must fit in signed 32 bits for
1936 1936 # current 32-bit linux runtimes. timezones go from UTC-12
1937 1937 # to UTC+14
1938 1938 if when < -0x80000000 or when > 0x7fffffff:
1939 1939 raise Abort(_('date exceeds 32 bits: %d') % when)
1940 1940 if offset < -50400 or offset > 43200:
1941 1941 raise Abort(_('impossible time zone offset: %d') % offset)
1942 1942 return when, offset
1943 1943
1944 1944 def matchdate(date):
1945 1945 """Return a function that matches a given date match specifier
1946 1946
1947 1947 Formats include:
1948 1948
1949 1949 '{date}' match a given date to the accuracy provided
1950 1950
1951 1951 '<{date}' on or before a given date
1952 1952
1953 1953 '>{date}' on or after a given date
1954 1954
1955 1955 >>> p1 = parsedate("10:29:59")
1956 1956 >>> p2 = parsedate("10:30:00")
1957 1957 >>> p3 = parsedate("10:30:59")
1958 1958 >>> p4 = parsedate("10:31:00")
1959 1959 >>> p5 = parsedate("Sep 15 10:30:00 1999")
1960 1960 >>> f = matchdate("10:30")
1961 1961 >>> f(p1[0])
1962 1962 False
1963 1963 >>> f(p2[0])
1964 1964 True
1965 1965 >>> f(p3[0])
1966 1966 True
1967 1967 >>> f(p4[0])
1968 1968 False
1969 1969 >>> f(p5[0])
1970 1970 False
1971 1971 """
1972 1972
1973 1973 def lower(date):
1974 1974 d = {'mb': "1", 'd': "1"}
1975 1975 return parsedate(date, extendeddateformats, d)[0]
1976 1976
1977 1977 def upper(date):
1978 1978 d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
1979 1979 for days in ("31", "30", "29"):
1980 1980 try:
1981 1981 d["d"] = days
1982 1982 return parsedate(date, extendeddateformats, d)[0]
1983 1983 except Abort:
1984 1984 pass
1985 1985 d["d"] = "28"
1986 1986 return parsedate(date, extendeddateformats, d)[0]
1987 1987
1988 1988 date = date.strip()
1989 1989
1990 1990 if not date:
1991 1991 raise Abort(_("dates cannot consist entirely of whitespace"))
1992 1992 elif date[0] == "<":
1993 1993 if not date[1:]:
1994 1994 raise Abort(_("invalid day spec, use '<DATE'"))
1995 1995 when = upper(date[1:])
1996 1996 return lambda x: x <= when
1997 1997 elif date[0] == ">":
1998 1998 if not date[1:]:
1999 1999 raise Abort(_("invalid day spec, use '>DATE'"))
2000 2000 when = lower(date[1:])
2001 2001 return lambda x: x >= when
2002 2002 elif date[0] == "-":
2003 2003 try:
2004 2004 days = int(date[1:])
2005 2005 except ValueError:
2006 2006 raise Abort(_("invalid day spec: %s") % date[1:])
2007 2007 if days < 0:
2008 2008 raise Abort(_("%s must be nonnegative (see 'hg help dates')")
2009 2009 % date[1:])
2010 2010 when = makedate()[0] - days * 3600 * 24
2011 2011 return lambda x: x >= when
2012 2012 elif " to " in date:
2013 2013 a, b = date.split(" to ")
2014 2014 start, stop = lower(a), upper(b)
2015 2015 return lambda x: x >= start and x <= stop
2016 2016 else:
2017 2017 start, stop = lower(date), upper(date)
2018 2018 return lambda x: x >= start and x <= stop
2019 2019
2020 2020 def stringmatcher(pattern, casesensitive=True):
2021 2021 """
2022 2022 accepts a string, possibly starting with 're:' or 'literal:' prefix.
2023 2023 returns the matcher name, pattern, and matcher function.
2024 2024 missing or unknown prefixes are treated as literal matches.
2025 2025
2026 2026 helper for tests:
2027 2027 >>> def test(pattern, *tests):
2028 2028 ... kind, pattern, matcher = stringmatcher(pattern)
2029 2029 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2030 2030 >>> def itest(pattern, *tests):
2031 2031 ... kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
2032 2032 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2033 2033
2034 2034 exact matching (no prefix):
2035 2035 >>> test('abcdefg', 'abc', 'def', 'abcdefg')
2036 2036 ('literal', 'abcdefg', [False, False, True])
2037 2037
2038 2038 regex matching ('re:' prefix)
2039 2039 >>> test('re:a.+b', 'nomatch', 'fooadef', 'fooadefbar')
2040 2040 ('re', 'a.+b', [False, False, True])
2041 2041
2042 2042 force exact matches ('literal:' prefix)
2043 2043 >>> test('literal:re:foobar', 'foobar', 're:foobar')
2044 2044 ('literal', 're:foobar', [False, True])
2045 2045
2046 2046 unknown prefixes are ignored and treated as literals
2047 2047 >>> test('foo:bar', 'foo', 'bar', 'foo:bar')
2048 2048 ('literal', 'foo:bar', [False, False, True])
2049 2049
2050 2050 case insensitive regex matches
2051 2051 >>> itest('re:A.+b', 'nomatch', 'fooadef', 'fooadefBar')
2052 2052 ('re', 'A.+b', [False, False, True])
2053 2053
2054 2054 case insensitive literal matches
2055 2055 >>> itest('ABCDEFG', 'abc', 'def', 'abcdefg')
2056 2056 ('literal', 'ABCDEFG', [False, False, True])
2057 2057 """
2058 2058 if pattern.startswith('re:'):
2059 2059 pattern = pattern[3:]
2060 2060 try:
2061 2061 flags = 0
2062 2062 if not casesensitive:
2063 2063 flags = remod.I
2064 2064 regex = remod.compile(pattern, flags)
2065 2065 except remod.error as e:
2066 2066 raise error.ParseError(_('invalid regular expression: %s')
2067 2067 % e)
2068 2068 return 're', pattern, regex.search
2069 2069 elif pattern.startswith('literal:'):
2070 2070 pattern = pattern[8:]
2071 2071
2072 2072 match = pattern.__eq__
2073 2073
2074 2074 if not casesensitive:
2075 2075 ipat = encoding.lower(pattern)
2076 2076 match = lambda s: ipat == encoding.lower(s)
2077 2077 return 'literal', pattern, match
2078 2078
2079 2079 def shortuser(user):
2080 2080 """Return a short representation of a user name or email address."""
2081 2081 f = user.find('@')
2082 2082 if f >= 0:
2083 2083 user = user[:f]
2084 2084 f = user.find('<')
2085 2085 if f >= 0:
2086 2086 user = user[f + 1:]
2087 2087 f = user.find(' ')
2088 2088 if f >= 0:
2089 2089 user = user[:f]
2090 2090 f = user.find('.')
2091 2091 if f >= 0:
2092 2092 user = user[:f]
2093 2093 return user
2094 2094
2095 2095 def emailuser(user):
2096 2096 """Return the user portion of an email address."""
2097 2097 f = user.find('@')
2098 2098 if f >= 0:
2099 2099 user = user[:f]
2100 2100 f = user.find('<')
2101 2101 if f >= 0:
2102 2102 user = user[f + 1:]
2103 2103 return user
2104 2104
2105 2105 def email(author):
2106 2106 '''get email of author.'''
2107 2107 r = author.find('>')
2108 2108 if r == -1:
2109 2109 r = None
2110 2110 return author[author.find('<') + 1:r]
2111 2111
2112 2112 def ellipsis(text, maxlength=400):
2113 2113 """Trim string to at most maxlength (default: 400) columns in display."""
2114 2114 return encoding.trim(text, maxlength, ellipsis='...')
2115 2115
2116 2116 def unitcountfn(*unittable):
2117 2117 '''return a function that renders a readable count of some quantity'''
2118 2118
2119 2119 def go(count):
2120 2120 for multiplier, divisor, format in unittable:
2121 2121 if count >= divisor * multiplier:
2122 2122 return format % (count / float(divisor))
2123 2123 return unittable[-1][2] % count
2124 2124
2125 2125 return go
2126 2126
2127 2127 bytecount = unitcountfn(
2128 2128 (100, 1 << 30, _('%.0f GB')),
2129 2129 (10, 1 << 30, _('%.1f GB')),
2130 2130 (1, 1 << 30, _('%.2f GB')),
2131 2131 (100, 1 << 20, _('%.0f MB')),
2132 2132 (10, 1 << 20, _('%.1f MB')),
2133 2133 (1, 1 << 20, _('%.2f MB')),
2134 2134 (100, 1 << 10, _('%.0f KB')),
2135 2135 (10, 1 << 10, _('%.1f KB')),
2136 2136 (1, 1 << 10, _('%.2f KB')),
2137 2137 (1, 1, _('%.0f bytes')),
2138 2138 )
2139 2139
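# Illustrative usage sketch (not from the original module): bytecount picks
# the largest unit that still leaves roughly three significant digits.
#
#   bytecount(500)        # -> '500 bytes'
#   bytecount(190000)     # -> '186 KB'
#   bytecount(12345678)   # -> '11.8 MB'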
2140 2140 def uirepr(s):
2141 2141 # Avoid double backslash in Windows path repr()
2142 2142 return repr(s).replace('\\\\', '\\')
2143 2143
2144 2144 # delay import of textwrap
2145 2145 def MBTextWrapper(**kwargs):
2146 2146 class tw(textwrap.TextWrapper):
2147 2147 """
2148 2148 Extend TextWrapper for width-awareness.
2149 2149
2150 2150 Neither the number of 'bytes' in any encoding nor the number of
2151 2151 'characters' is appropriate for calculating the terminal columns of a string.
2152 2152
2153 2153 The original TextWrapper implementation uses the built-in 'len()' directly,
2154 2154 so overriding is needed to use the width information of each character.
2155 2155
2156 2156 In addition, characters classified as 'ambiguous' width are
2157 2157 treated as wide in East Asian locales, but as narrow elsewhere.
2158 2158
2159 2159 This requires a usage decision to determine the width of such characters.
2160 2160 """
2161 2161 def _cutdown(self, ucstr, space_left):
2162 2162 l = 0
2163 2163 colwidth = encoding.ucolwidth
2164 2164 for i in xrange(len(ucstr)):
2165 2165 l += colwidth(ucstr[i])
2166 2166 if space_left < l:
2167 2167 return (ucstr[:i], ucstr[i:])
2168 2168 return ucstr, ''
2169 2169
2170 2170 # overriding of base class
2171 2171 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
2172 2172 space_left = max(width - cur_len, 1)
2173 2173
2174 2174 if self.break_long_words:
2175 2175 cut, res = self._cutdown(reversed_chunks[-1], space_left)
2176 2176 cur_line.append(cut)
2177 2177 reversed_chunks[-1] = res
2178 2178 elif not cur_line:
2179 2179 cur_line.append(reversed_chunks.pop())
2180 2180
2181 2181 # this overriding code is imported from TextWrapper of Python 2.6
2182 2182 # to calculate columns of string by 'encoding.ucolwidth()'
2183 2183 def _wrap_chunks(self, chunks):
2184 2184 colwidth = encoding.ucolwidth
2185 2185
2186 2186 lines = []
2187 2187 if self.width <= 0:
2188 2188 raise ValueError("invalid width %r (must be > 0)" % self.width)
2189 2189
2190 2190 # Arrange in reverse order so items can be efficiently popped
2191 2191 # from a stack of chunks.
2192 2192 chunks.reverse()
2193 2193
2194 2194 while chunks:
2195 2195
2196 2196 # Start the list of chunks that will make up the current line.
2197 2197 # cur_len is just the length of all the chunks in cur_line.
2198 2198 cur_line = []
2199 2199 cur_len = 0
2200 2200
2201 2201 # Figure out which static string will prefix this line.
2202 2202 if lines:
2203 2203 indent = self.subsequent_indent
2204 2204 else:
2205 2205 indent = self.initial_indent
2206 2206
2207 2207 # Maximum width for this line.
2208 2208 width = self.width - len(indent)
2209 2209
2210 2210 # First chunk on line is whitespace -- drop it, unless this
2211 2211 # is the very beginning of the text (i.e. no lines started yet).
2212 2212 if self.drop_whitespace and chunks[-1].strip() == '' and lines:
2213 2213 del chunks[-1]
2214 2214
2215 2215 while chunks:
2216 2216 l = colwidth(chunks[-1])
2217 2217
2218 2218 # Can at least squeeze this chunk onto the current line.
2219 2219 if cur_len + l <= width:
2220 2220 cur_line.append(chunks.pop())
2221 2221 cur_len += l
2222 2222
2223 2223 # Nope, this line is full.
2224 2224 else:
2225 2225 break
2226 2226
2227 2227 # The current line is full, and the next chunk is too big to
2228 2228 # fit on *any* line (not just this one).
2229 2229 if chunks and colwidth(chunks[-1]) > width:
2230 2230 self._handle_long_word(chunks, cur_line, cur_len, width)
2231 2231
2232 2232 # If the last chunk on this line is all whitespace, drop it.
2233 2233 if (self.drop_whitespace and
2234 2234 cur_line and cur_line[-1].strip() == ''):
2235 2235 del cur_line[-1]
2236 2236
2237 2237 # Convert current line back to a string and store it in list
2238 2238 # of all lines (return value).
2239 2239 if cur_line:
2240 2240 lines.append(indent + ''.join(cur_line))
2241 2241
2242 2242 return lines
2243 2243
2244 2244 global MBTextWrapper
2245 2245 MBTextWrapper = tw
2246 2246 return tw(**kwargs)
2247 2247
2248 2248 def wrap(line, width, initindent='', hangindent=''):
2249 2249 maxindent = max(len(hangindent), len(initindent))
2250 2250 if width <= maxindent:
2251 2251 # adjust for weird terminal size
2252 2252 width = max(78, maxindent + 1)
2253 2253 line = line.decode(encoding.encoding, encoding.encodingmode)
2254 2254 initindent = initindent.decode(encoding.encoding, encoding.encodingmode)
2255 2255 hangindent = hangindent.decode(encoding.encoding, encoding.encodingmode)
2256 2256 wrapper = MBTextWrapper(width=width,
2257 2257 initial_indent=initindent,
2258 2258 subsequent_indent=hangindent)
2259 2259 return wrapper.fill(line).encode(encoding.encoding)
2260 2260
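# Illustrative usage sketch (not from the original module): wrap() measures
# width in terminal columns (East Asian wide characters count as two), which
# the standard textwrap module cannot do.
#
#   wrap('some long help text to be wrapped', 30, initindent='  ', hangindent='    ')
#   # -> one string with embedded newlines; the first line is indented by
#   #    two spaces and continuation lines by four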
2261 2261 if (pyplatform.python_implementation() == 'CPython' and
2262 2262 sys.version_info < (3, 0)):
2263 2263 # There is an issue in CPython that some IO methods do not handle EINTR
2264 2264 # correctly. The following table shows what CPython version (and functions)
2265 2265 # are affected (buggy: has the EINTR bug, okay: otherwise):
2266 2266 #
2267 2267 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2268 2268 # --------------------------------------------------
2269 2269 # fp.__iter__ | buggy | buggy | okay
2270 2270 # fp.read* | buggy | okay [1] | okay
2271 2271 #
2272 2272 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2273 2273 #
2274 2274 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2275 2275 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2276 2276 #
2277 2277 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2278 2278 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2279 2279 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2280 2280 # fp.__iter__ but not other fp.read* methods.
2281 2281 #
2282 2282 # On modern systems like Linux, the "read" syscall cannot be interrupted
2283 2283 # when reading "fast" files like on-disk files. So the EINTR issue only
2284 2284 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2285 2285 # files approximately as "fast" files and use the fast (unsafe) code path,
2286 2286 # to minimize the performance impact.
2287 2287 if sys.version_info >= (2, 7, 4):
2288 2288 # fp.readline deals with EINTR correctly, use it as a workaround.
2289 2289 def _safeiterfile(fp):
2290 2290 return iter(fp.readline, '')
2291 2291 else:
2292 2292 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2293 2293 # note: this may block longer than necessary because of bufsize.
2294 2294 def _safeiterfile(fp, bufsize=4096):
2295 2295 fd = fp.fileno()
2296 2296 line = ''
2297 2297 while True:
2298 2298 try:
2299 2299 buf = os.read(fd, bufsize)
2300 2300 except OSError as ex:
2301 2301 # os.read only raises EINTR before any data is read
2302 2302 if ex.errno == errno.EINTR:
2303 2303 continue
2304 2304 else:
2305 2305 raise
2306 2306 line += buf
2307 2307 if '\n' in buf:
2308 2308 splitted = line.splitlines(True)
2309 2309 line = ''
2310 2310 for l in splitted:
2311 2311 if l[-1] == '\n':
2312 2312 yield l
2313 2313 else:
2314 2314 line = l
2315 2315 if not buf:
2316 2316 break
2317 2317 if line:
2318 2318 yield line
2319 2319
2320 2320 def iterfile(fp):
2321 2321 fastpath = True
2322 2322 if type(fp) is file:
2323 2323 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2324 2324 if fastpath:
2325 2325 return fp
2326 2326 else:
2327 2327 return _safeiterfile(fp)
2328 2328 else:
2329 2329 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2330 2330 def iterfile(fp):
2331 2331 return fp
2332 2332
2333 2333 def iterlines(iterator):
2334 2334 for chunk in iterator:
2335 2335 for line in chunk.splitlines():
2336 2336 yield line
2337 2337
2338 2338 def expandpath(path):
2339 2339 return os.path.expanduser(os.path.expandvars(path))
2340 2340
2341 2341 def hgcmd():
2342 2342 """Return the command used to execute current hg
2343 2343
2344 2344 This is different from hgexecutable() because on Windows we want
2345 2345 to avoid things like batch files that open new shell windows, so we
2346 2346 return either the python call or the current executable.
2347 2347 """
2348 2348 if mainfrozen():
2349 2349 if getattr(sys, 'frozen', None) == 'macosx_app':
2350 2350 # Env variable set by py2app
2351 2351 return [encoding.environ['EXECUTABLEPATH']]
2352 2352 else:
2353 2353 return [pycompat.sysexecutable]
2354 2354 return gethgcmd()
2355 2355
2356 2356 def rundetached(args, condfn):
2357 2357 """Execute the argument list in a detached process.
2358 2358
2359 2359 condfn is a callable which is called repeatedly and should return
2360 2360 True once the child process is known to have started successfully.
2361 2361 At this point, the child process PID is returned. If the child
2362 2362 process fails to start or finishes before condfn() evaluates to
2363 2363 True, return -1.
2364 2364 """
2365 2365 # Windows case is easier because the child process is either
2366 2366 # successfully starting and validating the condition or exiting
2367 2367 # on failure. We just poll on its PID. On Unix, if the child
2368 2368 # process fails to start, it will be left in a zombie state until
2369 2369 # the parent waits on it, which we cannot do since we expect a
2370 2370 # long-running process on success. Instead we listen for SIGCHLD telling
2371 2371 # us our child process terminated.
2372 2372 terminated = set()
2373 2373 def handler(signum, frame):
2374 2374 terminated.add(os.wait())
2375 2375 prevhandler = None
2376 2376 SIGCHLD = getattr(signal, 'SIGCHLD', None)
2377 2377 if SIGCHLD is not None:
2378 2378 prevhandler = signal.signal(SIGCHLD, handler)
2379 2379 try:
2380 2380 pid = spawndetached(args)
2381 2381 while not condfn():
2382 2382 if ((pid in terminated or not testpid(pid))
2383 2383 and not condfn()):
2384 2384 return -1
2385 2385 time.sleep(0.1)
2386 2386 return pid
2387 2387 finally:
2388 2388 if prevhandler is not None:
2389 2389 signal.signal(signal.SIGCHLD, prevhandler)
2390 2390
2391 2391 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2392 2392 """Return the result of interpolating items in the mapping into string s.
2393 2393
2394 2394 prefix is a single character string, or a two character string with
2395 2395 a backslash as the first character if the prefix needs to be escaped in
2396 2396 a regular expression.
2397 2397
2398 2398 fn is an optional function that will be applied to the replacement text
2399 2399 just before replacement.
2400 2400
2401 2401 escape_prefix is an optional flag that allows using doubled prefix for
2402 2402 its escaping.
2403 2403 """
2404 2404 fn = fn or (lambda s: s)
2405 2405 patterns = '|'.join(mapping.keys())
2406 2406 if escape_prefix:
2407 2407 patterns += '|' + prefix
2408 2408 if len(prefix) > 1:
2409 2409 prefix_char = prefix[1:]
2410 2410 else:
2411 2411 prefix_char = prefix
2412 2412 mapping[prefix_char] = prefix_char
2413 2413 r = remod.compile(r'%s(%s)' % (prefix, patterns))
2414 2414 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2415 2415
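# Illustrative usage sketch (not from the original module): the mapping keys
# are spliced into a single regular expression, so keep them short literal
# strings; a prefix that is itself a regex metacharacter (e.g. '$') must be
# passed escaped, as the docstring above notes.
#
#   interpolate('%', {'user': 'alice', 'repo': 'hg'},
#               'pushed to %repo by %user')
#   # -> 'pushed to hg by alice'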
2416 2416 def getport(port):
2417 2417 """Return the port for a given network service.
2418 2418
2419 2419 If port is an integer, it's returned as is. If it's a string, it's
2420 2420 looked up using socket.getservbyname(). If there's no matching
2421 2421 service, error.Abort is raised.
2422 2422 """
2423 2423 try:
2424 2424 return int(port)
2425 2425 except ValueError:
2426 2426 pass
2427 2427
2428 2428 try:
2429 2429 return socket.getservbyname(port)
2430 2430 except socket.error:
2431 2431 raise Abort(_("no port number associated with service '%s'") % port)
2432 2432
2433 2433 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
2434 2434 '0': False, 'no': False, 'false': False, 'off': False,
2435 2435 'never': False}
2436 2436
2437 2437 def parsebool(s):
2438 2438 """Parse s into a boolean.
2439 2439
2440 2440 If s is not a valid boolean, returns None.
2441 2441 """
2442 2442 return _booleans.get(s.lower(), None)
2443 2443
2444 2444 _hextochr = dict((a + b, chr(int(a + b, 16)))
2445 2445 for a in string.hexdigits for b in string.hexdigits)
2446 2446
2447 2447 class url(object):
2448 2448 r"""Reliable URL parser.
2449 2449
2450 2450 This parses URLs and provides attributes for the following
2451 2451 components:
2452 2452
2453 2453 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2454 2454
2455 2455 Missing components are set to None. The only exception is
2456 2456 fragment, which is set to '' if present but empty.
2457 2457
2458 2458 If parsefragment is False, fragment is included in query. If
2459 2459 parsequery is False, query is included in path. If both are
2460 2460 False, both fragment and query are included in path.
2461 2461
2462 2462 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2463 2463
2464 2464 Note that for backward compatibility reasons, bundle URLs do not
2465 2465 take host names. That means 'bundle://../' has a path of '../'.
2466 2466
2467 2467 Examples:
2468 2468
2469 2469 >>> url('http://www.ietf.org/rfc/rfc2396.txt')
2470 2470 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2471 2471 >>> url('ssh://[::1]:2200//home/joe/repo')
2472 2472 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2473 2473 >>> url('file:///home/joe/repo')
2474 2474 <url scheme: 'file', path: '/home/joe/repo'>
2475 2475 >>> url('file:///c:/temp/foo/')
2476 2476 <url scheme: 'file', path: 'c:/temp/foo/'>
2477 2477 >>> url('bundle:foo')
2478 2478 <url scheme: 'bundle', path: 'foo'>
2479 2479 >>> url('bundle://../foo')
2480 2480 <url scheme: 'bundle', path: '../foo'>
2481 2481 >>> url(r'c:\foo\bar')
2482 2482 <url path: 'c:\\foo\\bar'>
2483 2483 >>> url(r'\\blah\blah\blah')
2484 2484 <url path: '\\\\blah\\blah\\blah'>
2485 2485 >>> url(r'\\blah\blah\blah#baz')
2486 2486 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2487 2487 >>> url(r'file:///C:\users\me')
2488 2488 <url scheme: 'file', path: 'C:\\users\\me'>
2489 2489
2490 2490 Authentication credentials:
2491 2491
2492 2492 >>> url('ssh://joe:xyz@x/repo')
2493 2493 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2494 2494 >>> url('ssh://joe@x/repo')
2495 2495 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2496 2496
2497 2497 Query strings and fragments:
2498 2498
2499 2499 >>> url('http://host/a?b#c')
2500 2500 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2501 2501 >>> url('http://host/a?b#c', parsequery=False, parsefragment=False)
2502 2502 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2503 2503
2504 2504 Empty path:
2505 2505
2506 2506 >>> url('')
2507 2507 <url path: ''>
2508 2508 >>> url('#a')
2509 2509 <url path: '', fragment: 'a'>
2510 2510 >>> url('http://host/')
2511 2511 <url scheme: 'http', host: 'host', path: ''>
2512 2512 >>> url('http://host/#a')
2513 2513 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2514 2514
2515 2515 Only scheme:
2516 2516
2517 2517 >>> url('http:')
2518 2518 <url scheme: 'http'>
2519 2519 """
2520 2520
2521 2521 _safechars = "!~*'()+"
2522 2522 _safepchars = "/!~*'()+:\\"
2523 2523 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2524 2524
2525 2525 def __init__(self, path, parsequery=True, parsefragment=True):
2526 2526 # We slowly chomp away at path until we have only the path left
2527 2527 self.scheme = self.user = self.passwd = self.host = None
2528 2528 self.port = self.path = self.query = self.fragment = None
2529 2529 self._localpath = True
2530 2530 self._hostport = ''
2531 2531 self._origpath = path
2532 2532
2533 2533 if parsefragment and '#' in path:
2534 2534 path, self.fragment = path.split('#', 1)
2535 2535
2536 2536 # special case for Windows drive letters and UNC paths
2537 2537 if hasdriveletter(path) or path.startswith('\\\\'):
2538 2538 self.path = path
2539 2539 return
2540 2540
2541 2541 # For compatibility reasons, we can't handle bundle paths as
2542 2542 # normal URLS
2543 2543 if path.startswith('bundle:'):
2544 2544 self.scheme = 'bundle'
2545 2545 path = path[7:]
2546 2546 if path.startswith('//'):
2547 2547 path = path[2:]
2548 2548 self.path = path
2549 2549 return
2550 2550
2551 2551 if self._matchscheme(path):
2552 2552 parts = path.split(':', 1)
2553 2553 if parts[0]:
2554 2554 self.scheme, path = parts
2555 2555 self._localpath = False
2556 2556
2557 2557 if not path:
2558 2558 path = None
2559 2559 if self._localpath:
2560 2560 self.path = ''
2561 2561 return
2562 2562 else:
2563 2563 if self._localpath:
2564 2564 self.path = path
2565 2565 return
2566 2566
2567 2567 if parsequery and '?' in path:
2568 2568 path, self.query = path.split('?', 1)
2569 2569 if not path:
2570 2570 path = None
2571 2571 if not self.query:
2572 2572 self.query = None
2573 2573
2574 2574 # // is required to specify a host/authority
2575 2575 if path and path.startswith('//'):
2576 2576 parts = path[2:].split('/', 1)
2577 2577 if len(parts) > 1:
2578 2578 self.host, path = parts
2579 2579 else:
2580 2580 self.host = parts[0]
2581 2581 path = None
2582 2582 if not self.host:
2583 2583 self.host = None
2584 2584 # path of file:///d is /d
2585 2585 # path of file:///d:/ is d:/, not /d:/
2586 2586 if path and not hasdriveletter(path):
2587 2587 path = '/' + path
2588 2588
2589 2589 if self.host and '@' in self.host:
2590 2590 self.user, self.host = self.host.rsplit('@', 1)
2591 2591 if ':' in self.user:
2592 2592 self.user, self.passwd = self.user.split(':', 1)
2593 2593 if not self.host:
2594 2594 self.host = None
2595 2595
2596 2596 # Don't split on colons in IPv6 addresses without ports
2597 2597 if (self.host and ':' in self.host and
2598 2598 not (self.host.startswith('[') and self.host.endswith(']'))):
2599 2599 self._hostport = self.host
2600 2600 self.host, self.port = self.host.rsplit(':', 1)
2601 2601 if not self.host:
2602 2602 self.host = None
2603 2603
2604 2604 if (self.host and self.scheme == 'file' and
2605 2605 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2606 2606 raise Abort(_('file:// URLs can only refer to localhost'))
2607 2607
2608 2608 self.path = path
2609 2609
2610 2610 # leave the query string escaped
2611 2611 for a in ('user', 'passwd', 'host', 'port',
2612 2612 'path', 'fragment'):
2613 2613 v = getattr(self, a)
2614 2614 if v is not None:
2615 2615 setattr(self, a, pycompat.urlunquote(v))
2616 2616
2617 2617 def __repr__(self):
2618 2618 attrs = []
2619 2619 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2620 2620 'query', 'fragment'):
2621 2621 v = getattr(self, a)
2622 2622 if v is not None:
2623 2623 attrs.append('%s: %r' % (a, v))
2624 2624 return '<url %s>' % ', '.join(attrs)
2625 2625
2626 2626 def __str__(self):
2627 2627 r"""Join the URL's components back into a URL string.
2628 2628
2629 2629 Examples:
2630 2630
2631 2631 >>> str(url('http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2632 2632 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2633 2633 >>> str(url('http://user:pw@host:80/?foo=bar&baz=42'))
2634 2634 'http://user:pw@host:80/?foo=bar&baz=42'
2635 2635 >>> str(url('http://user:pw@host:80/?foo=bar%3dbaz'))
2636 2636 'http://user:pw@host:80/?foo=bar%3dbaz'
2637 2637 >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
2638 2638 'ssh://user:pw@[::1]:2200//home/joe#'
2639 2639 >>> str(url('http://localhost:80//'))
2640 2640 'http://localhost:80//'
2641 2641 >>> str(url('http://localhost:80/'))
2642 2642 'http://localhost:80/'
2643 2643 >>> str(url('http://localhost:80'))
2644 2644 'http://localhost:80/'
2645 2645 >>> str(url('bundle:foo'))
2646 2646 'bundle:foo'
2647 2647 >>> str(url('bundle://../foo'))
2648 2648 'bundle:../foo'
2649 2649 >>> str(url('path'))
2650 2650 'path'
2651 2651 >>> str(url('file:///tmp/foo/bar'))
2652 2652 'file:///tmp/foo/bar'
2653 2653 >>> str(url('file:///c:/tmp/foo/bar'))
2654 2654 'file:///c:/tmp/foo/bar'
2655 2655 >>> print url(r'bundle:foo\bar')
2656 2656 bundle:foo\bar
2657 2657 >>> print url(r'file:///D:\data\hg')
2658 2658 file:///D:\data\hg
2659 2659 """
2660 2660 if self._localpath:
2661 2661 s = self.path
2662 2662 if self.scheme == 'bundle':
2663 2663 s = 'bundle:' + s
2664 2664 if self.fragment:
2665 2665 s += '#' + self.fragment
2666 2666 return s
2667 2667
2668 2668 s = self.scheme + ':'
2669 2669 if self.user or self.passwd or self.host:
2670 2670 s += '//'
2671 2671 elif self.scheme and (not self.path or self.path.startswith('/')
2672 2672 or hasdriveletter(self.path)):
2673 2673 s += '//'
2674 2674 if hasdriveletter(self.path):
2675 2675 s += '/'
2676 2676 if self.user:
2677 2677 s += urlreq.quote(self.user, safe=self._safechars)
2678 2678 if self.passwd:
2679 2679 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2680 2680 if self.user or self.passwd:
2681 2681 s += '@'
2682 2682 if self.host:
2683 2683 if not (self.host.startswith('[') and self.host.endswith(']')):
2684 2684 s += urlreq.quote(self.host)
2685 2685 else:
2686 2686 s += self.host
2687 2687 if self.port:
2688 2688 s += ':' + urlreq.quote(self.port)
2689 2689 if self.host:
2690 2690 s += '/'
2691 2691 if self.path:
2692 2692 # TODO: similar to the query string, we should not unescape the
2693 2693 # path when we store it, the path might contain '%2f' = '/',
2694 2694 # which we should *not* escape.
2695 2695 s += urlreq.quote(self.path, safe=self._safepchars)
2696 2696 if self.query:
2697 2697 # we store the query in escaped form.
2698 2698 s += '?' + self.query
2699 2699 if self.fragment is not None:
2700 2700 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2701 2701 return s
2702 2702
2703 2703 def authinfo(self):
2704 2704 user, passwd = self.user, self.passwd
2705 2705 try:
2706 2706 self.user, self.passwd = None, None
2707 2707 s = str(self)
2708 2708 finally:
2709 2709 self.user, self.passwd = user, passwd
2710 2710 if not self.user:
2711 2711 return (s, None)
2712 2712 # authinfo[1] is passed to urllib2 password manager, and its
2713 2713 # URIs must not contain credentials. The host is passed in the
2714 2714 # URIs list because Python < 2.4.3 uses only that to search for
2715 2715 # a password.
2716 2716 return (s, (None, (s, self.host),
2717 2717 self.user, self.passwd or ''))
2718 2718
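# A brief sketch of what authinfo() produces for a URL carrying credentials:
# the returned URI has the credentials stripped, and they are handed back
# separately in the shape urllib2's password manager expects. The URL below
# is a hypothetical example, not taken from the doctests in this file.
#
#   >>> u = url('http://alice:secret@example.com/repo')
#   >>> u.authinfo()
#   ('http://example.com/repo', (None, ('http://example.com/repo', 'example.com'), 'alice', 'secret'))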
2719 2719 def isabs(self):
2720 2720 if self.scheme and self.scheme != 'file':
2721 2721 return True # remote URL
2722 2722 if hasdriveletter(self.path):
2723 2723 return True # absolute for our purposes - can't be joined()
2724 2724 if self.path.startswith(r'\\'):
2725 2725 return True # Windows UNC path
2726 2726 if self.path.startswith('/'):
2727 2727 return True # POSIX-style
2728 2728 return False
2729 2729
2730 2730 def localpath(self):
2731 2731 if self.scheme == 'file' or self.scheme == 'bundle':
2732 2732 path = self.path or '/'
2733 2733 # For Windows, we need to promote hosts containing drive
2734 2734 # letters to paths with drive letters.
2735 2735 if hasdriveletter(self._hostport):
2736 2736 path = self._hostport + '/' + self.path
2737 2737 elif (self.host is not None and self.path
2738 2738 and not hasdriveletter(path)):
2739 2739 path = '/' + path
2740 2740 return path
2741 2741 return self._origpath
2742 2742
2743 2743 def islocal(self):
2744 2744 '''whether localpath will return something that posixfile can open'''
2745 2745 return (not self.scheme or self.scheme == 'file'
2746 2746 or self.scheme == 'bundle')
2747 2747
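# A minimal sketch of localpath() and islocal() on a few hypothetical inputs
# (relying on the parsing rules above; not taken from the doctests):
#
#   >>> url('file:///tmp/repo').localpath()
#   '/tmp/repo'
#   >>> url('ssh://example.com/repo').islocal()
#   False
#   >>> url('relative/path').islocal()
#   True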
2748 2748 def hasscheme(path):
2749 2749 return bool(url(path).scheme)
2750 2750
2751 2751 def hasdriveletter(path):
2752 2752 return path and path[1:2] == ':' and path[0:1].isalpha()
2753 2753
2754 2754 def urllocalpath(path):
2755 2755 return url(path, parsequery=False, parsefragment=False).localpath()
2756 2756
2757 2757 def hidepassword(u):
2758 2758 '''hide user credential in a url string'''
2759 2759 u = url(u)
2760 2760 if u.passwd:
2761 2761 u.passwd = '***'
2762 2762 return str(u)
2763 2763
2764 2764 def removeauth(u):
2765 2765 '''remove all authentication information from a url string'''
2766 2766 u = url(u)
2767 2767 u.user = u.passwd = None
2768 2768 return str(u)
2769 2769
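# A quick sketch of the two helpers above; the URL is a hypothetical example:
#
#   >>> hidepassword('http://alice:secret@example.com/repo')
#   'http://alice:***@example.com/repo'
#   >>> removeauth('http://alice:secret@example.com/repo')
#   'http://example.com/repo'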
2770 2770 timecount = unitcountfn(
2771 2771 (1, 1e3, _('%.0f s')),
2772 2772 (100, 1, _('%.1f s')),
2773 2773 (10, 1, _('%.2f s')),
2774 2774 (1, 1, _('%.3f s')),
2775 2775 (100, 0.001, _('%.1f ms')),
2776 2776 (10, 0.001, _('%.2f ms')),
2777 2777 (1, 0.001, _('%.3f ms')),
2778 2778 (100, 0.000001, _('%.1f us')),
2779 2779 (10, 0.000001, _('%.2f us')),
2780 2780 (1, 0.000001, _('%.3f us')),
2781 2781 (100, 0.000000001, _('%.1f ns')),
2782 2782 (10, 0.000000001, _('%.2f ns')),
2783 2783 (1, 0.000000001, _('%.3f ns')),
2784 2784 )
2785 2785
2786 2786 _timenesting = [0]
2787 2787
2788 2788 def timed(func):
2789 2789 '''Report the execution time of a function call to stderr.
2790 2790
2791 2791 During development, use as a decorator when you need to measure
2792 2792 the cost of a function, e.g. as follows:
2793 2793
2794 2794 @util.timed
2795 2795 def foo(a, b, c):
2796 2796 pass
2797 2797 '''
2798 2798
2799 2799 def wrapper(*args, **kwargs):
2800 start = time.time()
2800 start = timer()
2801 2801 indent = 2
2802 2802 _timenesting[0] += indent
2803 2803 try:
2804 2804 return func(*args, **kwargs)
2805 2805 finally:
2806 elapsed = time.time() - start
2806 elapsed = timer() - start
2807 2807 _timenesting[0] -= indent
2808 2808 stderr.write('%s%s: %s\n' %
2809 2809 (' ' * _timenesting[0], func.__name__,
2810 2810 timecount(elapsed)))
2811 2811 return wrapper
2812 2812
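# A small usage sketch of the timed decorator; the function and the elapsed
# time shown are hypothetical. The output format comes from the wrapper above:
# nested timed calls are indented, and the elapsed time is rendered with
# timecount().
#
#   @timed
#   def expensive():
#       return sum(xrange(1000000))
#
#   expensive()   # writes something like "expensive: 12.30 ms" to stderr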
2813 2813 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
2814 2814 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
2815 2815
2816 2816 def sizetoint(s):
2817 2817 '''Convert a space specifier to a byte count.
2818 2818
2819 2819 >>> sizetoint('30')
2820 2820 30
2821 2821 >>> sizetoint('2.2kb')
2822 2822 2252
2823 2823 >>> sizetoint('6M')
2824 2824 6291456
2825 2825 '''
2826 2826 t = s.strip().lower()
2827 2827 try:
2828 2828 for k, u in _sizeunits:
2829 2829 if t.endswith(k):
2830 2830 return int(float(t[:-len(k)]) * u)
2831 2831 return int(t)
2832 2832 except ValueError:
2833 2833 raise error.ParseError(_("couldn't parse size: %s") % s)
2834 2834
2835 2835 class hooks(object):
2836 2836 '''A collection of hook functions that can be used to extend a
2837 2837 function's behavior. Hooks are called in lexicographic order,
2838 2838 based on the names of their sources.'''
2839 2839
2840 2840 def __init__(self):
2841 2841 self._hooks = []
2842 2842
2843 2843 def add(self, source, hook):
2844 2844 self._hooks.append((source, hook))
2845 2845
2846 2846 def __call__(self, *args):
2847 2847 self._hooks.sort(key=lambda x: x[0])
2848 2848 results = []
2849 2849 for source, hook in self._hooks:
2850 2850 results.append(hook(*args))
2851 2851 return results
2852 2852
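# A minimal sketch of the hooks class: hooks run in lexicographic order of
# their source names and __call__ collects every hook's return value. The
# source names and lambdas are hypothetical:
#
#   >>> h = hooks()
#   >>> h.add('ext-b', lambda x: x + 1)
#   >>> h.add('ext-a', lambda x: x * 2)
#   >>> h(3)   # 'ext-a' runs before 'ext-b'
#   [6, 4]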
2853 2853 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%s'):
2854 2854 '''Yields lines for a nicely formatted stacktrace.
2855 2855 Skips the 'skip' last entries.
2856 2856 Each file+linenumber is formatted according to fileline.
2857 2857 Each line is formatted according to line.
2858 2858 If line is None, it yields:
2859 2859 length of longest filepath+line number,
2860 2860 filepath+linenumber,
2861 2861 function
2862 2862
2863 2863 Not to be used in production code, but very convenient while developing.
2864 2864 '''
2865 2865 entries = [(fileline % (fn, ln), func)
2866 2866 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]]
2867 2867 if entries:
2868 2868 fnmax = max(len(entry[0]) for entry in entries)
2869 2869 for fnln, func in entries:
2870 2870 if line is None:
2871 2871 yield (fnmax, fnln, func)
2872 2872 else:
2873 2873 yield line % (fnmax, fnln, func)
2874 2874
2875 2875 def debugstacktrace(msg='stacktrace', skip=0, f=stderr, otherf=stdout):
2876 2876 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
2877 2877 Skips the 'skip' last entries. By default it will flush stdout first.
2878 2878 It can be used everywhere and intentionally does not require an ui object.
2879 2879 Not to be used in production code, but very convenient while developing.
2880 2880 '''
2881 2881 if otherf:
2882 2882 otherf.flush()
2883 2883 f.write('%s at:\n' % msg)
2884 2884 for line in getstackframes(skip + 1):
2885 2885 f.write(line)
2886 2886 f.flush()
2887 2887
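# A sketch of how debugstacktrace() is typically dropped into code while
# debugging (the function and message are hypothetical):
#
#   def somefunc():
#       debugstacktrace('who calls somefunc?')
#
# This flushes stdout, then writes "who calls somefunc? at:" followed by one
# formatted stack frame per line to stderr.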
2888 2888 class dirs(object):
2889 2889 '''a multiset of directory names from a dirstate or manifest'''
2890 2890
2891 2891 def __init__(self, map, skip=None):
2892 2892 self._dirs = {}
2893 2893 addpath = self.addpath
2894 2894 if safehasattr(map, 'iteritems') and skip is not None:
2895 2895 for f, s in map.iteritems():
2896 2896 if s[0] != skip:
2897 2897 addpath(f)
2898 2898 else:
2899 2899 for f in map:
2900 2900 addpath(f)
2901 2901
2902 2902 def addpath(self, path):
2903 2903 dirs = self._dirs
2904 2904 for base in finddirs(path):
2905 2905 if base in dirs:
2906 2906 dirs[base] += 1
2907 2907 return
2908 2908 dirs[base] = 1
2909 2909
2910 2910 def delpath(self, path):
2911 2911 dirs = self._dirs
2912 2912 for base in finddirs(path):
2913 2913 if dirs[base] > 1:
2914 2914 dirs[base] -= 1
2915 2915 return
2916 2916 del dirs[base]
2917 2917
2918 2918 def __iter__(self):
2919 2919 return self._dirs.iterkeys()
2920 2920
2921 2921 def __contains__(self, d):
2922 2922 return d in self._dirs
2923 2923
2924 2924 if safehasattr(parsers, 'dirs'):
2925 2925 dirs = parsers.dirs
2926 2926
2927 2927 def finddirs(path):
2928 2928 pos = path.rfind('/')
2929 2929 while pos != -1:
2930 2930 yield path[:pos]
2931 2931 pos = path.rfind('/', 0, pos)
2932 2932
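# A brief sketch of finddirs() and the dirs multiset; the paths are
# hypothetical. finddirs() yields every ancestor directory of a path,
# deepest first, and dirs() keeps reference counts so a directory stays
# present while anything beneath it remains:
#
#   >>> list(finddirs('a/b/c'))
#   ['a/b', 'a']
#   >>> d = dirs(['a/b/c', 'a/b/d'])
#   >>> 'a/b' in d
#   True
#   >>> d.delpath('a/b/c')
#   >>> 'a/b' in d   # 'a/b/d' still keeps 'a/b' alive
#   True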
2933 2933 class ctxmanager(object):
2934 2934 '''A context manager for use in 'with' blocks to allow multiple
2935 2935 contexts to be entered at once. This is both safer and more
2936 2936 flexible than contextlib.nested.
2937 2937
2938 2938 Once Mercurial supports Python 2.7+, this will become mostly
2939 2939 unnecessary.
2940 2940 '''
2941 2941
2942 2942 def __init__(self, *args):
2943 2943 '''Accepts a list of no-argument functions that return context
2944 2944 managers. These will be invoked when enter() is called.'''
2945 2945 self._pending = args
2946 2946 self._atexit = []
2947 2947
2948 2948 def __enter__(self):
2949 2949 return self
2950 2950
2951 2951 def enter(self):
2952 2952 '''Create and enter context managers in the order in which they were
2953 2953 passed to the constructor.'''
2954 2954 values = []
2955 2955 for func in self._pending:
2956 2956 obj = func()
2957 2957 values.append(obj.__enter__())
2958 2958 self._atexit.append(obj.__exit__)
2959 2959 del self._pending
2960 2960 return values
2961 2961
2962 2962 def atexit(self, func, *args, **kwargs):
2963 2963 '''Add a function to call when this context manager exits. The
2964 2964 ordering of multiple atexit calls is unspecified, save that
2965 2965 they will happen before any __exit__ functions.'''
2966 2966 def wrapper(exc_type, exc_val, exc_tb):
2967 2967 func(*args, **kwargs)
2968 2968 self._atexit.append(wrapper)
2969 2969 return func
2970 2970
2971 2971 def __exit__(self, exc_type, exc_val, exc_tb):
2972 2972 '''Context managers are exited in the reverse order from which
2973 2973 they were created.'''
2974 2974 received = exc_type is not None
2975 2975 suppressed = False
2976 2976 pending = None
2977 2977 self._atexit.reverse()
2978 2978 for exitfunc in self._atexit:
2979 2979 try:
2980 2980 if exitfunc(exc_type, exc_val, exc_tb):
2981 2981 suppressed = True
2982 2982 exc_type = None
2983 2983 exc_val = None
2984 2984 exc_tb = None
2985 2985 except BaseException:
2986 2986 pending = sys.exc_info()
2987 2987 exc_type, exc_val, exc_tb = pending = sys.exc_info()
2988 2988 del self._atexit
2989 2989 if pending:
2990 2990 raise exc_val
2991 2991 return received and suppressed
2992 2992
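# A minimal usage sketch of ctxmanager; the file names are hypothetical:
#
#   with ctxmanager(lambda: open('a.txt'), lambda: open('b.txt')) as c:
#       fa, fb = c.enter()
#       c.atexit(fa.close)   # redundant here, only to illustrate atexit()
#       data = fa.read() + fb.read()
#
# On leaving the with block the managers are exited in reverse order, and
# atexit callbacks run before any of the __exit__ functions.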
2993 2993 # compression code
2994 2994
2995 2995 SERVERROLE = 'server'
2996 2996 CLIENTROLE = 'client'
2997 2997
2998 2998 compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
2999 2999 (u'name', u'serverpriority',
3000 3000 u'clientpriority'))
3001 3001
3002 3002 class compressormanager(object):
3003 3003 """Holds registrations of various compression engines.
3004 3004
3005 3005 This class essentially abstracts the differences between compression
3006 3006 engines to allow new compression formats to be added easily, possibly from
3007 3007 extensions.
3008 3008
3009 3009 Compressors are registered against the global instance by calling its
3010 3010 ``register()`` method.
3011 3011 """
3012 3012 def __init__(self):
3013 3013 self._engines = {}
3014 3014 # Bundle spec human name to engine name.
3015 3015 self._bundlenames = {}
3016 3016 # Internal bundle identifier to engine name.
3017 3017 self._bundletypes = {}
3018 3018 # Revlog header to engine name.
3019 3019 self._revlogheaders = {}
3020 3020 # Wire proto identifier to engine name.
3021 3021 self._wiretypes = {}
3022 3022
3023 3023 def __getitem__(self, key):
3024 3024 return self._engines[key]
3025 3025
3026 3026 def __contains__(self, key):
3027 3027 return key in self._engines
3028 3028
3029 3029 def __iter__(self):
3030 3030 return iter(self._engines.keys())
3031 3031
3032 3032 def register(self, engine):
3033 3033 """Register a compression engine with the manager.
3034 3034
3035 3035 The argument must be a ``compressionengine`` instance.
3036 3036 """
3037 3037 if not isinstance(engine, compressionengine):
3038 3038 raise ValueError(_('argument must be a compressionengine'))
3039 3039
3040 3040 name = engine.name()
3041 3041
3042 3042 if name in self._engines:
3043 3043 raise error.Abort(_('compression engine %s already registered') %
3044 3044 name)
3045 3045
3046 3046 bundleinfo = engine.bundletype()
3047 3047 if bundleinfo:
3048 3048 bundlename, bundletype = bundleinfo
3049 3049
3050 3050 if bundlename in self._bundlenames:
3051 3051 raise error.Abort(_('bundle name %s already registered') %
3052 3052 bundlename)
3053 3053 if bundletype in self._bundletypes:
3054 3054 raise error.Abort(_('bundle type %s already registered by %s') %
3055 3055 (bundletype, self._bundletypes[bundletype]))
3056 3056
3057 3057 # Only register the external-facing name if one was declared.
3058 3058 if bundlename:
3059 3059 self._bundlenames[bundlename] = name
3060 3060
3061 3061 self._bundletypes[bundletype] = name
3062 3062
3063 3063 wiresupport = engine.wireprotosupport()
3064 3064 if wiresupport:
3065 3065 wiretype = wiresupport.name
3066 3066 if wiretype in self._wiretypes:
3067 3067 raise error.Abort(_('wire protocol compression %s already '
3068 3068 'registered by %s') %
3069 3069 (wiretype, self._wiretypes[wiretype]))
3070 3070
3071 3071 self._wiretypes[wiretype] = name
3072 3072
3073 3073 revlogheader = engine.revlogheader()
3074 3074 if revlogheader and revlogheader in self._revlogheaders:
3075 3075 raise error.Abort(_('revlog header %s already registered by %s') %
3076 3076 (revlogheader, self._revlogheaders[revlogheader]))
3077 3077
3078 3078 if revlogheader:
3079 3079 self._revlogheaders[revlogheader] = name
3080 3080
3081 3081 self._engines[name] = engine
3082 3082
3083 3083 @property
3084 3084 def supportedbundlenames(self):
3085 3085 return set(self._bundlenames.keys())
3086 3086
3087 3087 @property
3088 3088 def supportedbundletypes(self):
3089 3089 return set(self._bundletypes.keys())
3090 3090
3091 3091 def forbundlename(self, bundlename):
3092 3092 """Obtain a compression engine registered to a bundle name.
3093 3093
3094 3094 Will raise KeyError if the bundle type isn't registered.
3095 3095
3096 3096 Will abort if the engine is known but not available.
3097 3097 """
3098 3098 engine = self._engines[self._bundlenames[bundlename]]
3099 3099 if not engine.available():
3100 3100 raise error.Abort(_('compression engine %s could not be loaded') %
3101 3101 engine.name())
3102 3102 return engine
3103 3103
3104 3104 def forbundletype(self, bundletype):
3105 3105 """Obtain a compression engine registered to a bundle type.
3106 3106
3107 3107 Will raise KeyError if the bundle type isn't registered.
3108 3108
3109 3109 Will abort if the engine is known but not available.
3110 3110 """
3111 3111 engine = self._engines[self._bundletypes[bundletype]]
3112 3112 if not engine.available():
3113 3113 raise error.Abort(_('compression engine %s could not be loaded') %
3114 3114 engine.name())
3115 3115 return engine
3116 3116
3117 3117 def supportedwireengines(self, role, onlyavailable=True):
3118 3118 """Obtain compression engines that support the wire protocol.
3119 3119
3120 3120 Returns a list of engines in prioritized order, most desired first.
3121 3121
3122 3122 If ``onlyavailable`` is set, filter out engines that can't be
3123 3123 loaded.
3124 3124 """
3125 3125 assert role in (SERVERROLE, CLIENTROLE)
3126 3126
3127 3127 attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'
3128 3128
3129 3129 engines = [self._engines[e] for e in self._wiretypes.values()]
3130 3130 if onlyavailable:
3131 3131 engines = [e for e in engines if e.available()]
3132 3132
3133 3133 def getkey(e):
3134 3134 # Sort first by priority, highest first. In case of tie, sort
3135 3135 # alphabetically. This is arbitrary, but ensures output is
3136 3136 # stable.
3137 3137 w = e.wireprotosupport()
3138 3138 return -1 * getattr(w, attr), w.name
3139 3139
3140 3140 return list(sorted(engines, key=getkey))
3141 3141
3142 3142 def forwiretype(self, wiretype):
3143 3143 engine = self._engines[self._wiretypes[wiretype]]
3144 3144 if not engine.available():
3145 3145 raise error.Abort(_('compression engine %s could not be loaded') %
3146 3146 engine.name())
3147 3147 return engine
3148 3148
3149 3149 def forrevlogheader(self, header):
3150 3150 """Obtain a compression engine registered to a revlog header.
3151 3151
3152 3152 Will raise KeyError if the revlog header value isn't registered.
3153 3153 """
3154 3154 return self._engines[self._revlogheaders[header]]
3155 3155
3156 3156 compengines = compressormanager()
3157 3157
3158 3158 class compressionengine(object):
3159 3159 """Base class for compression engines.
3160 3160
3161 3161 Compression engines must implement the interface defined by this class.
3162 3162 """
3163 3163 def name(self):
3164 3164 """Returns the name of the compression engine.
3165 3165
3166 3166 This is the key the engine is registered under.
3167 3167
3168 3168 This method must be implemented.
3169 3169 """
3170 3170 raise NotImplementedError()
3171 3171
3172 3172 def available(self):
3173 3173 """Whether the compression engine is available.
3174 3174
3175 3175 The intent of this method is to allow optional compression engines
3176 3176 that may not be available in all installations (such as engines relying
3177 3177 on C extensions that may not be present).
3178 3178 """
3179 3179 return True
3180 3180
3181 3181 def bundletype(self):
3182 3182 """Describes bundle identifiers for this engine.
3183 3183
3184 3184 If this compression engine isn't supported for bundles, returns None.
3185 3185
3186 3186 If this engine can be used for bundles, returns a 2-tuple of strings of
3187 3187 the user-facing "bundle spec" compression name and an internal
3188 3188 identifier used to denote the compression format within bundles. To
3189 3189 exclude the name from external usage, set the first element to ``None``.
3190 3190
3191 3191 If bundle compression is supported, the class must also implement
3192 3192 ``compressstream`` and ``decompressorreader``.
3193 3193 """
3194 3194 return None
3195 3195
3196 3196 def wireprotosupport(self):
3197 3197 """Declare support for this compression format on the wire protocol.
3198 3198
3199 3199 If this compression engine isn't supported for compressing wire
3200 3200 protocol payloads, returns None.
3201 3201
3202 3202 Otherwise, returns ``compenginewireprotosupport`` with the following
3203 3203 fields:
3204 3204
3205 3205 * String format identifier
3206 3206 * Integer priority for the server
3207 3207 * Integer priority for the client
3208 3208
3209 3209 The integer priorities are used to order the advertisement of format
3210 3210 support by server and client. The highest integer is advertised
3211 3211 first. Integers with non-positive values aren't advertised.
3212 3212
3213 3213 The priority values are somewhat arbitrary and only used for default
3214 3214 ordering. The relative order can be changed via config options.
3215 3215
3216 3216 If wire protocol compression is supported, the class must also implement
3217 3217 ``compressstream`` and ``decompressorreader``.
3218 3218 """
3219 3219 return None
3220 3220
3221 3221 def revlogheader(self):
3222 3222 """Header added to revlog chunks that identifies this engine.
3223 3223
3224 3224 If this engine can be used to compress revlogs, this method should
3225 3225 return the bytes used to identify chunks compressed with this engine.
3226 3226 Else, the method should return ``None`` to indicate it does not
3227 3227 participate in revlog compression.
3228 3228 """
3229 3229 return None
3230 3230
3231 3231 def compressstream(self, it, opts=None):
3232 3232 """Compress an iterator of chunks.
3233 3233
3234 3234 The method receives an iterator (ideally a generator) of chunks of
3235 3235 bytes to be compressed. It returns an iterator (ideally a generator)
3236 3236 of chunks of bytes representing the compressed output.
3237 3237
3238 3238 Optionally accepts an argument defining how to perform compression.
3239 3239 Each engine treats this argument differently.
3240 3240 """
3241 3241 raise NotImplementedError()
3242 3242
3243 3243 def decompressorreader(self, fh):
3244 3244 """Perform decompression on a file object.
3245 3245
3246 3246 Argument is an object with a ``read(size)`` method that returns
3247 3247 compressed data. Return value is an object with a ``read(size)`` that
3248 3248 returns uncompressed data.
3249 3249 """
3250 3250 raise NotImplementedError()
3251 3251
3252 3252 def revlogcompressor(self, opts=None):
3253 3253 """Obtain an object that can be used to compress revlog entries.
3254 3254
3255 3255 The object has a ``compress(data)`` method that compresses binary
3256 3256 data. This method returns compressed binary data or ``None`` if
3257 3257 the data could not be compressed (too small, not compressible, etc).
3258 3258 The returned data should have a header uniquely identifying this
3259 3259 compression format so decompression can be routed to this engine.
3260 3260 This header should be identified by the ``revlogheader()`` return
3261 3261 value.
3262 3262
3263 3263 The object has a ``decompress(data)`` method that decompresses
3264 3264 data. The method will only be called if ``data`` begins with
3265 3265 ``revlogheader()``. The method should return the raw, uncompressed
3266 3266 data or raise a ``RevlogError``.
3267 3267
3268 3268 The object is reusable but is not thread safe.
3269 3269 """
3270 3270 raise NotImplementedError()
3271 3271
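# A skeletal sketch of a compression engine an extension might register. The
# 'myzlib' name and identifiers are hypothetical; the body simply mirrors the
# built-in zlib engine defined below to keep the example streaming-correct:
#
#   class _myzlibengine(compressionengine):
#       def name(self):
#           return 'myzlib'
#       def bundletype(self):
#           return 'myzlib', 'MZ'   # bundle spec name, internal identifier
#       def compressstream(self, it, opts=None):
#           z = zlib.compressobj((opts or {}).get('level', 6))
#           for chunk in it:
#               data = z.compress(chunk)
#               if data:
#                   yield data
#           yield z.flush()
#       def decompressorreader(self, fh):
#           def gen():
#               d = zlib.decompressobj()
#               for chunk in filechunkiter(fh):
#                   yield d.decompress(chunk)
#           return chunkbuffer(gen())
#
#   compengines.register(_myzlibengine())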
3272 3272 class _zlibengine(compressionengine):
3273 3273 def name(self):
3274 3274 return 'zlib'
3275 3275
3276 3276 def bundletype(self):
3277 3277 return 'gzip', 'GZ'
3278 3278
3279 3279 def wireprotosupport(self):
3280 3280 return compewireprotosupport('zlib', 20, 20)
3281 3281
3282 3282 def revlogheader(self):
3283 3283 return 'x'
3284 3284
3285 3285 def compressstream(self, it, opts=None):
3286 3286 opts = opts or {}
3287 3287
3288 3288 z = zlib.compressobj(opts.get('level', -1))
3289 3289 for chunk in it:
3290 3290 data = z.compress(chunk)
3291 3291 # Not all calls to compress emit data. It is cheaper to inspect
3292 3292 # here than to feed empty chunks through generator.
3293 3293 if data:
3294 3294 yield data
3295 3295
3296 3296 yield z.flush()
3297 3297
3298 3298 def decompressorreader(self, fh):
3299 3299 def gen():
3300 3300 d = zlib.decompressobj()
3301 3301 for chunk in filechunkiter(fh):
3302 3302 while chunk:
3303 3303 # Limit output size to limit memory.
3304 3304 yield d.decompress(chunk, 2 ** 18)
3305 3305 chunk = d.unconsumed_tail
3306 3306
3307 3307 return chunkbuffer(gen())
3308 3308
3309 3309 class zlibrevlogcompressor(object):
3310 3310 def compress(self, data):
3311 3311 insize = len(data)
3312 3312 # Caller handles empty input case.
3313 3313 assert insize > 0
3314 3314
3315 3315 if insize < 44:
3316 3316 return None
3317 3317
3318 3318 elif insize <= 1000000:
3319 3319 compressed = zlib.compress(data)
3320 3320 if len(compressed) < insize:
3321 3321 return compressed
3322 3322 return None
3323 3323
3324 3324 # zlib makes an internal copy of the input buffer, doubling
3325 3325 # memory usage for large inputs. So do streaming compression
3326 3326 # on large inputs.
3327 3327 else:
3328 3328 z = zlib.compressobj()
3329 3329 parts = []
3330 3330 pos = 0
3331 3331 while pos < insize:
3332 3332 pos2 = pos + 2**20
3333 3333 parts.append(z.compress(data[pos:pos2]))
3334 3334 pos = pos2
3335 3335 parts.append(z.flush())
3336 3336
3337 3337 if sum(map(len, parts)) < insize:
3338 3338 return ''.join(parts)
3339 3339 return None
3340 3340
3341 3341 def decompress(self, data):
3342 3342 try:
3343 3343 return zlib.decompress(data)
3344 3344 except zlib.error as e:
3345 3345 raise error.RevlogError(_('revlog decompress error: %s') %
3346 3346 str(e))
3347 3347
3348 3348 def revlogcompressor(self, opts=None):
3349 3349 return self.zlibrevlogcompressor()
3350 3350
3351 3351 compengines.register(_zlibengine())
3352 3352
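# A short sketch of looking an engine up and round-tripping data through it.
# The byte strings are hypothetical, and 'stringio' stands in for any
# file-like object wrapping the compressed bytes:
#
#   >>> eng = compengines.forbundlename('gzip')
#   >>> compressed = ''.join(eng.compressstream(iter(['some ', 'data'])))
#   >>> eng.decompressorreader(stringio(compressed)).read(9)
#   'some data'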
3353 3353 class _bz2engine(compressionengine):
3354 3354 def name(self):
3355 3355 return 'bz2'
3356 3356
3357 3357 def bundletype(self):
3358 3358 return 'bzip2', 'BZ'
3359 3359
3360 3360 # We declare a protocol name but don't advertise by default because
3361 3361 # it is slow.
3362 3362 def wireprotosupport(self):
3363 3363 return compewireprotosupport('bzip2', 0, 0)
3364 3364
3365 3365 def compressstream(self, it, opts=None):
3366 3366 opts = opts or {}
3367 3367 z = bz2.BZ2Compressor(opts.get('level', 9))
3368 3368 for chunk in it:
3369 3369 data = z.compress(chunk)
3370 3370 if data:
3371 3371 yield data
3372 3372
3373 3373 yield z.flush()
3374 3374
3375 3375 def decompressorreader(self, fh):
3376 3376 def gen():
3377 3377 d = bz2.BZ2Decompressor()
3378 3378 for chunk in filechunkiter(fh):
3379 3379 yield d.decompress(chunk)
3380 3380
3381 3381 return chunkbuffer(gen())
3382 3382
3383 3383 compengines.register(_bz2engine())
3384 3384
3385 3385 class _truncatedbz2engine(compressionengine):
3386 3386 def name(self):
3387 3387 return 'bz2truncated'
3388 3388
3389 3389 def bundletype(self):
3390 3390 return None, '_truncatedBZ'
3391 3391
3392 3392 # We don't implement compressstream because it is hackily handled elsewhere.
3393 3393
3394 3394 def decompressorreader(self, fh):
3395 3395 def gen():
3396 3396 # The input stream doesn't have the 'BZ' header. So add it back.
3397 3397 d = bz2.BZ2Decompressor()
3398 3398 d.decompress('BZ')
3399 3399 for chunk in filechunkiter(fh):
3400 3400 yield d.decompress(chunk)
3401 3401
3402 3402 return chunkbuffer(gen())
3403 3403
3404 3404 compengines.register(_truncatedbz2engine())
3405 3405
3406 3406 class _noopengine(compressionengine):
3407 3407 def name(self):
3408 3408 return 'none'
3409 3409
3410 3410 def bundletype(self):
3411 3411 return 'none', 'UN'
3412 3412
3413 3413 # Clients always support uncompressed payloads. Servers don't because
3414 3414 # unless you are on a fast network, uncompressed payloads can easily
3415 3415 # saturate your network pipe.
3416 3416 def wireprotosupport(self):
3417 3417 return compewireprotosupport('none', 0, 10)
3418 3418
3419 3419 # We don't implement revlogheader because it is handled specially
3420 3420 # in the revlog class.
3421 3421
3422 3422 def compressstream(self, it, opts=None):
3423 3423 return it
3424 3424
3425 3425 def decompressorreader(self, fh):
3426 3426 return fh
3427 3427
3428 3428 class nooprevlogcompressor(object):
3429 3429 def compress(self, data):
3430 3430 return None
3431 3431
3432 3432 def revlogcompressor(self, opts=None):
3433 3433 return self.nooprevlogcompressor()
3434 3434
3435 3435 compengines.register(_noopengine())
3436 3436
3437 3437 class _zstdengine(compressionengine):
3438 3438 def name(self):
3439 3439 return 'zstd'
3440 3440
3441 3441 @propertycache
3442 3442 def _module(self):
3443 3443 # Not all installs have the zstd module available. So defer importing
3444 3444 # until first access.
3445 3445 try:
3446 3446 from . import zstd
3447 3447 # Force delayed import.
3448 3448 zstd.__version__
3449 3449 return zstd
3450 3450 except ImportError:
3451 3451 return None
3452 3452
3453 3453 def available(self):
3454 3454 return bool(self._module)
3455 3455
3456 3456 def bundletype(self):
3457 3457 return 'zstd', 'ZS'
3458 3458
3459 3459 def wireprotosupport(self):
3460 3460 return compewireprotosupport('zstd', 50, 50)
3461 3461
3462 3462 def revlogheader(self):
3463 3463 return '\x28'
3464 3464
3465 3465 def compressstream(self, it, opts=None):
3466 3466 opts = opts or {}
3467 3467 # zstd level 3 is almost always significantly faster than zlib
3468 3468 # while providing no worse compression. It strikes a good balance
3469 3469 # between speed and compression.
3470 3470 level = opts.get('level', 3)
3471 3471
3472 3472 zstd = self._module
3473 3473 z = zstd.ZstdCompressor(level=level).compressobj()
3474 3474 for chunk in it:
3475 3475 data = z.compress(chunk)
3476 3476 if data:
3477 3477 yield data
3478 3478
3479 3479 yield z.flush()
3480 3480
3481 3481 def decompressorreader(self, fh):
3482 3482 zstd = self._module
3483 3483 dctx = zstd.ZstdDecompressor()
3484 3484 return chunkbuffer(dctx.read_from(fh))
3485 3485
3486 3486 class zstdrevlogcompressor(object):
3487 3487 def __init__(self, zstd, level=3):
3488 3488 # Writing the content size adds a few bytes to the output. However,
3489 3489 # it allows decompression to be more optimal since we can
3490 3490 # pre-allocate a buffer to hold the result.
3491 3491 self._cctx = zstd.ZstdCompressor(level=level,
3492 3492 write_content_size=True)
3493 3493 self._dctx = zstd.ZstdDecompressor()
3494 3494 self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
3495 3495 self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
3496 3496
3497 3497 def compress(self, data):
3498 3498 insize = len(data)
3499 3499 # Caller handles empty input case.
3500 3500 assert insize > 0
3501 3501
3502 3502 if insize < 50:
3503 3503 return None
3504 3504
3505 3505 elif insize <= 1000000:
3506 3506 compressed = self._cctx.compress(data)
3507 3507 if len(compressed) < insize:
3508 3508 return compressed
3509 3509 return None
3510 3510 else:
3511 3511 z = self._cctx.compressobj()
3512 3512 chunks = []
3513 3513 pos = 0
3514 3514 while pos < insize:
3515 3515 pos2 = pos + self._compinsize
3516 3516 chunk = z.compress(data[pos:pos2])
3517 3517 if chunk:
3518 3518 chunks.append(chunk)
3519 3519 pos = pos2
3520 3520 chunks.append(z.flush())
3521 3521
3522 3522 if sum(map(len, chunks)) < insize:
3523 3523 return ''.join(chunks)
3524 3524 return None
3525 3525
3526 3526 def decompress(self, data):
3527 3527 insize = len(data)
3528 3528
3529 3529 try:
3530 3530 # This was measured to be faster than other streaming
3531 3531 # decompressors.
3532 3532 dobj = self._dctx.decompressobj()
3533 3533 chunks = []
3534 3534 pos = 0
3535 3535 while pos < insize:
3536 3536 pos2 = pos + self._decompinsize
3537 3537 chunk = dobj.decompress(data[pos:pos2])
3538 3538 if chunk:
3539 3539 chunks.append(chunk)
3540 3540 pos = pos2
3541 3541 # Frame should be exhausted, so no finish() API.
3542 3542
3543 3543 return ''.join(chunks)
3544 3544 except Exception as e:
3545 3545 raise error.RevlogError(_('revlog decompress error: %s') %
3546 3546 str(e))
3547 3547
3548 3548 def revlogcompressor(self, opts=None):
3549 3549 opts = opts or {}
3550 3550 return self.zstdrevlogcompressor(self._module,
3551 3551 level=opts.get('level', 3))
3552 3552
3553 3553 compengines.register(_zstdengine())
3554 3554
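# With all of the engines above registered, advertisement order on the wire
# follows the declared priorities (for the server role: zstd 50, zlib 20,
# bzip2 0, none 0). A rough sketch, assuming the bundled zstd module is
# importable:
#
#   >>> [e.wireprotosupport().name
#   ...  for e in compengines.supportedwireengines(SERVERROLE)]
#   ['zstd', 'zlib', 'bzip2', 'none']
#
# supportedwireengines() itself only orders (and optionally availability-
# filters) engines; dropping non-positive priorities from the actual
# capability advertisement is left to the callers, per the wireprotosupport()
# docstring above.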
3555 3555 # convenient shortcut
3556 3556 dst = debugstacktrace