util: introduce timer()...
Simon Farnsworth
r30974:ae5d60bb default
@@ -1,3551 +1,3556
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import
17 17
18 18 import bz2
19 19 import calendar
20 20 import collections
21 21 import datetime
22 22 import errno
23 23 import gc
24 24 import hashlib
25 25 import imp
26 26 import os
27 27 import platform as pyplatform
28 28 import re as remod
29 29 import shutil
30 30 import signal
31 31 import socket
32 32 import stat
33 33 import string
34 34 import subprocess
35 35 import sys
36 36 import tempfile
37 37 import textwrap
38 38 import time
39 39 import traceback
40 40 import zlib
41 41
42 42 from . import (
43 43 encoding,
44 44 error,
45 45 i18n,
46 46 osutil,
47 47 parsers,
48 48 pycompat,
49 49 )
50 50
51 51 empty = pycompat.empty
52 52 httplib = pycompat.httplib
53 53 httpserver = pycompat.httpserver
54 54 pickle = pycompat.pickle
55 55 queue = pycompat.queue
56 56 socketserver = pycompat.socketserver
57 57 stderr = pycompat.stderr
58 58 stdin = pycompat.stdin
59 59 stdout = pycompat.stdout
60 60 stringio = pycompat.stringio
61 61 urlerr = pycompat.urlerr
62 62 urlparse = pycompat.urlparse
63 63 urlreq = pycompat.urlreq
64 64 xmlrpclib = pycompat.xmlrpclib
65 65
66 66 def isatty(fp):
67 67 try:
68 68 return fp.isatty()
69 69 except AttributeError:
70 70 return False
71 71
72 72 # glibc determines buffering on first write to stdout - if we replace a TTY
73 73 # destined stdout with a pipe destined stdout (e.g. pager), we want line
74 74 # buffering
75 75 if isatty(stdout):
76 76 stdout = os.fdopen(stdout.fileno(), 'wb', 1)
77 77
78 78 if pycompat.osname == 'nt':
79 79 from . import windows as platform
80 80 stdout = platform.winstdout(stdout)
81 81 else:
82 82 from . import posix as platform
83 83
84 84 _ = i18n._
85 85
86 86 bindunixsocket = platform.bindunixsocket
87 87 cachestat = platform.cachestat
88 88 checkexec = platform.checkexec
89 89 checklink = platform.checklink
90 90 copymode = platform.copymode
91 91 executablepath = platform.executablepath
92 92 expandglobs = platform.expandglobs
93 93 explainexit = platform.explainexit
94 94 findexe = platform.findexe
95 95 gethgcmd = platform.gethgcmd
96 96 getuser = platform.getuser
97 97 getpid = os.getpid
98 98 groupmembers = platform.groupmembers
99 99 groupname = platform.groupname
100 100 hidewindow = platform.hidewindow
101 101 isexec = platform.isexec
102 102 isowner = platform.isowner
103 103 localpath = platform.localpath
104 104 lookupreg = platform.lookupreg
105 105 makedir = platform.makedir
106 106 nlinks = platform.nlinks
107 107 normpath = platform.normpath
108 108 normcase = platform.normcase
109 109 normcasespec = platform.normcasespec
110 110 normcasefallback = platform.normcasefallback
111 111 openhardlinks = platform.openhardlinks
112 112 oslink = platform.oslink
113 113 parsepatchoutput = platform.parsepatchoutput
114 114 pconvert = platform.pconvert
115 115 poll = platform.poll
116 116 popen = platform.popen
117 117 posixfile = platform.posixfile
118 118 quotecommand = platform.quotecommand
119 119 readpipe = platform.readpipe
120 120 rename = platform.rename
121 121 removedirs = platform.removedirs
122 122 samedevice = platform.samedevice
123 123 samefile = platform.samefile
124 124 samestat = platform.samestat
125 125 setbinary = platform.setbinary
126 126 setflags = platform.setflags
127 127 setsignalhandler = platform.setsignalhandler
128 128 shellquote = platform.shellquote
129 129 spawndetached = platform.spawndetached
130 130 split = platform.split
131 131 sshargs = platform.sshargs
132 132 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
133 133 statisexec = platform.statisexec
134 134 statislink = platform.statislink
135 135 testpid = platform.testpid
136 136 umask = platform.umask
137 137 unlink = platform.unlink
138 138 unlinkpath = platform.unlinkpath
139 139 username = platform.username
140 140
141 141 # Python compatibility
142 142
143 143 _notset = object()
144 144
145 145 # disable Python's problematic floating point timestamps (issue4836)
146 146 # (Python hypocritically says you shouldn't change this behavior in
147 147 # libraries, and sure enough Mercurial is not a library.)
148 148 os.stat_float_times(False)
149 149
150 150 def safehasattr(thing, attr):
151 151 return getattr(thing, attr, _notset) is not _notset
152 152
153 153 def bitsfrom(container):
154 154 bits = 0
155 155 for bit in container:
156 156 bits |= bit
157 157 return bits
158 158
159 159 DIGESTS = {
160 160 'md5': hashlib.md5,
161 161 'sha1': hashlib.sha1,
162 162 'sha512': hashlib.sha512,
163 163 }
164 164 # List of digest types from strongest to weakest
165 165 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
166 166
167 167 for k in DIGESTS_BY_STRENGTH:
168 168 assert k in DIGESTS
169 169
170 170 class digester(object):
171 171 """helper to compute digests.
172 172
173 173 This helper can be used to compute one or more digests given their name.
174 174
175 175 >>> d = digester(['md5', 'sha1'])
176 176 >>> d.update('foo')
177 177 >>> [k for k in sorted(d)]
178 178 ['md5', 'sha1']
179 179 >>> d['md5']
180 180 'acbd18db4cc2f85cedef654fccc4a4d8'
181 181 >>> d['sha1']
182 182 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
183 183 >>> digester.preferred(['md5', 'sha1'])
184 184 'sha1'
185 185 """
186 186
187 187 def __init__(self, digests, s=''):
188 188 self._hashes = {}
189 189 for k in digests:
190 190 if k not in DIGESTS:
191 191 raise Abort(_('unknown digest type: %s') % k)
192 192 self._hashes[k] = DIGESTS[k]()
193 193 if s:
194 194 self.update(s)
195 195
196 196 def update(self, data):
197 197 for h in self._hashes.values():
198 198 h.update(data)
199 199
200 200 def __getitem__(self, key):
201 201 if key not in DIGESTS:
202 202 raise Abort(_('unknown digest type: %s') % key)
203 203 return self._hashes[key].hexdigest()
204 204
205 205 def __iter__(self):
206 206 return iter(self._hashes)
207 207
208 208 @staticmethod
209 209 def preferred(supported):
210 210 """returns the strongest digest type in both supported and DIGESTS."""
211 211
212 212 for k in DIGESTS_BY_STRENGTH:
213 213 if k in supported:
214 214 return k
215 215 return None
216 216
217 217 class digestchecker(object):
218 218 """file handle wrapper that additionally checks content against a given
219 219 size and digests.
220 220
221 221 d = digestchecker(fh, size, {'md5': '...'})
222 222
223 223 When multiple digests are given, all of them are validated.
224 224 """
225 225
226 226 def __init__(self, fh, size, digests):
227 227 self._fh = fh
228 228 self._size = size
229 229 self._got = 0
230 230 self._digests = dict(digests)
231 231 self._digester = digester(self._digests.keys())
232 232
233 233 def read(self, length=-1):
234 234 content = self._fh.read(length)
235 235 self._digester.update(content)
236 236 self._got += len(content)
237 237 return content
238 238
239 239 def validate(self):
240 240 if self._size != self._got:
241 241 raise Abort(_('size mismatch: expected %d, got %d') %
242 242 (self._size, self._got))
243 243 for k, v in self._digests.items():
244 244 if v != self._digester[k]:
245 245 # i18n: first parameter is a digest name
246 246 raise Abort(_('%s mismatch: expected %s, got %s') %
247 247 (k, v, self._digester[k]))
248 248
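# Editor's sketch (not upstream code): how a caller might use digestchecker
# to verify streamed content. 'fh', 'size' and 'expectedsha1' are
# hypothetical inputs.
def _digestcheckerdemo(fh, size, expectedsha1):
    wrapped = digestchecker(fh, size, {'sha1': expectedsha1})
    while wrapped.read(4096):
        pass
    # validate() raises Abort on a size or digest mismatch
    wrapped.validate()
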
249 249 try:
250 250 buffer = buffer
251 251 except NameError:
252 252 if not pycompat.ispy3:
253 253 def buffer(sliceable, offset=0, length=None):
254 254 if length is not None:
255 255 return sliceable[offset:offset + length]
256 256 return sliceable[offset:]
257 257 else:
258 258 def buffer(sliceable, offset=0, length=None):
259 259 if length is not None:
260 260 return memoryview(sliceable)[offset:offset + length]
261 261 return memoryview(sliceable)[offset:]
262 262
263 263 closefds = pycompat.osname == 'posix'
264 264
265 265 _chunksize = 4096
266 266
267 267 class bufferedinputpipe(object):
268 268 """a manually buffered input pipe
269 269
270 270 Python will not let us use buffered IO and lazy reading with 'polling' at
271 271 the same time. We cannot probe the buffer state and select will not detect
272 272 that data are ready to read if they are already buffered.
273 273
274 274 This class lets us work around that by implementing its own buffering
275 275 (allowing efficient readline) while offering a way to know if the buffer is
276 276 empty from the output (allowing collaboration of the buffer with polling).
277 277
278 278 This class lives in the 'util' module because it makes use of the 'os'
279 279 module from the python stdlib.
280 280 """
281 281
282 282 def __init__(self, input):
283 283 self._input = input
284 284 self._buffer = []
285 285 self._eof = False
286 286 self._lenbuf = 0
287 287
288 288 @property
289 289 def hasbuffer(self):
290 290 """True if any data is currently buffered
291 291 
292 292 This will be used externally as a pre-step for polling IO. If there
293 293 is already data then no polling should be set in place."""
294 294 return bool(self._buffer)
295 295
296 296 @property
297 297 def closed(self):
298 298 return self._input.closed
299 299
300 300 def fileno(self):
301 301 return self._input.fileno()
302 302
303 303 def close(self):
304 304 return self._input.close()
305 305
306 306 def read(self, size):
307 307 while (not self._eof) and (self._lenbuf < size):
308 308 self._fillbuffer()
309 309 return self._frombuffer(size)
310 310
311 311 def readline(self, *args, **kwargs):
312 312 if 1 < len(self._buffer):
313 313 # this should not happen because both read and readline end with a
314 314 # _frombuffer call that collapses it.
315 315 self._buffer = [''.join(self._buffer)]
316 316 self._lenbuf = len(self._buffer[0])
317 317 lfi = -1
318 318 if self._buffer:
319 319 lfi = self._buffer[-1].find('\n')
320 320 while (not self._eof) and lfi < 0:
321 321 self._fillbuffer()
322 322 if self._buffer:
323 323 lfi = self._buffer[-1].find('\n')
324 324 size = lfi + 1
325 325 if lfi < 0: # end of file
326 326 size = self._lenbuf
327 327 elif 1 < len(self._buffer):
328 328 # we need to take previous chunks into account
329 329 size += self._lenbuf - len(self._buffer[-1])
330 330 return self._frombuffer(size)
331 331
332 332 def _frombuffer(self, size):
333 333 """return at most 'size' data from the buffer
334 334
335 335 The data are removed from the buffer."""
336 336 if size == 0 or not self._buffer:
337 337 return ''
338 338 buf = self._buffer[0]
339 339 if 1 < len(self._buffer):
340 340 buf = ''.join(self._buffer)
341 341
342 342 data = buf[:size]
343 343 buf = buf[len(data):]
344 344 if buf:
345 345 self._buffer = [buf]
346 346 self._lenbuf = len(buf)
347 347 else:
348 348 self._buffer = []
349 349 self._lenbuf = 0
350 350 return data
351 351
352 352 def _fillbuffer(self):
353 353 """read data to the buffer"""
354 354 data = os.read(self._input.fileno(), _chunksize)
355 355 if not data:
356 356 self._eof = True
357 357 else:
358 358 self._lenbuf += len(data)
359 359 self._buffer.append(data)
360 360
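# Editor's sketch (not upstream code): bufferedinputpipe pairs buffered
# readline() with polling. 'proc' is a hypothetical subprocess handle; any
# pipe-backed file object works.
def _bufferedpipedemo(proc):
    pipe = bufferedinputpipe(proc.stdout)
    # only poll the fd when nothing is already buffered
    if not pipe.hasbuffer:
        poll([pipe.fileno()])
    return pipe.readline()
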
361 361 def popen2(cmd, env=None, newlines=False):
362 362 # Setting bufsize to -1 lets the system decide the buffer size.
363 363 # The default for bufsize is 0, meaning unbuffered. This leads to
364 364 # poor performance on Mac OS X: http://bugs.python.org/issue4194
365 365 p = subprocess.Popen(cmd, shell=True, bufsize=-1,
366 366 close_fds=closefds,
367 367 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
368 368 universal_newlines=newlines,
369 369 env=env)
370 370 return p.stdin, p.stdout
371 371
372 372 def popen3(cmd, env=None, newlines=False):
373 373 stdin, stdout, stderr, p = popen4(cmd, env, newlines)
374 374 return stdin, stdout, stderr
375 375
376 376 def popen4(cmd, env=None, newlines=False, bufsize=-1):
377 377 p = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
378 378 close_fds=closefds,
379 379 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
380 380 stderr=subprocess.PIPE,
381 381 universal_newlines=newlines,
382 382 env=env)
383 383 return p.stdin, p.stdout, p.stderr, p
384 384
385 385 def version():
386 386 """Return version information if available."""
387 387 try:
388 388 from . import __version__
389 389 return __version__.version
390 390 except ImportError:
391 391 return 'unknown'
392 392
393 393 def versiontuple(v=None, n=4):
394 394 """Parses a Mercurial version string into an N-tuple.
395 395
396 396 The version string to be parsed is specified with the ``v`` argument.
397 397 If it isn't defined, the current Mercurial version string will be parsed.
398 398
399 399 ``n`` can be 2, 3, or 4. Here is how some version strings map to
400 400 returned values:
401 401
402 402 >>> v = '3.6.1+190-df9b73d2d444'
403 403 >>> versiontuple(v, 2)
404 404 (3, 6)
405 405 >>> versiontuple(v, 3)
406 406 (3, 6, 1)
407 407 >>> versiontuple(v, 4)
408 408 (3, 6, 1, '190-df9b73d2d444')
409 409
410 410 >>> versiontuple('3.6.1+190-df9b73d2d444+20151118')
411 411 (3, 6, 1, '190-df9b73d2d444+20151118')
412 412
413 413 >>> v = '3.6'
414 414 >>> versiontuple(v, 2)
415 415 (3, 6)
416 416 >>> versiontuple(v, 3)
417 417 (3, 6, None)
418 418 >>> versiontuple(v, 4)
419 419 (3, 6, None, None)
420 420
421 421 >>> v = '3.9-rc'
422 422 >>> versiontuple(v, 2)
423 423 (3, 9)
424 424 >>> versiontuple(v, 3)
425 425 (3, 9, None)
426 426 >>> versiontuple(v, 4)
427 427 (3, 9, None, 'rc')
428 428
429 429 >>> v = '3.9-rc+2-02a8fea4289b'
430 430 >>> versiontuple(v, 2)
431 431 (3, 9)
432 432 >>> versiontuple(v, 3)
433 433 (3, 9, None)
434 434 >>> versiontuple(v, 4)
435 435 (3, 9, None, 'rc+2-02a8fea4289b')
436 436 """
437 437 if not v:
438 438 v = version()
439 439 parts = remod.split('[\+-]', v, 1)
440 440 if len(parts) == 1:
441 441 vparts, extra = parts[0], None
442 442 else:
443 443 vparts, extra = parts
444 444
445 445 vints = []
446 446 for i in vparts.split('.'):
447 447 try:
448 448 vints.append(int(i))
449 449 except ValueError:
450 450 break
451 451 # (3, 6) -> (3, 6, None)
452 452 while len(vints) < 3:
453 453 vints.append(None)
454 454
455 455 if n == 2:
456 456 return (vints[0], vints[1])
457 457 if n == 3:
458 458 return (vints[0], vints[1], vints[2])
459 459 if n == 4:
460 460 return (vints[0], vints[1], vints[2], extra)
461 461
462 462 # used by parsedate
463 463 defaultdateformats = (
464 464 '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
465 465 '%Y-%m-%dT%H:%M', # without seconds
466 466 '%Y-%m-%dT%H%M%S', # another awful but legal variant without :
467 467 '%Y-%m-%dT%H%M', # without seconds
468 468 '%Y-%m-%d %H:%M:%S', # our common legal variant
469 469 '%Y-%m-%d %H:%M', # without seconds
470 470 '%Y-%m-%d %H%M%S', # without :
471 471 '%Y-%m-%d %H%M', # without seconds
472 472 '%Y-%m-%d %I:%M:%S%p',
473 473 '%Y-%m-%d %H:%M',
474 474 '%Y-%m-%d %I:%M%p',
475 475 '%Y-%m-%d',
476 476 '%m-%d',
477 477 '%m/%d',
478 478 '%m/%d/%y',
479 479 '%m/%d/%Y',
480 480 '%a %b %d %H:%M:%S %Y',
481 481 '%a %b %d %I:%M:%S%p %Y',
482 482 '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
483 483 '%b %d %H:%M:%S %Y',
484 484 '%b %d %I:%M:%S%p %Y',
485 485 '%b %d %H:%M:%S',
486 486 '%b %d %I:%M:%S%p',
487 487 '%b %d %H:%M',
488 488 '%b %d %I:%M%p',
489 489 '%b %d %Y',
490 490 '%b %d',
491 491 '%H:%M:%S',
492 492 '%I:%M:%S%p',
493 493 '%H:%M',
494 494 '%I:%M%p',
495 495 )
496 496
497 497 extendeddateformats = defaultdateformats + (
498 498 "%Y",
499 499 "%Y-%m",
500 500 "%b",
501 501 "%b %Y",
502 502 )
503 503
504 504 def cachefunc(func):
505 505 '''cache the result of function calls'''
506 506 # XXX doesn't handle keyword args
507 507 if func.__code__.co_argcount == 0:
508 508 cache = []
509 509 def f():
510 510 if len(cache) == 0:
511 511 cache.append(func())
512 512 return cache[0]
513 513 return f
514 514 cache = {}
515 515 if func.__code__.co_argcount == 1:
516 516 # we gain a small amount of time because
517 517 # we don't need to pack/unpack the list
518 518 def f(arg):
519 519 if arg not in cache:
520 520 cache[arg] = func(arg)
521 521 return cache[arg]
522 522 else:
523 523 def f(*args):
524 524 if args not in cache:
525 525 cache[args] = func(*args)
526 526 return cache[args]
527 527
528 528 return f
529 529
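# Editor's sketch (not upstream code): cachefunc memoizes by positional
# arguments, so a second call with the same argument reuses the first result.
@cachefunc
def _expensivelookup(key):
    return key * 2  # stand-in for a costly computation
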
530 530 class sortdict(dict):
531 531 '''a simple insertion-ordered dictionary'''
532 532 def __init__(self, data=None):
533 533 self._list = []
534 534 if data:
535 535 self.update(data)
536 536 def copy(self):
537 537 return sortdict(self)
538 538 def __setitem__(self, key, val):
539 539 if key in self:
540 540 self._list.remove(key)
541 541 self._list.append(key)
542 542 dict.__setitem__(self, key, val)
543 543 def __iter__(self):
544 544 return self._list.__iter__()
545 545 def update(self, src):
546 546 if isinstance(src, dict):
547 547 src = src.iteritems()
548 548 for k, v in src:
549 549 self[k] = v
550 550 def clear(self):
551 551 dict.clear(self)
552 552 self._list = []
553 553 def items(self):
554 554 return [(k, self[k]) for k in self._list]
555 555 def __delitem__(self, key):
556 556 dict.__delitem__(self, key)
557 557 self._list.remove(key)
558 558 def pop(self, key, *args, **kwargs):
559 559 dict.pop(self, key, *args, **kwargs)
560 560 try:
561 561 self._list.remove(key)
562 562 except ValueError:
563 563 pass
564 564 def keys(self):
565 565 return self._list[:]
566 566 def iterkeys(self):
567 567 return self._list.__iter__()
568 568 def iteritems(self):
569 569 for k in self._list:
570 570 yield k, self[k]
571 571 def insert(self, index, key, val):
572 572 self._list.insert(index, key)
573 573 dict.__setitem__(self, key, val)
574 574 def __repr__(self):
575 575 if not self:
576 576 return '%s()' % self.__class__.__name__
577 577 return '%s(%r)' % (self.__class__.__name__, self.items())
578 578
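# Editor's sketch (not upstream code): sortdict preserves insertion order,
# and re-setting an existing key moves it to the end.
def _sortdictdemo():
    d = sortdict([('a', 1), ('b', 2)])
    d['a'] = 3        # 'a' becomes the most recently set key
    return d.keys()   # ['b', 'a']
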
579 579 class _lrucachenode(object):
580 580 """A node in a doubly linked list.
581 581
582 582 Holds a reference to nodes on either side as well as a key-value
583 583 pair for the dictionary entry.
584 584 """
585 585 __slots__ = (u'next', u'prev', u'key', u'value')
586 586
587 587 def __init__(self):
588 588 self.next = None
589 589 self.prev = None
590 590
591 591 self.key = _notset
592 592 self.value = None
593 593
594 594 def markempty(self):
595 595 """Mark the node as emptied."""
596 596 self.key = _notset
597 597
598 598 class lrucachedict(object):
599 599 """Dict that caches most recent accesses and sets.
600 600
601 601 The dict consists of an actual backing dict - indexed by original
602 602 key - and a doubly linked circular list defining the order of entries in
603 603 the cache.
604 604
605 605 The head node is the newest entry in the cache. If the cache is full,
606 606 we recycle head.prev and make it the new head. Cache accesses result in
607 607 the node being moved to before the existing head and being marked as the
608 608 new head node.
609 609 """
610 610 def __init__(self, max):
611 611 self._cache = {}
612 612
613 613 self._head = head = _lrucachenode()
614 614 head.prev = head
615 615 head.next = head
616 616 self._size = 1
617 617 self._capacity = max
618 618
619 619 def __len__(self):
620 620 return len(self._cache)
621 621
622 622 def __contains__(self, k):
623 623 return k in self._cache
624 624
625 625 def __iter__(self):
626 626 # We don't have to iterate in cache order, but why not.
627 627 n = self._head
628 628 for i in range(len(self._cache)):
629 629 yield n.key
630 630 n = n.next
631 631
632 632 def __getitem__(self, k):
633 633 node = self._cache[k]
634 634 self._movetohead(node)
635 635 return node.value
636 636
637 637 def __setitem__(self, k, v):
638 638 node = self._cache.get(k)
639 639 # Replace existing value and mark as newest.
640 640 if node is not None:
641 641 node.value = v
642 642 self._movetohead(node)
643 643 return
644 644
645 645 if self._size < self._capacity:
646 646 node = self._addcapacity()
647 647 else:
648 648 # Grab the last/oldest item.
649 649 node = self._head.prev
650 650
651 651 # At capacity. Kill the old entry.
652 652 if node.key is not _notset:
653 653 del self._cache[node.key]
654 654
655 655 node.key = k
656 656 node.value = v
657 657 self._cache[k] = node
658 658 # And mark it as newest entry. No need to adjust order since it
659 659 # is already self._head.prev.
660 660 self._head = node
661 661
662 662 def __delitem__(self, k):
663 663 node = self._cache.pop(k)
664 664 node.markempty()
665 665
666 666 # Temporarily mark as newest item before re-adjusting head to make
667 667 # this node the oldest item.
668 668 self._movetohead(node)
669 669 self._head = node.next
670 670
671 671 # Additional dict methods.
672 672
673 673 def get(self, k, default=None):
674 674 try:
675 675 return self._cache[k].value
676 676 except KeyError:
677 677 return default
678 678
679 679 def clear(self):
680 680 n = self._head
681 681 while n.key is not _notset:
682 682 n.markempty()
683 683 n = n.next
684 684
685 685 self._cache.clear()
686 686
687 687 def copy(self):
688 688 result = lrucachedict(self._capacity)
689 689 n = self._head.prev
690 690 # Iterate in oldest-to-newest order, so the copy has the right ordering
691 691 for i in range(len(self._cache)):
692 692 result[n.key] = n.value
693 693 n = n.prev
694 694 return result
695 695
696 696 def _movetohead(self, node):
697 697 """Mark a node as the newest, making it the new head.
698 698
699 699 When a node is accessed, it becomes the freshest entry in the LRU
700 700 list, which is denoted by self._head.
701 701
702 702 Visually, let's make ``N`` the new head node (* denotes head):
703 703
704 704 previous/oldest <-> head <-> next/next newest
705 705
706 706 ----<->--- A* ---<->-----
707 707 | |
708 708 E <-> D <-> N <-> C <-> B
709 709
710 710 To:
711 711
712 712 ----<->--- N* ---<->-----
713 713 | |
714 714 E <-> D <-> C <-> B <-> A
715 715
716 716 This requires the following moves:
717 717
718 718 C.next = D (node.prev.next = node.next)
719 719 D.prev = C (node.next.prev = node.prev)
720 720 E.next = N (head.prev.next = node)
721 721 N.prev = E (node.prev = head.prev)
722 722 N.next = A (node.next = head)
723 723 A.prev = N (head.prev = node)
724 724 """
725 725 head = self._head
726 726 # C.next = D
727 727 node.prev.next = node.next
728 728 # D.prev = C
729 729 node.next.prev = node.prev
730 730 # N.prev = E
731 731 node.prev = head.prev
732 732 # N.next = A
733 733 # It is tempting to do just "head" here, however if node is
734 734 # adjacent to head, this will do bad things.
735 735 node.next = head.prev.next
736 736 # E.next = N
737 737 node.next.prev = node
738 738 # A.prev = N
739 739 node.prev.next = node
740 740
741 741 self._head = node
742 742
743 743 def _addcapacity(self):
744 744 """Add a node to the circular linked list.
745 745
746 746 The new node is inserted before the head node.
747 747 """
748 748 head = self._head
749 749 node = _lrucachenode()
750 750 head.prev.next = node
751 751 node.prev = head.prev
752 752 node.next = head
753 753 head.prev = node
754 754 self._size += 1
755 755 return node
756 756
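# Editor's sketch (not upstream code): once lrucachedict is at capacity,
# inserting a new key recycles the least recently used node.
def _lrucachedictdemo():
    d = lrucachedict(2)
    d['a'] = 1
    d['b'] = 2
    d['a']            # touch 'a' so 'b' is now the oldest entry
    d['c'] = 3        # evicts 'b'
    return 'b' in d   # False
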
757 757 def lrucachefunc(func):
758 758 '''cache most recent results of function calls'''
759 759 cache = {}
760 760 order = collections.deque()
761 761 if func.__code__.co_argcount == 1:
762 762 def f(arg):
763 763 if arg not in cache:
764 764 if len(cache) > 20:
765 765 del cache[order.popleft()]
766 766 cache[arg] = func(arg)
767 767 else:
768 768 order.remove(arg)
769 769 order.append(arg)
770 770 return cache[arg]
771 771 else:
772 772 def f(*args):
773 773 if args not in cache:
774 774 if len(cache) > 20:
775 775 del cache[order.popleft()]
776 776 cache[args] = func(*args)
777 777 else:
778 778 order.remove(args)
779 779 order.append(args)
780 780 return cache[args]
781 781
782 782 return f
783 783
784 784 class propertycache(object):
785 785 def __init__(self, func):
786 786 self.func = func
787 787 self.name = func.__name__
788 788 def __get__(self, obj, type=None):
789 789 result = self.func(obj)
790 790 self.cachevalue(obj, result)
791 791 return result
792 792
793 793 def cachevalue(self, obj, value):
794 794 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
795 795 obj.__dict__[self.name] = value
796 796
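# Editor's sketch (not upstream code): propertycache computes an attribute
# once per instance; cachevalue() stores the result in the instance __dict__
# so later reads bypass the descriptor entirely.
class _propertycachedemo(object):
    @propertycache
    def answer(self):
        return 42  # runs on first access only
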
797 797 def pipefilter(s, cmd):
798 798 '''filter string S through command CMD, returning its output'''
799 799 p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
800 800 stdin=subprocess.PIPE, stdout=subprocess.PIPE)
801 801 pout, perr = p.communicate(s)
802 802 return pout
803 803
804 804 def tempfilter(s, cmd):
805 805 '''filter string S through a pair of temporary files with CMD.
806 806 CMD is used as a template to create the real command to be run,
807 807 with the strings INFILE and OUTFILE replaced by the real names of
808 808 the temporary files generated.'''
809 809 inname, outname = None, None
810 810 try:
811 811 infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
812 812 fp = os.fdopen(infd, pycompat.sysstr('wb'))
813 813 fp.write(s)
814 814 fp.close()
815 815 outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
816 816 os.close(outfd)
817 817 cmd = cmd.replace('INFILE', inname)
818 818 cmd = cmd.replace('OUTFILE', outname)
819 819 code = os.system(cmd)
820 820 if pycompat.sysplatform == 'OpenVMS' and code & 1:
821 821 code = 0
822 822 if code:
823 823 raise Abort(_("command '%s' failed: %s") %
824 824 (cmd, explainexit(code)))
825 825 return readfile(outname)
826 826 finally:
827 827 try:
828 828 if inname:
829 829 os.unlink(inname)
830 830 except OSError:
831 831 pass
832 832 try:
833 833 if outname:
834 834 os.unlink(outname)
835 835 except OSError:
836 836 pass
837 837
838 838 filtertable = {
839 839 'tempfile:': tempfilter,
840 840 'pipe:': pipefilter,
841 841 }
842 842
843 843 def filter(s, cmd):
844 844 "filter a string through a command that transforms its input to its output"
845 845 for name, fn in filtertable.iteritems():
846 846 if cmd.startswith(name):
847 847 return fn(s, cmd[len(name):].lstrip())
848 848 return pipefilter(s, cmd)
849 849
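# Editor's sketch (not upstream code): the prefix selects the filtering
# mechanism; a command with no known prefix falls through to pipefilter.
# The 'tr' command is illustrative.
def _filterdemo(s):
    out = filter(s, 'pipe: tr a-z A-Z')  # explicit pipe filter
    return out, filter(s, 'tr a-z A-Z')  # same result via the default path
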
850 850 def binary(s):
851 851 """return true if a string is binary data"""
852 852 return bool(s and '\0' in s)
853 853
854 854 def increasingchunks(source, min=1024, max=65536):
855 855 '''return no less than min bytes per chunk while data remains,
856 856 doubling min after each chunk until it reaches max'''
857 857 def log2(x):
858 858 if not x:
859 859 return 0
860 860 i = 0
861 861 while x:
862 862 x >>= 1
863 863 i += 1
864 864 return i - 1
865 865
866 866 buf = []
867 867 blen = 0
868 868 for chunk in source:
869 869 buf.append(chunk)
870 870 blen += len(chunk)
871 871 if blen >= min:
872 872 if min < max:
873 873 min = min << 1
874 874 nmin = 1 << log2(blen)
875 875 if nmin > min:
876 876 min = nmin
877 877 if min > max:
878 878 min = max
879 879 yield ''.join(buf)
880 880 blen = 0
881 881 buf = []
882 882 if buf:
883 883 yield ''.join(buf)
884 884
885 885 Abort = error.Abort
886 886
887 887 def always(fn):
888 888 return True
889 889
890 890 def never(fn):
891 891 return False
892 892
893 893 def nogc(func):
894 894 """disable garbage collector
895 895
896 896 Python's garbage collector triggers a GC each time a certain number of
897 897 container objects (the number being defined by gc.get_threshold()) are
898 898 allocated even when marked not to be tracked by the collector. Tracking has
899 899 no effect on when GCs are triggered, only on what objects the GC looks
900 900 into. As a workaround, disable GC while building complex (huge)
901 901 containers.
902 902
903 903 This garbage collector issue has been fixed in Python 2.7.
904 904 """
905 905 if sys.version_info >= (2, 7):
906 906 return func
907 907 def wrapper(*args, **kwargs):
908 908 gcenabled = gc.isenabled()
909 909 gc.disable()
910 910 try:
911 911 return func(*args, **kwargs)
912 912 finally:
913 913 if gcenabled:
914 914 gc.enable()
915 915 return wrapper
916 916
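# Editor's sketch (not upstream code): on pre-2.7 interpreters, nogc turns
# collection off while a huge container is built and then restores the
# previous GC state; on 2.7+ it is a no-op passthrough.
@nogc
def _buildbigmap(items):
    return dict((item, True) for item in items)
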
917 917 def pathto(root, n1, n2):
918 918 '''return the relative path from one place to another.
919 919 root should use os.sep to separate directories
920 920 n1 should use os.sep to separate directories
921 921 n2 should use "/" to separate directories
922 922 returns an os.sep-separated path.
923 923
924 924 If n1 is a relative path, it's assumed it's
925 925 relative to root.
926 926 n2 should always be relative to root.
927 927 '''
928 928 if not n1:
929 929 return localpath(n2)
930 930 if os.path.isabs(n1):
931 931 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
932 932 return os.path.join(root, localpath(n2))
933 933 n2 = '/'.join((pconvert(root), n2))
934 934 a, b = splitpath(n1), n2.split('/')
935 935 a.reverse()
936 936 b.reverse()
937 937 while a and b and a[-1] == b[-1]:
938 938 a.pop()
939 939 b.pop()
940 940 b.reverse()
941 941 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
942 942
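# Editor's sketch (not upstream code) with hypothetical paths: pathto walks
# up from n1 to the common ancestor, then down to n2.
def _pathtodemo():
    # on POSIX this returns '../../c/d'
    return pathto('/repo', '/repo/a/b', 'c/d')
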
943 943 def mainfrozen():
944 944 """return True if we are a frozen executable.
945 945
946 946 The code supports py2exe (most common, Windows only) and tools/freeze
947 947 (portable, not much used).
948 948 """
949 949 return (safehasattr(sys, "frozen") or # new py2exe
950 950 safehasattr(sys, "importers") or # old py2exe
951 951 imp.is_frozen(u"__main__")) # tools/freeze
952 952
953 953 # the location of data files matching the source code
954 954 if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
955 955 # executable version (py2exe) doesn't support __file__
956 956 datapath = os.path.dirname(pycompat.sysexecutable)
957 957 else:
958 958 datapath = os.path.dirname(__file__)
959 959
960 960 if not isinstance(datapath, bytes):
961 961 datapath = pycompat.fsencode(datapath)
962 962
963 963 i18n.setdatapath(datapath)
964 964
965 965 _hgexecutable = None
966 966
967 967 def hgexecutable():
968 968 """return location of the 'hg' executable.
969 969
970 970 Defaults to $HG or 'hg' in the search path.
971 971 """
972 972 if _hgexecutable is None:
973 973 hg = encoding.environ.get('HG')
974 974 mainmod = sys.modules['__main__']
975 975 if hg:
976 976 _sethgexecutable(hg)
977 977 elif mainfrozen():
978 978 if getattr(sys, 'frozen', None) == 'macosx_app':
979 979 # Env variable set by py2app
980 980 _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
981 981 else:
982 982 _sethgexecutable(pycompat.sysexecutable)
983 983 elif os.path.basename(getattr(mainmod, '__file__', '')) == 'hg':
984 984 _sethgexecutable(mainmod.__file__)
985 985 else:
986 986 exe = findexe('hg') or os.path.basename(sys.argv[0])
987 987 _sethgexecutable(exe)
988 988 return _hgexecutable
989 989
990 990 def _sethgexecutable(path):
991 991 """set location of the 'hg' executable"""
992 992 global _hgexecutable
993 993 _hgexecutable = path
994 994
995 995 def _isstdout(f):
996 996 fileno = getattr(f, 'fileno', None)
997 997 return fileno and fileno() == sys.__stdout__.fileno()
998 998
999 999 def shellenviron(environ=None):
1000 1000 """return environ with optional override, useful for shelling out"""
1001 1001 def py2shell(val):
1002 1002 'convert python object into string that is useful to shell'
1003 1003 if val is None or val is False:
1004 1004 return '0'
1005 1005 if val is True:
1006 1006 return '1'
1007 1007 return str(val)
1008 1008 env = dict(encoding.environ)
1009 1009 if environ:
1010 1010 env.update((k, py2shell(v)) for k, v in environ.iteritems())
1011 1011 env['HG'] = hgexecutable()
1012 1012 return env
1013 1013
1014 1014 def system(cmd, environ=None, cwd=None, onerr=None, errprefix=None, out=None):
1015 1015 '''enhanced shell command execution.
1016 1016 run with environment maybe modified, maybe in different dir.
1017 1017
1018 1018 if command fails and onerr is None, return status, else raise onerr
1019 1019 object as exception.
1020 1020
1021 1021 if out is specified, it is assumed to be a file-like object that has a
1022 1022 write() method. stdout and stderr will be redirected to out.'''
1023 1023 try:
1024 1024 stdout.flush()
1025 1025 except Exception:
1026 1026 pass
1027 1027 origcmd = cmd
1028 1028 cmd = quotecommand(cmd)
1029 1029 if pycompat.sysplatform == 'plan9' and (sys.version_info[0] == 2
1030 1030 and sys.version_info[1] < 7):
1031 1031 # subprocess kludge to work around issues in half-baked Python
1032 1032 # ports, notably bichued/python:
1033 1033 if not cwd is None:
1034 1034 os.chdir(cwd)
1035 1035 rc = os.system(cmd)
1036 1036 else:
1037 1037 env = shellenviron(environ)
1038 1038 if out is None or _isstdout(out):
1039 1039 rc = subprocess.call(cmd, shell=True, close_fds=closefds,
1040 1040 env=env, cwd=cwd)
1041 1041 else:
1042 1042 proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1043 1043 env=env, cwd=cwd, stdout=subprocess.PIPE,
1044 1044 stderr=subprocess.STDOUT)
1045 1045 for line in iter(proc.stdout.readline, ''):
1046 1046 out.write(line)
1047 1047 proc.wait()
1048 1048 rc = proc.returncode
1049 1049 if pycompat.sysplatform == 'OpenVMS' and rc & 1:
1050 1050 rc = 0
1051 1051 if rc and onerr:
1052 1052 errmsg = '%s %s' % (os.path.basename(origcmd.split(None, 1)[0]),
1053 1053 explainexit(rc)[0])
1054 1054 if errprefix:
1055 1055 errmsg = '%s: %s' % (errprefix, errmsg)
1056 1056 raise onerr(errmsg)
1057 1057 return rc
1058 1058
1059 1059 def checksignature(func):
1060 1060 '''wrap a function with code to check for calling errors'''
1061 1061 def check(*args, **kwargs):
1062 1062 try:
1063 1063 return func(*args, **kwargs)
1064 1064 except TypeError:
1065 1065 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1066 1066 raise error.SignatureError
1067 1067 raise
1068 1068
1069 1069 return check
1070 1070
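# Editor's sketch (not upstream code): checksignature converts a TypeError
# raised by the call frame itself (i.e. a bad argument list) into
# SignatureError, while TypeErrors from deeper frames still propagate.
def _twoargs(a, b):
    return (a, b)
_twoargs = checksignature(_twoargs)  # _twoargs(1) raises SignatureError
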
1071 1071 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1072 1072 '''copy a file, preserving mode and optionally other stat info like
1073 1073 atime/mtime
1074 1074
1075 1075 checkambig argument is used with filestat, and is useful only if
1076 1076 destination file is guarded by any lock (e.g. repo.lock or
1077 1077 repo.wlock).
1078 1078
1079 1079 copystat and checkambig should be exclusive.
1080 1080 '''
1081 1081 assert not (copystat and checkambig)
1082 1082 oldstat = None
1083 1083 if os.path.lexists(dest):
1084 1084 if checkambig:
1085 1085 oldstat = checkambig and filestat(dest)
1086 1086 unlink(dest)
1087 1087 # hardlinks are problematic on CIFS, quietly ignore this flag
1088 1088 # until we find a way to work around it cleanly (issue4546)
1089 1089 if False and hardlink:
1090 1090 try:
1091 1091 oslink(src, dest)
1092 1092 return
1093 1093 except (IOError, OSError):
1094 1094 pass # fall back to normal copy
1095 1095 if os.path.islink(src):
1096 1096 os.symlink(os.readlink(src), dest)
1097 1097 # copystat is ignored for symlinks, but in general copystat isn't
1098 1098 # needed for them anyway
1099 1099 else:
1100 1100 try:
1101 1101 shutil.copyfile(src, dest)
1102 1102 if copystat:
1103 1103 # copystat also copies mode
1104 1104 shutil.copystat(src, dest)
1105 1105 else:
1106 1106 shutil.copymode(src, dest)
1107 1107 if oldstat and oldstat.stat:
1108 1108 newstat = filestat(dest)
1109 1109 if newstat.isambig(oldstat):
1110 1110 # stat of copied file is ambiguous to original one
1111 1111 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1112 1112 os.utime(dest, (advanced, advanced))
1113 1113 except shutil.Error as inst:
1114 1114 raise Abort(str(inst))
1115 1115
1116 1116 def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
1117 1117 """Copy a directory tree using hardlinks if possible."""
1118 1118 num = 0
1119 1119
1120 1120 if hardlink is None:
1121 1121 hardlink = (os.stat(src).st_dev ==
1122 1122 os.stat(os.path.dirname(dst)).st_dev)
1123 1123 if hardlink:
1124 1124 topic = _('linking')
1125 1125 else:
1126 1126 topic = _('copying')
1127 1127
1128 1128 if os.path.isdir(src):
1129 1129 os.mkdir(dst)
1130 1130 for name, kind in osutil.listdir(src):
1131 1131 srcname = os.path.join(src, name)
1132 1132 dstname = os.path.join(dst, name)
1133 1133 def nprog(t, pos):
1134 1134 if pos is not None:
1135 1135 return progress(t, pos + num)
1136 1136 hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
1137 1137 num += n
1138 1138 else:
1139 1139 if hardlink:
1140 1140 try:
1141 1141 oslink(src, dst)
1142 1142 except (IOError, OSError):
1143 1143 hardlink = False
1144 1144 shutil.copy(src, dst)
1145 1145 else:
1146 1146 shutil.copy(src, dst)
1147 1147 num += 1
1148 1148 progress(topic, num)
1149 1149 progress(topic, None)
1150 1150
1151 1151 return hardlink, num
1152 1152
1153 1153 _winreservednames = '''con prn aux nul
1154 1154 com1 com2 com3 com4 com5 com6 com7 com8 com9
1155 1155 lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
1156 1156 _winreservedchars = ':*?"<>|'
1157 1157 def checkwinfilename(path):
1158 1158 r'''Check that the base-relative path is a valid filename on Windows.
1159 1159 Returns None if the path is ok, or a UI string describing the problem.
1160 1160
1161 1161 >>> checkwinfilename("just/a/normal/path")
1162 1162 >>> checkwinfilename("foo/bar/con.xml")
1163 1163 "filename contains 'con', which is reserved on Windows"
1164 1164 >>> checkwinfilename("foo/con.xml/bar")
1165 1165 "filename contains 'con', which is reserved on Windows"
1166 1166 >>> checkwinfilename("foo/bar/xml.con")
1167 1167 >>> checkwinfilename("foo/bar/AUX/bla.txt")
1168 1168 "filename contains 'AUX', which is reserved on Windows"
1169 1169 >>> checkwinfilename("foo/bar/bla:.txt")
1170 1170 "filename contains ':', which is reserved on Windows"
1171 1171 >>> checkwinfilename("foo/bar/b\07la.txt")
1172 1172 "filename contains '\\x07', which is invalid on Windows"
1173 1173 >>> checkwinfilename("foo/bar/bla ")
1174 1174 "filename ends with ' ', which is not allowed on Windows"
1175 1175 >>> checkwinfilename("../bar")
1176 1176 >>> checkwinfilename("foo\\")
1177 1177 "filename ends with '\\', which is invalid on Windows"
1178 1178 >>> checkwinfilename("foo\\/bar")
1179 1179 "directory name ends with '\\', which is invalid on Windows"
1180 1180 '''
1181 1181 if path.endswith('\\'):
1182 1182 return _("filename ends with '\\', which is invalid on Windows")
1183 1183 if '\\/' in path:
1184 1184 return _("directory name ends with '\\', which is invalid on Windows")
1185 1185 for n in path.replace('\\', '/').split('/'):
1186 1186 if not n:
1187 1187 continue
1188 1188 for c in n:
1189 1189 if c in _winreservedchars:
1190 1190 return _("filename contains '%s', which is reserved "
1191 1191 "on Windows") % c
1192 1192 if ord(c) <= 31:
1193 1193 return _("filename contains %r, which is invalid "
1194 1194 "on Windows") % c
1195 1195 base = n.split('.')[0]
1196 1196 if base and base.lower() in _winreservednames:
1197 1197 return _("filename contains '%s', which is reserved "
1198 1198 "on Windows") % base
1199 1199 t = n[-1]
1200 1200 if t in '. ' and n not in '..':
1201 1201 return _("filename ends with '%s', which is not allowed "
1202 1202 "on Windows") % t
1203 1203
1204 1204 if pycompat.osname == 'nt':
1205 1205 checkosfilename = checkwinfilename
1206 timer = time.clock
1206 1207 else:
1207 1208 checkosfilename = platform.checkosfilename
1209 timer = time.time
1210
1211 if safehasattr(time, "perf_counter"):
1212 timer = time.perf_counter
1208 1213
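# Editor's sketch (not upstream code): timer() only promises a float that is
# useful for measuring intervals, so always subtract two samples instead of
# interpreting one absolute value.
def _timedcall(func, *args):
    start = timer()
    result = func(*args)
    return result, timer() - start  # (result, elapsed seconds)
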
1209 1214 def makelock(info, pathname):
1210 1215 try:
1211 1216 return os.symlink(info, pathname)
1212 1217 except OSError as why:
1213 1218 if why.errno == errno.EEXIST:
1214 1219 raise
1215 1220 except AttributeError: # no symlink in os
1216 1221 pass
1217 1222
1218 1223 ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
1219 1224 os.write(ld, info)
1220 1225 os.close(ld)
1221 1226
1222 1227 def readlock(pathname):
1223 1228 try:
1224 1229 return os.readlink(pathname)
1225 1230 except OSError as why:
1226 1231 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1227 1232 raise
1228 1233 except AttributeError: # no symlink in os
1229 1234 pass
1230 1235 fp = posixfile(pathname)
1231 1236 r = fp.read()
1232 1237 fp.close()
1233 1238 return r
1234 1239
1235 1240 def fstat(fp):
1236 1241 '''stat file object that may not have fileno method.'''
1237 1242 try:
1238 1243 return os.fstat(fp.fileno())
1239 1244 except AttributeError:
1240 1245 return os.stat(fp.name)
1241 1246
1242 1247 # File system features
1243 1248
1244 1249 def fscasesensitive(path):
1245 1250 """
1246 1251 Return true if the given path is on a case-sensitive filesystem
1247 1252
1248 1253 Requires a path (like /foo/.hg) ending with a foldable final
1249 1254 directory component.
1250 1255 """
1251 1256 s1 = os.lstat(path)
1252 1257 d, b = os.path.split(path)
1253 1258 b2 = b.upper()
1254 1259 if b == b2:
1255 1260 b2 = b.lower()
1256 1261 if b == b2:
1257 1262 return True # no evidence against case sensitivity
1258 1263 p2 = os.path.join(d, b2)
1259 1264 try:
1260 1265 s2 = os.lstat(p2)
1261 1266 if s2 == s1:
1262 1267 return False
1263 1268 return True
1264 1269 except OSError:
1265 1270 return True
1266 1271
1267 1272 try:
1268 1273 import re2
1269 1274 _re2 = None
1270 1275 except ImportError:
1271 1276 _re2 = False
1272 1277
1273 1278 class _re(object):
1274 1279 def _checkre2(self):
1275 1280 global _re2
1276 1281 try:
1277 1282 # check if match works, see issue3964
1278 1283 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1279 1284 except ImportError:
1280 1285 _re2 = False
1281 1286
1282 1287 def compile(self, pat, flags=0):
1283 1288 '''Compile a regular expression, using re2 if possible
1284 1289
1285 1290 For best performance, use only re2-compatible regexp features. The
1286 1291 only flags from the re module that are re2-compatible are
1287 1292 IGNORECASE and MULTILINE.'''
1288 1293 if _re2 is None:
1289 1294 self._checkre2()
1290 1295 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1291 1296 if flags & remod.IGNORECASE:
1292 1297 pat = '(?i)' + pat
1293 1298 if flags & remod.MULTILINE:
1294 1299 pat = '(?m)' + pat
1295 1300 try:
1296 1301 return re2.compile(pat)
1297 1302 except re2.error:
1298 1303 pass
1299 1304 return remod.compile(pat, flags)
1300 1305
1301 1306 @propertycache
1302 1307 def escape(self):
1303 1308 '''Return the version of escape corresponding to self.compile.
1304 1309
1305 1310 This is imperfect because whether re2 or re is used for a particular
1306 1311 function depends on the flags, etc, but it's the best we can do.
1307 1312 '''
1308 1313 global _re2
1309 1314 if _re2 is None:
1310 1315 self._checkre2()
1311 1316 if _re2:
1312 1317 return re2.escape
1313 1318 else:
1314 1319 return remod.escape
1315 1320
1316 1321 re = _re()
1317 1322
1318 1323 _fspathcache = {}
1319 1324 def fspath(name, root):
1320 1325 '''Get name in the case stored in the filesystem
1321 1326
1322 1327 The name should be relative to root, and be normcase-ed for efficiency.
1323 1328
1324 1329 Note that this function is unnecessary, and should not be
1325 1330 called, for case-sensitive filesystems (simply because it's expensive).
1326 1331
1327 1332 The root should be normcase-ed, too.
1328 1333 '''
1329 1334 def _makefspathcacheentry(dir):
1330 1335 return dict((normcase(n), n) for n in os.listdir(dir))
1331 1336
1332 1337 seps = pycompat.ossep
1333 1338 if pycompat.osaltsep:
1334 1339 seps = seps + pycompat.osaltsep
1335 1340 # Protect backslashes. This gets silly very quickly.
1336 1341 seps = seps.replace('\\','\\\\')
1337 1342 pattern = remod.compile(r'([^%s]+)|([%s]+)' % (seps, seps))
1338 1343 dir = os.path.normpath(root)
1339 1344 result = []
1340 1345 for part, sep in pattern.findall(name):
1341 1346 if sep:
1342 1347 result.append(sep)
1343 1348 continue
1344 1349
1345 1350 if dir not in _fspathcache:
1346 1351 _fspathcache[dir] = _makefspathcacheentry(dir)
1347 1352 contents = _fspathcache[dir]
1348 1353
1349 1354 found = contents.get(part)
1350 1355 if not found:
1351 1356 # retry "once per directory" per "dirstate.walk" which
1352 1357 # may take place for each patch of "hg qpush", for example
1353 1358 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1354 1359 found = contents.get(part)
1355 1360
1356 1361 result.append(found or part)
1357 1362 dir = os.path.join(dir, part)
1358 1363
1359 1364 return ''.join(result)
1360 1365
1361 1366 def checknlink(testfile):
1362 1367 '''check whether hardlink count reporting works properly'''
1363 1368
1364 1369 # testfile may be open, so we need a separate file for checking to
1365 1370 # work around issue2543 (or testfile may get lost on Samba shares)
1366 1371 f1 = testfile + ".hgtmp1"
1367 1372 if os.path.lexists(f1):
1368 1373 return False
1369 1374 try:
1370 1375 posixfile(f1, 'w').close()
1371 1376 except IOError:
1372 1377 try:
1373 1378 os.unlink(f1)
1374 1379 except OSError:
1375 1380 pass
1376 1381 return False
1377 1382
1378 1383 f2 = testfile + ".hgtmp2"
1379 1384 fd = None
1380 1385 try:
1381 1386 oslink(f1, f2)
1382 1387 # nlinks() may behave differently for files on Windows shares if
1383 1388 # the file is open.
1384 1389 fd = posixfile(f2)
1385 1390 return nlinks(f2) > 1
1386 1391 except OSError:
1387 1392 return False
1388 1393 finally:
1389 1394 if fd is not None:
1390 1395 fd.close()
1391 1396 for f in (f1, f2):
1392 1397 try:
1393 1398 os.unlink(f)
1394 1399 except OSError:
1395 1400 pass
1396 1401
1397 1402 def endswithsep(path):
1398 1403 '''Check path ends with os.sep or os.altsep.'''
1399 1404 return (path.endswith(pycompat.ossep)
1400 1405 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1401 1406
1402 1407 def splitpath(path):
1403 1408 '''Split path by os.sep.
1404 1409 Note that this function does not use os.altsep because this is
1405 1410 an alternative to a simple "xxx.split(os.sep)".
1406 1411 It is recommended to use os.path.normpath() before using this
1407 1412 function if needed.'''
1408 1413 return path.split(pycompat.ossep)
1409 1414
1410 1415 def gui():
1411 1416 '''Are we running in a GUI?'''
1412 1417 if pycompat.sysplatform == 'darwin':
1413 1418 if 'SSH_CONNECTION' in encoding.environ:
1414 1419 # handle SSH access to a box where the user is logged in
1415 1420 return False
1416 1421 elif getattr(osutil, 'isgui', None):
1417 1422 # check if a CoreGraphics session is available
1418 1423 return osutil.isgui()
1419 1424 else:
1420 1425 # pure build; use a safe default
1421 1426 return True
1422 1427 else:
1423 1428 return pycompat.osname == "nt" or encoding.environ.get("DISPLAY")
1424 1429
1425 1430 def mktempcopy(name, emptyok=False, createmode=None):
1426 1431 """Create a temporary file with the same contents from name
1427 1432
1428 1433 The permission bits are copied from the original file.
1429 1434
1430 1435 If the temporary file is going to be truncated immediately, you
1431 1436 can use emptyok=True as an optimization.
1432 1437
1433 1438 Returns the name of the temporary file.
1434 1439 """
1435 1440 d, fn = os.path.split(name)
1436 1441 fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, dir=d)
1437 1442 os.close(fd)
1438 1443 # Temporary files are created with mode 0600, which is usually not
1439 1444 # what we want. If the original file already exists, just copy
1440 1445 # its mode. Otherwise, manually obey umask.
1441 1446 copymode(name, temp, createmode)
1442 1447 if emptyok:
1443 1448 return temp
1444 1449 try:
1445 1450 try:
1446 1451 ifp = posixfile(name, "rb")
1447 1452 except IOError as inst:
1448 1453 if inst.errno == errno.ENOENT:
1449 1454 return temp
1450 1455 if not getattr(inst, 'filename', None):
1451 1456 inst.filename = name
1452 1457 raise
1453 1458 ofp = posixfile(temp, "wb")
1454 1459 for chunk in filechunkiter(ifp):
1455 1460 ofp.write(chunk)
1456 1461 ifp.close()
1457 1462 ofp.close()
1458 1463 except: # re-raises
1459 1464 try: os.unlink(temp)
1460 1465 except OSError: pass
1461 1466 raise
1462 1467 return temp
1463 1468
1464 1469 class filestat(object):
1465 1470 """help to exactly detect change of a file
1466 1471
1467 1472 'stat' attribute is result of 'os.stat()' if specified 'path'
1468 1473 exists. Otherwise, it is None. This can avoid preparative
1469 1474 'exists()' examination on client side of this class.
1470 1475 """
1471 1476 def __init__(self, path):
1472 1477 try:
1473 1478 self.stat = os.stat(path)
1474 1479 except OSError as err:
1475 1480 if err.errno != errno.ENOENT:
1476 1481 raise
1477 1482 self.stat = None
1478 1483
1479 1484 __hash__ = object.__hash__
1480 1485
1481 1486 def __eq__(self, old):
1482 1487 try:
1483 1488 # if ambiguity between stat of new and old file is
1484 1489 # avoided, comparison of size, ctime and mtime is enough
1485 1490 # to exactly detect change of a file regardless of platform
1486 1491 return (self.stat.st_size == old.stat.st_size and
1487 1492 self.stat.st_ctime == old.stat.st_ctime and
1488 1493 self.stat.st_mtime == old.stat.st_mtime)
1489 1494 except AttributeError:
1490 1495 return False
1491 1496
1492 1497 def isambig(self, old):
1493 1498 """Examine whether new (= self) stat is ambiguous against old one
1494 1499
1495 1500 "S[N]" below means stat of a file at N-th change:
1496 1501
1497 1502 - S[n-1].ctime < S[n].ctime: can detect change of a file
1498 1503 - S[n-1].ctime == S[n].ctime
1499 1504 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
1500 1505 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
1501 1506 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
1502 1507 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
1503 1508
1504 1509 Case (*2) above means that a file was changed twice or more
1505 1510 within the same second (= S[n-1].ctime), and comparison of
1506 1511 timestamps is ambiguous.
1507 1512
1508 1513 The basic idea to avoid such ambiguity is "advance mtime 1 sec,
1509 1514 if timestamp is ambiguous".
1510 1515
1511 1516 But advancing mtime only in case (*2) doesn't work as
1512 1517 expected, because naturally advanced S[n].mtime in case (*1)
1513 1518 might be equal to manually advanced S[n-1 or earlier].mtime.
1514 1519
1515 1520 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
1516 1521 treated as ambiguous regardless of mtime, to avoid overlooking
1517 1522 a change masked by colliding mtimes.
1518 1523
1519 1524 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
1520 1525 S[n].mtime", even if size of a file isn't changed.
1521 1526 """
1522 1527 try:
1523 1528 return (self.stat.st_ctime == old.stat.st_ctime)
1524 1529 except AttributeError:
1525 1530 return False
1526 1531
1527 1532 def avoidambig(self, path, old):
1528 1533 """Change file stat of specified path to avoid ambiguity
1529 1534
1530 1535 'old' should be previous filestat of 'path'.
1531 1536
1532 1537 This skips avoiding ambiguity, if a process doesn't have
1533 1538 appropriate privileges for 'path'.
1534 1539 """
1535 1540 advanced = (old.stat.st_mtime + 1) & 0x7fffffff
1536 1541 try:
1537 1542 os.utime(path, (advanced, advanced))
1538 1543 except OSError as inst:
1539 1544 if inst.errno == errno.EPERM:
1540 1545 # utime() on the file created by another user causes EPERM,
1541 1546 # if a process doesn't have appropriate privileges
1542 1547 return
1543 1548 raise
1544 1549
1545 1550 def __ne__(self, other):
1546 1551 return not self == other
1547 1552
1548 1553 class atomictempfile(object):
1549 1554 '''writable file object that atomically updates a file
1550 1555
1551 1556 All writes will go to a temporary copy of the original file. Call
1552 1557 close() when you are done writing, and atomictempfile will rename
1553 1558 the temporary copy to the original name, making the changes
1554 1559 visible. If the object is destroyed without being closed, all your
1555 1560 writes are discarded.
1556 1561
1557 1562 checkambig argument of constructor is used with filestat, and is
1558 1563 useful only if target file is guarded by any lock (e.g. repo.lock
1559 1564 or repo.wlock).
1560 1565 '''
1561 1566 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
1562 1567 self.__name = name # permanent name
1563 1568 self._tempname = mktempcopy(name, emptyok=('w' in mode),
1564 1569 createmode=createmode)
1565 1570 self._fp = posixfile(self._tempname, mode)
1566 1571 self._checkambig = checkambig
1567 1572
1568 1573 # delegated methods
1569 1574 self.read = self._fp.read
1570 1575 self.write = self._fp.write
1571 1576 self.seek = self._fp.seek
1572 1577 self.tell = self._fp.tell
1573 1578 self.fileno = self._fp.fileno
1574 1579
1575 1580 def close(self):
1576 1581 if not self._fp.closed:
1577 1582 self._fp.close()
1578 1583 filename = localpath(self.__name)
1579 1584 oldstat = self._checkambig and filestat(filename)
1580 1585 if oldstat and oldstat.stat:
1581 1586 rename(self._tempname, filename)
1582 1587 newstat = filestat(filename)
1583 1588 if newstat.isambig(oldstat):
1584 1589 # stat of changed file is ambiguous to original one
1585 1590 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1586 1591 os.utime(filename, (advanced, advanced))
1587 1592 else:
1588 1593 rename(self._tempname, filename)
1589 1594
1590 1595 def discard(self):
1591 1596 if not self._fp.closed:
1592 1597 try:
1593 1598 os.unlink(self._tempname)
1594 1599 except OSError:
1595 1600 pass
1596 1601 self._fp.close()
1597 1602
1598 1603 def __del__(self):
1599 1604 if safehasattr(self, '_fp'): # constructor actually did something
1600 1605 self.discard()
1601 1606
1602 1607 def __enter__(self):
1603 1608 return self
1604 1609
1605 1610 def __exit__(self, exctype, excvalue, traceback):
1606 1611 if exctype is not None:
1607 1612 self.discard()
1608 1613 else:
1609 1614 self.close()
1610 1615
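# Editor's sketch (not upstream code): as a context manager, atomictempfile
# renames the temporary copy over 'path' only when the block exits cleanly.
def _atomicwritedemo(path, data):
    with atomictempfile(path, 'wb') as fp:
        fp.write(data)  # discarded if an exception escapes the block
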
1611 1616 def makedirs(name, mode=None, notindexed=False):
1612 1617 """recursive directory creation with parent mode inheritance
1613 1618
1614 1619 Newly created directories are marked as "not to be indexed by
1615 1620 the content indexing service", if ``notindexed`` is specified
1616 1621 for "write" mode access.
1617 1622 """
1618 1623 try:
1619 1624 makedir(name, notindexed)
1620 1625 except OSError as err:
1621 1626 if err.errno == errno.EEXIST:
1622 1627 return
1623 1628 if err.errno != errno.ENOENT or not name:
1624 1629 raise
1625 1630 parent = os.path.dirname(os.path.abspath(name))
1626 1631 if parent == name:
1627 1632 raise
1628 1633 makedirs(parent, mode, notindexed)
1629 1634 try:
1630 1635 makedir(name, notindexed)
1631 1636 except OSError as err:
1632 1637 # Catch EEXIST to handle races
1633 1638 if err.errno == errno.EEXIST:
1634 1639 return
1635 1640 raise
1636 1641 if mode is not None:
1637 1642 os.chmod(name, mode)
1638 1643
1639 1644 def readfile(path):
1640 1645 with open(path, 'rb') as fp:
1641 1646 return fp.read()
1642 1647
1643 1648 def writefile(path, text):
1644 1649 with open(path, 'wb') as fp:
1645 1650 fp.write(text)
1646 1651
1647 1652 def appendfile(path, text):
1648 1653 with open(path, 'ab') as fp:
1649 1654 fp.write(text)
1650 1655
1651 1656 class chunkbuffer(object):
1652 1657 """Allow arbitrary sized chunks of data to be efficiently read from an
1653 1658 iterator over chunks of arbitrary size."""
1654 1659
1655 1660 def __init__(self, in_iter):
1656 1661 """in_iter is the iterator that's iterating over the input chunks."""
1657 1662 
1658 1663 def splitbig(chunks):
1659 1664 for chunk in chunks:
1660 1665 if len(chunk) > 2**20:
1661 1666 pos = 0
1662 1667 while pos < len(chunk):
1663 1668 end = pos + 2 ** 18
1664 1669 yield chunk[pos:end]
1665 1670 pos = end
1666 1671 else:
1667 1672 yield chunk
1668 1673 self.iter = splitbig(in_iter)
1669 1674 self._queue = collections.deque()
1670 1675 self._chunkoffset = 0
1671 1676
1672 1677 def read(self, l=None):
1673 1678 """Read L bytes of data from the iterator of chunks of data.
1674 1679 Returns less than L bytes if the iterator runs dry.
1675 1680
1676 1681 If size parameter is omitted, read everything"""
1677 1682 if l is None:
1678 1683 return ''.join(self.iter)
1679 1684
1680 1685 left = l
1681 1686 buf = []
1682 1687 queue = self._queue
1683 1688 while left > 0:
1684 1689 # refill the queue
1685 1690 if not queue:
1686 1691 target = 2**18
1687 1692 for chunk in self.iter:
1688 1693 queue.append(chunk)
1689 1694 target -= len(chunk)
1690 1695 if target <= 0:
1691 1696 break
1692 1697 if not queue:
1693 1698 break
1694 1699
1695 1700 # The easy way to do this would be to queue.popleft(), modify the
1696 1701 # chunk (if necessary), then queue.appendleft(). However, for cases
1697 1702 # where we read partial chunk content, this incurs 2 dequeue
1698 1703 # mutations and creates a new str for the remaining chunk in the
1699 1704 # queue. Our code below avoids this overhead.
1700 1705
1701 1706 chunk = queue[0]
1702 1707 chunkl = len(chunk)
1703 1708 offset = self._chunkoffset
1704 1709
1705 1710 # Use full chunk.
1706 1711 if offset == 0 and left >= chunkl:
1707 1712 left -= chunkl
1708 1713 queue.popleft()
1709 1714 buf.append(chunk)
1710 1715 # self._chunkoffset remains at 0.
1711 1716 continue
1712 1717
1713 1718 chunkremaining = chunkl - offset
1714 1719
1715 1720 # Use all of unconsumed part of chunk.
1716 1721 if left >= chunkremaining:
1717 1722 left -= chunkremaining
1718 1723 queue.popleft()
1719 1724                 # offset == 0 was handled by the block above, so this
1720 1725                 # won't merely copy the whole chunk via ``chunk[0:]``.
1721 1726 buf.append(chunk[offset:])
1722 1727 self._chunkoffset = 0
1723 1728
1724 1729 # Partial chunk needed.
1725 1730 else:
1726 1731 buf.append(chunk[offset:offset + left])
1727 1732 self._chunkoffset += left
1728 1733 left -= chunkremaining
1729 1734
1730 1735 return ''.join(buf)
1731 1736
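# Usage sketch for chunkbuffer (hypothetical helper, not part of the
# original module): reads of any size may span chunk boundaries, and a
# read past the end is simply short.
def _chunkbufferexample():
    buf = chunkbuffer(iter(['abc', 'defg', 'h']))
    assert buf.read(2) == 'ab'    # partial first chunk
    assert buf.read(4) == 'cdef'  # spans the first two chunks
    assert buf.read(10) == 'gh'   # short read once the iterator is dry
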
1732 1737 def filechunkiter(f, size=131072, limit=None):
1733 1738     """Create a generator that produces the data in the file, size
1734 1739     (default 131072) bytes at a time, up to optional limit (default is
1735 1740 to read all data). Chunks may be less than size bytes if the
1736 1741 chunk is the last chunk in the file, or the file is a socket or
1737 1742 some other type of file that sometimes reads less data than is
1738 1743 requested."""
1739 1744 assert size >= 0
1740 1745 assert limit is None or limit >= 0
1741 1746 while True:
1742 1747 if limit is None:
1743 1748 nbytes = size
1744 1749 else:
1745 1750 nbytes = min(limit, size)
1746 1751 s = nbytes and f.read(nbytes)
1747 1752 if not s:
1748 1753 break
1749 1754 if limit:
1750 1755 limit -= len(s)
1751 1756 yield s
1752 1757
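# Usage sketch for filechunkiter() (hypothetical helper, not part of the
# original module), using the module's stringio alias as a stand-in for
# a real file: the limit truncates the final chunk.
def _filechunkiterexample():
    fp = stringio('abcdefgh')
    assert list(filechunkiter(fp, size=4, limit=6)) == ['abcd', 'ef']
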
1753 1758 def makedate(timestamp=None):
1754 1759     '''Return a unix timestamp (or the current time) as a (unixtime,
1755 1760     offset) tuple based on the local timezone.'''
1756 1761 if timestamp is None:
1757 1762 timestamp = time.time()
1758 1763 if timestamp < 0:
1759 1764 hint = _("check your clock")
1760 1765 raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
1761 1766 delta = (datetime.datetime.utcfromtimestamp(timestamp) -
1762 1767 datetime.datetime.fromtimestamp(timestamp))
1763 1768 tz = delta.days * 86400 + delta.seconds
1764 1769 return timestamp, tz
1765 1770
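# Usage sketch for makedate() (hypothetical helper, not part of the
# original module): the second element is the local zone's offset in
# seconds west of UTC, which parsedate() later bounds to UTC+14..UTC-12.
def _makedateexample():
    when, tz = makedate(0)  # the epoch, paired with the local offset
    assert when == 0
    assert -50400 <= tz <= 43200
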
1766 1771 def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
1767 1772 """represent a (unixtime, offset) tuple as a localized time.
1768 1773 unixtime is seconds since the epoch, and offset is the time zone's
1769 1774 number of seconds away from UTC.
1770 1775
1771 1776 >>> datestr((0, 0))
1772 1777 'Thu Jan 01 00:00:00 1970 +0000'
1773 1778 >>> datestr((42, 0))
1774 1779 'Thu Jan 01 00:00:42 1970 +0000'
1775 1780 >>> datestr((-42, 0))
1776 1781 'Wed Dec 31 23:59:18 1969 +0000'
1777 1782 >>> datestr((0x7fffffff, 0))
1778 1783 'Tue Jan 19 03:14:07 2038 +0000'
1779 1784 >>> datestr((-0x80000000, 0))
1780 1785 'Fri Dec 13 20:45:52 1901 +0000'
1781 1786 """
1782 1787 t, tz = date or makedate()
1783 1788 if "%1" in format or "%2" in format or "%z" in format:
1784 1789 sign = (tz > 0) and "-" or "+"
1785 1790 minutes = abs(tz) // 60
1786 1791 q, r = divmod(minutes, 60)
1787 1792 format = format.replace("%z", "%1%2")
1788 1793 format = format.replace("%1", "%c%02d" % (sign, q))
1789 1794 format = format.replace("%2", "%02d" % r)
1790 1795 d = t - tz
1791 1796 if d > 0x7fffffff:
1792 1797 d = 0x7fffffff
1793 1798 elif d < -0x80000000:
1794 1799 d = -0x80000000
1795 1800 # Never use time.gmtime() and datetime.datetime.fromtimestamp()
1796 1801 # because they use the gmtime() system call which is buggy on Windows
1797 1802 # for negative values.
1798 1803 t = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=d)
1799 1804 s = t.strftime(format)
1800 1805 return s
1801 1806
1802 1807 def shortdate(date=None):
1803 1808     """turn (timestamp, tzoff) tuple into ISO 8601 date."""
1804 1809 return datestr(date, format='%Y-%m-%d')
1805 1810
1806 1811 def parsetimezone(s):
1807 1812 """find a trailing timezone, if any, in string, and return a
1808 1813 (offset, remainder) pair"""
1809 1814
1810 1815 if s.endswith("GMT") or s.endswith("UTC"):
1811 1816 return 0, s[:-3].rstrip()
1812 1817
1813 1818 # Unix-style timezones [+-]hhmm
1814 1819 if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
1815 1820 sign = (s[-5] == "+") and 1 or -1
1816 1821 hours = int(s[-4:-2])
1817 1822 minutes = int(s[-2:])
1818 1823 return -sign * (hours * 60 + minutes) * 60, s[:-5].rstrip()
1819 1824
1820 1825 # ISO8601 trailing Z
1821 1826 if s.endswith("Z") and s[-2:-1].isdigit():
1822 1827 return 0, s[:-1]
1823 1828
1824 1829 # ISO8601-style [+-]hh:mm
1825 1830 if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
1826 1831 s[-5:-3].isdigit() and s[-2:].isdigit()):
1827 1832 sign = (s[-6] == "+") and 1 or -1
1828 1833 hours = int(s[-5:-3])
1829 1834 minutes = int(s[-2:])
1830 1835 return -sign * (hours * 60 + minutes) * 60, s[:-6]
1831 1836
1832 1837 return None, s
1833 1838
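# Usage sketch for parsetimezone() (hypothetical helper, not part of the
# original module): the returned offset uses the same sign convention as
# makedate(), positive west of UTC; None means no zone was recognized.
def _parsetimezoneexample():
    assert parsetimezone('2006-02-06 13:00 +0200') == (-7200,
                                                       '2006-02-06 13:00')
    assert parsetimezone('13:00 GMT') == (0, '13:00')
    assert parsetimezone('no zone here') == (None, 'no zone here')
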
1834 1839 def strdate(string, format, defaults=[]):
1835 1840 """parse a localized time string and return a (unixtime, offset) tuple.
1836 1841 if the string cannot be parsed, ValueError is raised."""
1837 1842 # NOTE: unixtime = localunixtime + offset
1838 1843 offset, date = parsetimezone(string)
1839 1844
1840 1845 # add missing elements from defaults
1841 1846 usenow = False # default to using biased defaults
1842 1847 for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
1843 1848 found = [True for p in part if ("%"+p) in format]
1844 1849 if not found:
1845 1850 date += "@" + defaults[part][usenow]
1846 1851 format += "@%" + part[0]
1847 1852 else:
1848 1853 # We've found a specific time element, less specific time
1849 1854 # elements are relative to today
1850 1855 usenow = True
1851 1856
1852 1857 timetuple = time.strptime(date, format)
1853 1858 localunixtime = int(calendar.timegm(timetuple))
1854 1859 if offset is None:
1855 1860 # local timezone
1856 1861 unixtime = int(time.mktime(timetuple))
1857 1862 offset = unixtime - localunixtime
1858 1863 else:
1859 1864 unixtime = localunixtime + offset
1860 1865 return unixtime, offset
1861 1866
1862 1867 def parsedate(date, formats=None, bias=None):
1863 1868 """parse a localized date/time and return a (unixtime, offset) tuple.
1864 1869
1865 1870 The date may be a "unixtime offset" string or in one of the specified
1866 1871 formats. If the date already is a (unixtime, offset) tuple, it is returned.
1867 1872
1868 1873 >>> parsedate(' today ') == parsedate(\
1869 1874 datetime.date.today().strftime('%b %d'))
1870 1875 True
1871 1876 >>> parsedate( 'yesterday ') == parsedate((datetime.date.today() -\
1872 1877 datetime.timedelta(days=1)\
1873 1878 ).strftime('%b %d'))
1874 1879 True
1875 1880 >>> now, tz = makedate()
1876 1881 >>> strnow, strtz = parsedate('now')
1877 1882 >>> (strnow - now) < 1
1878 1883 True
1879 1884 >>> tz == strtz
1880 1885 True
1881 1886 """
1882 1887 if bias is None:
1883 1888 bias = {}
1884 1889 if not date:
1885 1890 return 0, 0
1886 1891 if isinstance(date, tuple) and len(date) == 2:
1887 1892 return date
1888 1893 if not formats:
1889 1894 formats = defaultdateformats
1890 1895 date = date.strip()
1891 1896
1892 1897 if date == 'now' or date == _('now'):
1893 1898 return makedate()
1894 1899 if date == 'today' or date == _('today'):
1895 1900 date = datetime.date.today().strftime('%b %d')
1896 1901 elif date == 'yesterday' or date == _('yesterday'):
1897 1902 date = (datetime.date.today() -
1898 1903 datetime.timedelta(days=1)).strftime('%b %d')
1899 1904
1900 1905 try:
1901 1906 when, offset = map(int, date.split(' '))
1902 1907 except ValueError:
1903 1908 # fill out defaults
1904 1909 now = makedate()
1905 1910 defaults = {}
1906 1911 for part in ("d", "mb", "yY", "HI", "M", "S"):
1907 1912 # this piece is for rounding the specific end of unknowns
1908 1913 b = bias.get(part)
1909 1914 if b is None:
1910 1915 if part[0] in "HMS":
1911 1916 b = "00"
1912 1917 else:
1913 1918 b = "0"
1914 1919
1915 1920 # this piece is for matching the generic end to today's date
1916 1921 n = datestr(now, "%" + part[0])
1917 1922
1918 1923 defaults[part] = (b, n)
1919 1924
1920 1925 for format in formats:
1921 1926 try:
1922 1927 when, offset = strdate(date, format, defaults)
1923 1928 except (ValueError, OverflowError):
1924 1929 pass
1925 1930 else:
1926 1931 break
1927 1932 else:
1928 1933 raise Abort(_('invalid date: %r') % date)
1929 1934 # validate explicit (probably user-specified) date and
1930 1935 # time zone offset. values must fit in signed 32 bits for
1931 1936 # current 32-bit linux runtimes. timezones go from UTC-12
1932 1937 # to UTC+14
1933 1938 if when < -0x80000000 or when > 0x7fffffff:
1934 1939 raise Abort(_('date exceeds 32 bits: %d') % when)
1935 1940 if offset < -50400 or offset > 43200:
1936 1941 raise Abort(_('impossible time zone offset: %d') % offset)
1937 1942 return when, offset
1938 1943
1939 1944 def matchdate(date):
1940 1945 """Return a function that matches a given date match specifier
1941 1946
1942 1947 Formats include:
1943 1948
1944 1949 '{date}' match a given date to the accuracy provided
1945 1950
1946 1951 '<{date}' on or before a given date
1947 1952
1948 1953 '>{date}' on or after a given date
1949 1954
1950 1955 >>> p1 = parsedate("10:29:59")
1951 1956 >>> p2 = parsedate("10:30:00")
1952 1957 >>> p3 = parsedate("10:30:59")
1953 1958 >>> p4 = parsedate("10:31:00")
1954 1959 >>> p5 = parsedate("Sep 15 10:30:00 1999")
1955 1960 >>> f = matchdate("10:30")
1956 1961 >>> f(p1[0])
1957 1962 False
1958 1963 >>> f(p2[0])
1959 1964 True
1960 1965 >>> f(p3[0])
1961 1966 True
1962 1967 >>> f(p4[0])
1963 1968 False
1964 1969 >>> f(p5[0])
1965 1970 False
1966 1971 """
1967 1972
1968 1973 def lower(date):
1969 1974 d = {'mb': "1", 'd': "1"}
1970 1975 return parsedate(date, extendeddateformats, d)[0]
1971 1976
1972 1977 def upper(date):
1973 1978 d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
1974 1979 for days in ("31", "30", "29"):
1975 1980 try:
1976 1981 d["d"] = days
1977 1982 return parsedate(date, extendeddateformats, d)[0]
1978 1983 except Abort:
1979 1984 pass
1980 1985 d["d"] = "28"
1981 1986 return parsedate(date, extendeddateformats, d)[0]
1982 1987
1983 1988 date = date.strip()
1984 1989
1985 1990 if not date:
1986 1991 raise Abort(_("dates cannot consist entirely of whitespace"))
1987 1992 elif date[0] == "<":
1988 1993 if not date[1:]:
1989 1994 raise Abort(_("invalid day spec, use '<DATE'"))
1990 1995 when = upper(date[1:])
1991 1996 return lambda x: x <= when
1992 1997 elif date[0] == ">":
1993 1998 if not date[1:]:
1994 1999 raise Abort(_("invalid day spec, use '>DATE'"))
1995 2000 when = lower(date[1:])
1996 2001 return lambda x: x >= when
1997 2002 elif date[0] == "-":
1998 2003 try:
1999 2004 days = int(date[1:])
2000 2005 except ValueError:
2001 2006 raise Abort(_("invalid day spec: %s") % date[1:])
2002 2007 if days < 0:
2003 2008 raise Abort(_("%s must be nonnegative (see 'hg help dates')")
2004 2009 % date[1:])
2005 2010 when = makedate()[0] - days * 3600 * 24
2006 2011 return lambda x: x >= when
2007 2012 elif " to " in date:
2008 2013 a, b = date.split(" to ")
2009 2014 start, stop = lower(a), upper(b)
2010 2015 return lambda x: x >= start and x <= stop
2011 2016 else:
2012 2017 start, stop = lower(date), upper(date)
2013 2018 return lambda x: x >= start and x <= stop
2014 2019
2015 2020 def stringmatcher(pattern, casesensitive=True):
2016 2021 """
2017 2022 accepts a string, possibly starting with 're:' or 'literal:' prefix.
2018 2023 returns the matcher name, pattern, and matcher function.
2019 2024 missing or unknown prefixes are treated as literal matches.
2020 2025
2021 2026 helper for tests:
2022 2027 >>> def test(pattern, *tests):
2023 2028 ... kind, pattern, matcher = stringmatcher(pattern)
2024 2029 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2025 2030 >>> def itest(pattern, *tests):
2026 2031 ... kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
2027 2032 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2028 2033
2029 2034 exact matching (no prefix):
2030 2035 >>> test('abcdefg', 'abc', 'def', 'abcdefg')
2031 2036 ('literal', 'abcdefg', [False, False, True])
2032 2037
2033 2038 regex matching ('re:' prefix)
2034 2039 >>> test('re:a.+b', 'nomatch', 'fooadef', 'fooadefbar')
2035 2040 ('re', 'a.+b', [False, False, True])
2036 2041
2037 2042 force exact matches ('literal:' prefix)
2038 2043 >>> test('literal:re:foobar', 'foobar', 're:foobar')
2039 2044 ('literal', 're:foobar', [False, True])
2040 2045
2041 2046 unknown prefixes are ignored and treated as literals
2042 2047 >>> test('foo:bar', 'foo', 'bar', 'foo:bar')
2043 2048 ('literal', 'foo:bar', [False, False, True])
2044 2049
2045 2050 case insensitive regex matches
2046 2051 >>> itest('re:A.+b', 'nomatch', 'fooadef', 'fooadefBar')
2047 2052 ('re', 'A.+b', [False, False, True])
2048 2053
2049 2054 case insensitive literal matches
2050 2055 >>> itest('ABCDEFG', 'abc', 'def', 'abcdefg')
2051 2056 ('literal', 'ABCDEFG', [False, False, True])
2052 2057 """
2053 2058 if pattern.startswith('re:'):
2054 2059 pattern = pattern[3:]
2055 2060 try:
2056 2061 flags = 0
2057 2062 if not casesensitive:
2058 2063 flags = remod.I
2059 2064 regex = remod.compile(pattern, flags)
2060 2065 except remod.error as e:
2061 2066 raise error.ParseError(_('invalid regular expression: %s')
2062 2067 % e)
2063 2068 return 're', pattern, regex.search
2064 2069 elif pattern.startswith('literal:'):
2065 2070 pattern = pattern[8:]
2066 2071
2067 2072 match = pattern.__eq__
2068 2073
2069 2074 if not casesensitive:
2070 2075 ipat = encoding.lower(pattern)
2071 2076 match = lambda s: ipat == encoding.lower(s)
2072 2077 return 'literal', pattern, match
2073 2078
2074 2079 def shortuser(user):
2075 2080 """Return a short representation of a user name or email address."""
2076 2081 f = user.find('@')
2077 2082 if f >= 0:
2078 2083 user = user[:f]
2079 2084 f = user.find('<')
2080 2085 if f >= 0:
2081 2086 user = user[f + 1:]
2082 2087 f = user.find(' ')
2083 2088 if f >= 0:
2084 2089 user = user[:f]
2085 2090 f = user.find('.')
2086 2091 if f >= 0:
2087 2092 user = user[:f]
2088 2093 return user
2089 2094
2090 2095 def emailuser(user):
2091 2096 """Return the user portion of an email address."""
2092 2097 f = user.find('@')
2093 2098 if f >= 0:
2094 2099 user = user[:f]
2095 2100 f = user.find('<')
2096 2101 if f >= 0:
2097 2102 user = user[f + 1:]
2098 2103 return user
2099 2104
2100 2105 def email(author):
2101 2106 '''get email of author.'''
2102 2107 r = author.find('>')
2103 2108 if r == -1:
2104 2109 r = None
2105 2110 return author[author.find('<') + 1:r]
2106 2111
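# Usage sketch (hypothetical helper and author string, not part of the
# original module): the three user helpers applied to one value, from
# full address down to login.
def _userexample():
    author = 'Jane Q. Public <jane.public@example.com>'
    assert email(author) == 'jane.public@example.com'
    assert emailuser('jane.public@example.com') == 'jane.public'
    assert shortuser('jane.public@example.com') == 'jane'
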
2107 2112 def ellipsis(text, maxlength=400):
2108 2113 """Trim string to at most maxlength (default: 400) columns in display."""
2109 2114 return encoding.trim(text, maxlength, ellipsis='...')
2110 2115
2111 2116 def unitcountfn(*unittable):
2112 2117 '''return a function that renders a readable count of some quantity'''
2113 2118
2114 2119 def go(count):
2115 2120 for multiplier, divisor, format in unittable:
2116 2121 if count >= divisor * multiplier:
2117 2122 return format % (count / float(divisor))
2118 2123 return unittable[-1][2] % count
2119 2124
2120 2125 return go
2121 2126
2122 2127 bytecount = unitcountfn(
2123 2128 (100, 1 << 30, _('%.0f GB')),
2124 2129 (10, 1 << 30, _('%.1f GB')),
2125 2130 (1, 1 << 30, _('%.2f GB')),
2126 2131 (100, 1 << 20, _('%.0f MB')),
2127 2132 (10, 1 << 20, _('%.1f MB')),
2128 2133 (1, 1 << 20, _('%.2f MB')),
2129 2134 (100, 1 << 10, _('%.0f KB')),
2130 2135 (10, 1 << 10, _('%.1f KB')),
2131 2136 (1, 1 << 10, _('%.2f KB')),
2132 2137 (1, 1, _('%.0f bytes')),
2133 2138 )
2134 2139
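# Usage sketch for bytecount() (hypothetical helper, not part of the
# original module): the table above is scanned top down, so the first
# row whose threshold fits picks both the unit and the precision.
def _bytecountexample():
    assert bytecount(1) == '1 bytes'
    assert bytecount(10 * (1 << 10)) == '10.0 KB'
    assert bytecount(int(2.5 * (1 << 30))) == '2.50 GB'
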
2135 2140 def uirepr(s):
2136 2141 # Avoid double backslash in Windows path repr()
2137 2142 return repr(s).replace('\\\\', '\\')
2138 2143
2139 2144 # delay import of textwrap
2140 2145 def MBTextWrapper(**kwargs):
2141 2146 class tw(textwrap.TextWrapper):
2142 2147 """
2143 2148 Extend TextWrapper for width-awareness.
2144 2149
2145 2150         Neither the number of 'bytes' in any encoding nor the number of
2146 2151         'characters' is appropriate for calculating terminal columns of a string.
2147 2152
2148 2153         The original TextWrapper implementation uses the built-in 'len()'
2149 2154         directly, so overriding is needed to use the width of each character.
2150 2155
2151 2156         In addition, characters classified as 'ambiguous' width are
2152 2157         treated as wide in East Asian locales, but as narrow elsewhere.
2153 2158
2154 2159         This requires a user decision to determine the width of such characters.
2155 2160 """
2156 2161 def _cutdown(self, ucstr, space_left):
2157 2162 l = 0
2158 2163 colwidth = encoding.ucolwidth
2159 2164 for i in xrange(len(ucstr)):
2160 2165 l += colwidth(ucstr[i])
2161 2166 if space_left < l:
2162 2167 return (ucstr[:i], ucstr[i:])
2163 2168 return ucstr, ''
2164 2169
2165 2170 # overriding of base class
2166 2171 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
2167 2172 space_left = max(width - cur_len, 1)
2168 2173
2169 2174 if self.break_long_words:
2170 2175 cut, res = self._cutdown(reversed_chunks[-1], space_left)
2171 2176 cur_line.append(cut)
2172 2177 reversed_chunks[-1] = res
2173 2178 elif not cur_line:
2174 2179 cur_line.append(reversed_chunks.pop())
2175 2180
2176 2181 # this overriding code is imported from TextWrapper of Python 2.6
2177 2182 # to calculate columns of string by 'encoding.ucolwidth()'
2178 2183 def _wrap_chunks(self, chunks):
2179 2184 colwidth = encoding.ucolwidth
2180 2185
2181 2186 lines = []
2182 2187 if self.width <= 0:
2183 2188 raise ValueError("invalid width %r (must be > 0)" % self.width)
2184 2189
2185 2190 # Arrange in reverse order so items can be efficiently popped
2186 2191             # from a stack of chunks.
2187 2192 chunks.reverse()
2188 2193
2189 2194 while chunks:
2190 2195
2191 2196 # Start the list of chunks that will make up the current line.
2192 2197 # cur_len is just the length of all the chunks in cur_line.
2193 2198 cur_line = []
2194 2199 cur_len = 0
2195 2200
2196 2201 # Figure out which static string will prefix this line.
2197 2202 if lines:
2198 2203 indent = self.subsequent_indent
2199 2204 else:
2200 2205 indent = self.initial_indent
2201 2206
2202 2207 # Maximum width for this line.
2203 2208 width = self.width - len(indent)
2204 2209
2205 2210 # First chunk on line is whitespace -- drop it, unless this
2206 2211 # is the very beginning of the text (i.e. no lines started yet).
2207 2212 if self.drop_whitespace and chunks[-1].strip() == '' and lines:
2208 2213 del chunks[-1]
2209 2214
2210 2215 while chunks:
2211 2216 l = colwidth(chunks[-1])
2212 2217
2213 2218 # Can at least squeeze this chunk onto the current line.
2214 2219 if cur_len + l <= width:
2215 2220 cur_line.append(chunks.pop())
2216 2221 cur_len += l
2217 2222
2218 2223 # Nope, this line is full.
2219 2224 else:
2220 2225 break
2221 2226
2222 2227 # The current line is full, and the next chunk is too big to
2223 2228 # fit on *any* line (not just this one).
2224 2229 if chunks and colwidth(chunks[-1]) > width:
2225 2230 self._handle_long_word(chunks, cur_line, cur_len, width)
2226 2231
2227 2232 # If the last chunk on this line is all whitespace, drop it.
2228 2233 if (self.drop_whitespace and
2229 2234 cur_line and cur_line[-1].strip() == ''):
2230 2235 del cur_line[-1]
2231 2236
2232 2237 # Convert current line back to a string and store it in list
2233 2238 # of all lines (return value).
2234 2239 if cur_line:
2235 2240 lines.append(indent + ''.join(cur_line))
2236 2241
2237 2242 return lines
2238 2243
2239 2244 global MBTextWrapper
2240 2245 MBTextWrapper = tw
2241 2246 return tw(**kwargs)
2242 2247
2243 2248 def wrap(line, width, initindent='', hangindent=''):
2244 2249 maxindent = max(len(hangindent), len(initindent))
2245 2250 if width <= maxindent:
2246 2251 # adjust for weird terminal size
2247 2252 width = max(78, maxindent + 1)
2248 2253 line = line.decode(encoding.encoding, encoding.encodingmode)
2249 2254 initindent = initindent.decode(encoding.encoding, encoding.encodingmode)
2250 2255 hangindent = hangindent.decode(encoding.encoding, encoding.encodingmode)
2251 2256 wrapper = MBTextWrapper(width=width,
2252 2257 initial_indent=initindent,
2253 2258 subsequent_indent=hangindent)
2254 2259 return wrapper.fill(line).encode(encoding.encoding)
2255 2260
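# Usage sketch for wrap() (hypothetical helper, not part of the original
# module; ASCII input assumed so byte length equals display width):
# hanging-indent wrapping at a narrow width.
def _wrapexample():
    text = 'the quick brown fox jumps over the lazy dog'
    wrapped = wrap(text, 16, initindent='* ', hangindent='  ')
    assert all(len(ln) <= 16 for ln in wrapped.splitlines())
    assert wrapped.splitlines()[0].startswith('* ')
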
2256 2261 if (pyplatform.python_implementation() == 'CPython' and
2257 2262 sys.version_info < (3, 0)):
2258 2263 # There is an issue in CPython that some IO methods do not handle EINTR
2259 2264 # correctly. The following table shows what CPython version (and functions)
2260 2265 # are affected (buggy: has the EINTR bug, okay: otherwise):
2261 2266 #
2262 2267 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2263 2268 # --------------------------------------------------
2264 2269 # fp.__iter__ | buggy | buggy | okay
2265 2270 # fp.read* | buggy | okay [1] | okay
2266 2271 #
2267 2272 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2268 2273 #
2269 2274 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2270 2275 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2271 2276 #
2272 2277 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2273 2278 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2274 2279 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2275 2280 # fp.__iter__ but not other fp.read* methods.
2276 2281 #
2277 2282 # On modern systems like Linux, the "read" syscall cannot be interrupted
2278 2283 # when reading "fast" files like on-disk files. So the EINTR issue only
2279 2284 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2280 2285 # files approximately as "fast" files and use the fast (unsafe) code path,
2281 2286 # to minimize the performance impact.
2282 2287 if sys.version_info >= (2, 7, 4):
2283 2288 # fp.readline deals with EINTR correctly, use it as a workaround.
2284 2289 def _safeiterfile(fp):
2285 2290 return iter(fp.readline, '')
2286 2291 else:
2287 2292 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2288 2293 # note: this may block longer than necessary because of bufsize.
2289 2294 def _safeiterfile(fp, bufsize=4096):
2290 2295 fd = fp.fileno()
2291 2296 line = ''
2292 2297 while True:
2293 2298 try:
2294 2299 buf = os.read(fd, bufsize)
2295 2300 except OSError as ex:
2296 2301 # os.read only raises EINTR before any data is read
2297 2302 if ex.errno == errno.EINTR:
2298 2303 continue
2299 2304 else:
2300 2305 raise
2301 2306 line += buf
2302 2307 if '\n' in buf:
2303 2308 splitted = line.splitlines(True)
2304 2309 line = ''
2305 2310 for l in splitted:
2306 2311 if l[-1] == '\n':
2307 2312 yield l
2308 2313 else:
2309 2314 line = l
2310 2315 if not buf:
2311 2316 break
2312 2317 if line:
2313 2318 yield line
2314 2319
2315 2320 def iterfile(fp):
2316 2321 fastpath = True
2317 2322 if type(fp) is file:
2318 2323 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2319 2324 if fastpath:
2320 2325 return fp
2321 2326 else:
2322 2327 return _safeiterfile(fp)
2323 2328 else:
2324 2329     # PyPy and CPython 3 do not have the EINTR issue, so no workaround is needed.
2325 2330 def iterfile(fp):
2326 2331 return fp
2327 2332
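# Usage sketch for iterfile() (hypothetical helper, not part of the
# original module): callers just write "for line in iterfile(fp)";
# whichever EINTR workaround applies was already selected at import time.
def _iterfileexample():
    fd, path = tempfile.mkstemp()
    os.write(fd, 'one\ntwo\n')
    os.close(fd)
    fp = open(path, 'rb')
    try:
        assert list(iterfile(fp)) == ['one\n', 'two\n']
    finally:
        fp.close()
        os.unlink(path)
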
2328 2333 def iterlines(iterator):
2329 2334 for chunk in iterator:
2330 2335 for line in chunk.splitlines():
2331 2336 yield line
2332 2337
2333 2338 def expandpath(path):
2334 2339 return os.path.expanduser(os.path.expandvars(path))
2335 2340
2336 2341 def hgcmd():
2337 2342 """Return the command used to execute current hg
2338 2343
2339 2344 This is different from hgexecutable() because on Windows we want
2340 2345 to avoid things opening new shell windows like batch files, so we
2341 2346 get either the python call or current executable.
2342 2347 """
2343 2348 if mainfrozen():
2344 2349 if getattr(sys, 'frozen', None) == 'macosx_app':
2345 2350 # Env variable set by py2app
2346 2351 return [encoding.environ['EXECUTABLEPATH']]
2347 2352 else:
2348 2353 return [pycompat.sysexecutable]
2349 2354 return gethgcmd()
2350 2355
2351 2356 def rundetached(args, condfn):
2352 2357 """Execute the argument list in a detached process.
2353 2358
2354 2359 condfn is a callable which is called repeatedly and should return
2355 2360 True once the child process is known to have started successfully.
2356 2361 At this point, the child process PID is returned. If the child
2357 2362 process fails to start or finishes before condfn() evaluates to
2358 2363 True, return -1.
2359 2364 """
2360 2365 # Windows case is easier because the child process is either
2361 2366 # successfully starting and validating the condition or exiting
2362 2367 # on failure. We just poll on its PID. On Unix, if the child
2363 2368 # process fails to start, it will be left in a zombie state until
2364 2369     # the parent waits on it, which we cannot do since we expect a
2365 2370     # long-running process on success. Instead we listen for SIGCHLD telling
2366 2371 # us our child process terminated.
2367 2372 terminated = set()
2368 2373 def handler(signum, frame):
2369 2374 terminated.add(os.wait())
2370 2375 prevhandler = None
2371 2376 SIGCHLD = getattr(signal, 'SIGCHLD', None)
2372 2377 if SIGCHLD is not None:
2373 2378 prevhandler = signal.signal(SIGCHLD, handler)
2374 2379 try:
2375 2380 pid = spawndetached(args)
2376 2381 while not condfn():
2377 2382 if ((pid in terminated or not testpid(pid))
2378 2383 and not condfn()):
2379 2384 return -1
2380 2385 time.sleep(0.1)
2381 2386 return pid
2382 2387 finally:
2383 2388 if prevhandler is not None:
2384 2389 signal.signal(signal.SIGCHLD, prevhandler)
2385 2390
2386 2391 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2387 2392 """Return the result of interpolating items in the mapping into string s.
2388 2393
2389 2394 prefix is a single character string, or a two character string with
2390 2395 a backslash as the first character if the prefix needs to be escaped in
2391 2396 a regular expression.
2392 2397
2393 2398 fn is an optional function that will be applied to the replacement text
2394 2399 just before replacement.
2395 2400
2396 2401 escape_prefix is an optional flag that allows using doubled prefix for
2397 2402 its escaping.
2398 2403 """
2399 2404 fn = fn or (lambda s: s)
2400 2405 patterns = '|'.join(mapping.keys())
2401 2406 if escape_prefix:
2402 2407 patterns += '|' + prefix
2403 2408 if len(prefix) > 1:
2404 2409 prefix_char = prefix[1:]
2405 2410 else:
2406 2411 prefix_char = prefix
2407 2412 mapping[prefix_char] = prefix_char
2408 2413 r = remod.compile(r'%s(%s)' % (prefix, patterns))
2409 2414 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2410 2415
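# Usage sketch for interpolate() (hypothetical helper and mapping, not
# part of the original module): $-style expansion where escape_prefix
# lets '$$' stand for a literal '$'.
def _interpolateexample():
    s = interpolate(r'\$', {'user': 'alice', 'repo': 'hg'},
                    '$user owns $repo', escape_prefix=True)
    assert s == 'alice owns hg'
    s = interpolate(r'\$', {'x': 'y'}, 'pay $$5 for $x', escape_prefix=True)
    assert s == 'pay $5 for y'
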
2411 2416 def getport(port):
2412 2417 """Return the port for a given network service.
2413 2418
2414 2419 If port is an integer, it's returned as is. If it's a string, it's
2415 2420 looked up using socket.getservbyname(). If there's no matching
2416 2421 service, error.Abort is raised.
2417 2422 """
2418 2423 try:
2419 2424 return int(port)
2420 2425 except ValueError:
2421 2426 pass
2422 2427
2423 2428 try:
2424 2429 return socket.getservbyname(port)
2425 2430 except socket.error:
2426 2431 raise Abort(_("no port number associated with service '%s'") % port)
2427 2432
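# Usage sketch for getport() (hypothetical helper, not part of the
# original module; the 'http' lookup assumes a standard services
# database is present).
def _getportexample():
    assert getport(8080) == 8080
    assert getport('8080') == 8080
    assert getport('http') == 80  # resolved via socket.getservbyname
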
2428 2433 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
2429 2434 '0': False, 'no': False, 'false': False, 'off': False,
2430 2435 'never': False}
2431 2436
2432 2437 def parsebool(s):
2433 2438 """Parse s into a boolean.
2434 2439
2435 2440 If s is not a valid boolean, returns None.
2436 2441 """
2437 2442 return _booleans.get(s.lower(), None)
2438 2443
2439 2444 _hextochr = dict((a + b, chr(int(a + b, 16)))
2440 2445 for a in string.hexdigits for b in string.hexdigits)
2441 2446
2442 2447 class url(object):
2443 2448 r"""Reliable URL parser.
2444 2449
2445 2450 This parses URLs and provides attributes for the following
2446 2451 components:
2447 2452
2448 2453 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2449 2454
2450 2455 Missing components are set to None. The only exception is
2451 2456 fragment, which is set to '' if present but empty.
2452 2457
2453 2458 If parsefragment is False, fragment is included in query. If
2454 2459 parsequery is False, query is included in path. If both are
2455 2460 False, both fragment and query are included in path.
2456 2461
2457 2462 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2458 2463
2459 2464 Note that for backward compatibility reasons, bundle URLs do not
2460 2465 take host names. That means 'bundle://../' has a path of '../'.
2461 2466
2462 2467 Examples:
2463 2468
2464 2469 >>> url('http://www.ietf.org/rfc/rfc2396.txt')
2465 2470 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2466 2471 >>> url('ssh://[::1]:2200//home/joe/repo')
2467 2472 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2468 2473 >>> url('file:///home/joe/repo')
2469 2474 <url scheme: 'file', path: '/home/joe/repo'>
2470 2475 >>> url('file:///c:/temp/foo/')
2471 2476 <url scheme: 'file', path: 'c:/temp/foo/'>
2472 2477 >>> url('bundle:foo')
2473 2478 <url scheme: 'bundle', path: 'foo'>
2474 2479 >>> url('bundle://../foo')
2475 2480 <url scheme: 'bundle', path: '../foo'>
2476 2481 >>> url(r'c:\foo\bar')
2477 2482 <url path: 'c:\\foo\\bar'>
2478 2483 >>> url(r'\\blah\blah\blah')
2479 2484 <url path: '\\\\blah\\blah\\blah'>
2480 2485 >>> url(r'\\blah\blah\blah#baz')
2481 2486 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2482 2487 >>> url(r'file:///C:\users\me')
2483 2488 <url scheme: 'file', path: 'C:\\users\\me'>
2484 2489
2485 2490 Authentication credentials:
2486 2491
2487 2492 >>> url('ssh://joe:xyz@x/repo')
2488 2493 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2489 2494 >>> url('ssh://joe@x/repo')
2490 2495 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2491 2496
2492 2497 Query strings and fragments:
2493 2498
2494 2499 >>> url('http://host/a?b#c')
2495 2500 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2496 2501 >>> url('http://host/a?b#c', parsequery=False, parsefragment=False)
2497 2502 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2498 2503
2499 2504 Empty path:
2500 2505
2501 2506 >>> url('')
2502 2507 <url path: ''>
2503 2508 >>> url('#a')
2504 2509 <url path: '', fragment: 'a'>
2505 2510 >>> url('http://host/')
2506 2511 <url scheme: 'http', host: 'host', path: ''>
2507 2512 >>> url('http://host/#a')
2508 2513 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2509 2514
2510 2515 Only scheme:
2511 2516
2512 2517 >>> url('http:')
2513 2518 <url scheme: 'http'>
2514 2519 """
2515 2520
2516 2521 _safechars = "!~*'()+"
2517 2522 _safepchars = "/!~*'()+:\\"
2518 2523 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2519 2524
2520 2525 def __init__(self, path, parsequery=True, parsefragment=True):
2521 2526 # We slowly chomp away at path until we have only the path left
2522 2527 self.scheme = self.user = self.passwd = self.host = None
2523 2528 self.port = self.path = self.query = self.fragment = None
2524 2529 self._localpath = True
2525 2530 self._hostport = ''
2526 2531 self._origpath = path
2527 2532
2528 2533 if parsefragment and '#' in path:
2529 2534 path, self.fragment = path.split('#', 1)
2530 2535
2531 2536 # special case for Windows drive letters and UNC paths
2532 2537 if hasdriveletter(path) or path.startswith('\\\\'):
2533 2538 self.path = path
2534 2539 return
2535 2540
2536 2541 # For compatibility reasons, we can't handle bundle paths as
2537 2542         # normal URLs
2538 2543 if path.startswith('bundle:'):
2539 2544 self.scheme = 'bundle'
2540 2545 path = path[7:]
2541 2546 if path.startswith('//'):
2542 2547 path = path[2:]
2543 2548 self.path = path
2544 2549 return
2545 2550
2546 2551 if self._matchscheme(path):
2547 2552 parts = path.split(':', 1)
2548 2553 if parts[0]:
2549 2554 self.scheme, path = parts
2550 2555 self._localpath = False
2551 2556
2552 2557 if not path:
2553 2558 path = None
2554 2559 if self._localpath:
2555 2560 self.path = ''
2556 2561 return
2557 2562 else:
2558 2563 if self._localpath:
2559 2564 self.path = path
2560 2565 return
2561 2566
2562 2567 if parsequery and '?' in path:
2563 2568 path, self.query = path.split('?', 1)
2564 2569 if not path:
2565 2570 path = None
2566 2571 if not self.query:
2567 2572 self.query = None
2568 2573
2569 2574 # // is required to specify a host/authority
2570 2575 if path and path.startswith('//'):
2571 2576 parts = path[2:].split('/', 1)
2572 2577 if len(parts) > 1:
2573 2578 self.host, path = parts
2574 2579 else:
2575 2580 self.host = parts[0]
2576 2581 path = None
2577 2582 if not self.host:
2578 2583 self.host = None
2579 2584 # path of file:///d is /d
2580 2585 # path of file:///d:/ is d:/, not /d:/
2581 2586 if path and not hasdriveletter(path):
2582 2587 path = '/' + path
2583 2588
2584 2589 if self.host and '@' in self.host:
2585 2590 self.user, self.host = self.host.rsplit('@', 1)
2586 2591 if ':' in self.user:
2587 2592 self.user, self.passwd = self.user.split(':', 1)
2588 2593 if not self.host:
2589 2594 self.host = None
2590 2595
2591 2596 # Don't split on colons in IPv6 addresses without ports
2592 2597 if (self.host and ':' in self.host and
2593 2598 not (self.host.startswith('[') and self.host.endswith(']'))):
2594 2599 self._hostport = self.host
2595 2600 self.host, self.port = self.host.rsplit(':', 1)
2596 2601 if not self.host:
2597 2602 self.host = None
2598 2603
2599 2604 if (self.host and self.scheme == 'file' and
2600 2605 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2601 2606 raise Abort(_('file:// URLs can only refer to localhost'))
2602 2607
2603 2608 self.path = path
2604 2609
2605 2610 # leave the query string escaped
2606 2611 for a in ('user', 'passwd', 'host', 'port',
2607 2612 'path', 'fragment'):
2608 2613 v = getattr(self, a)
2609 2614 if v is not None:
2610 2615 setattr(self, a, pycompat.urlunquote(v))
2611 2616
2612 2617 def __repr__(self):
2613 2618 attrs = []
2614 2619 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2615 2620 'query', 'fragment'):
2616 2621 v = getattr(self, a)
2617 2622 if v is not None:
2618 2623 attrs.append('%s: %r' % (a, v))
2619 2624 return '<url %s>' % ', '.join(attrs)
2620 2625
2621 2626 def __str__(self):
2622 2627 r"""Join the URL's components back into a URL string.
2623 2628
2624 2629 Examples:
2625 2630
2626 2631 >>> str(url('http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2627 2632 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2628 2633 >>> str(url('http://user:pw@host:80/?foo=bar&baz=42'))
2629 2634 'http://user:pw@host:80/?foo=bar&baz=42'
2630 2635 >>> str(url('http://user:pw@host:80/?foo=bar%3dbaz'))
2631 2636 'http://user:pw@host:80/?foo=bar%3dbaz'
2632 2637 >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
2633 2638 'ssh://user:pw@[::1]:2200//home/joe#'
2634 2639 >>> str(url('http://localhost:80//'))
2635 2640 'http://localhost:80//'
2636 2641 >>> str(url('http://localhost:80/'))
2637 2642 'http://localhost:80/'
2638 2643 >>> str(url('http://localhost:80'))
2639 2644 'http://localhost:80/'
2640 2645 >>> str(url('bundle:foo'))
2641 2646 'bundle:foo'
2642 2647 >>> str(url('bundle://../foo'))
2643 2648 'bundle:../foo'
2644 2649 >>> str(url('path'))
2645 2650 'path'
2646 2651 >>> str(url('file:///tmp/foo/bar'))
2647 2652 'file:///tmp/foo/bar'
2648 2653 >>> str(url('file:///c:/tmp/foo/bar'))
2649 2654 'file:///c:/tmp/foo/bar'
2650 2655 >>> print url(r'bundle:foo\bar')
2651 2656 bundle:foo\bar
2652 2657 >>> print url(r'file:///D:\data\hg')
2653 2658 file:///D:\data\hg
2654 2659 """
2655 2660 if self._localpath:
2656 2661 s = self.path
2657 2662 if self.scheme == 'bundle':
2658 2663 s = 'bundle:' + s
2659 2664 if self.fragment:
2660 2665 s += '#' + self.fragment
2661 2666 return s
2662 2667
2663 2668 s = self.scheme + ':'
2664 2669 if self.user or self.passwd or self.host:
2665 2670 s += '//'
2666 2671 elif self.scheme and (not self.path or self.path.startswith('/')
2667 2672 or hasdriveletter(self.path)):
2668 2673 s += '//'
2669 2674 if hasdriveletter(self.path):
2670 2675 s += '/'
2671 2676 if self.user:
2672 2677 s += urlreq.quote(self.user, safe=self._safechars)
2673 2678 if self.passwd:
2674 2679 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2675 2680 if self.user or self.passwd:
2676 2681 s += '@'
2677 2682 if self.host:
2678 2683 if not (self.host.startswith('[') and self.host.endswith(']')):
2679 2684 s += urlreq.quote(self.host)
2680 2685 else:
2681 2686 s += self.host
2682 2687 if self.port:
2683 2688 s += ':' + urlreq.quote(self.port)
2684 2689 if self.host:
2685 2690 s += '/'
2686 2691 if self.path:
2687 2692 # TODO: similar to the query string, we should not unescape the
2688 2693 # path when we store it, the path might contain '%2f' = '/',
2689 2694 # which we should *not* escape.
2690 2695 s += urlreq.quote(self.path, safe=self._safepchars)
2691 2696 if self.query:
2692 2697 # we store the query in escaped form.
2693 2698 s += '?' + self.query
2694 2699 if self.fragment is not None:
2695 2700 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2696 2701 return s
2697 2702
2698 2703 def authinfo(self):
2699 2704 user, passwd = self.user, self.passwd
2700 2705 try:
2701 2706 self.user, self.passwd = None, None
2702 2707 s = str(self)
2703 2708 finally:
2704 2709 self.user, self.passwd = user, passwd
2705 2710 if not self.user:
2706 2711 return (s, None)
2707 2712 # authinfo[1] is passed to urllib2 password manager, and its
2708 2713 # URIs must not contain credentials. The host is passed in the
2709 2714 # URIs list because Python < 2.4.3 uses only that to search for
2710 2715 # a password.
2711 2716 return (s, (None, (s, self.host),
2712 2717 self.user, self.passwd or ''))
2713 2718
2714 2719 def isabs(self):
2715 2720 if self.scheme and self.scheme != 'file':
2716 2721 return True # remote URL
2717 2722 if hasdriveletter(self.path):
2718 2723 return True # absolute for our purposes - can't be joined()
2719 2724 if self.path.startswith(r'\\'):
2720 2725 return True # Windows UNC path
2721 2726 if self.path.startswith('/'):
2722 2727 return True # POSIX-style
2723 2728 return False
2724 2729
2725 2730 def localpath(self):
2726 2731 if self.scheme == 'file' or self.scheme == 'bundle':
2727 2732 path = self.path or '/'
2728 2733 # For Windows, we need to promote hosts containing drive
2729 2734 # letters to paths with drive letters.
2730 2735 if hasdriveletter(self._hostport):
2731 2736 path = self._hostport + '/' + self.path
2732 2737 elif (self.host is not None and self.path
2733 2738 and not hasdriveletter(path)):
2734 2739 path = '/' + path
2735 2740 return path
2736 2741 return self._origpath
2737 2742
2738 2743 def islocal(self):
2739 2744 '''whether localpath will return something that posixfile can open'''
2740 2745 return (not self.scheme or self.scheme == 'file'
2741 2746 or self.scheme == 'bundle')
2742 2747
2743 2748 def hasscheme(path):
2744 2749 return bool(url(path).scheme)
2745 2750
2746 2751 def hasdriveletter(path):
2747 2752 return path and path[1:2] == ':' and path[0:1].isalpha()
2748 2753
2749 2754 def urllocalpath(path):
2750 2755 return url(path, parsequery=False, parsefragment=False).localpath()
2751 2756
2752 2757 def hidepassword(u):
2753 2758 '''hide user credential in a url string'''
2754 2759 u = url(u)
2755 2760 if u.passwd:
2756 2761 u.passwd = '***'
2757 2762 return str(u)
2758 2763
2759 2764 def removeauth(u):
2760 2765 '''remove all authentication information from a url string'''
2761 2766 u = url(u)
2762 2767 u.user = u.passwd = None
2763 2768 return str(u)
2764 2769
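# Usage sketch (hypothetical helper and URL, not part of the original
# module): scrubbing credentials before echoing a URL to a user or log.
def _scrubexample():
    u = 'http://joe:secret@example.com/repo'
    assert hidepassword(u) == 'http://joe:***@example.com/repo'
    assert removeauth(u) == 'http://example.com/repo'
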
2765 2770 timecount = unitcountfn(
2766 2771 (1, 1e3, _('%.0f s')),
2767 2772 (100, 1, _('%.1f s')),
2768 2773 (10, 1, _('%.2f s')),
2769 2774 (1, 1, _('%.3f s')),
2770 2775 (100, 0.001, _('%.1f ms')),
2771 2776 (10, 0.001, _('%.2f ms')),
2772 2777 (1, 0.001, _('%.3f ms')),
2773 2778 (100, 0.000001, _('%.1f us')),
2774 2779 (10, 0.000001, _('%.2f us')),
2775 2780 (1, 0.000001, _('%.3f us')),
2776 2781 (100, 0.000000001, _('%.1f ns')),
2777 2782 (10, 0.000000001, _('%.2f ns')),
2778 2783 (1, 0.000000001, _('%.3f ns')),
2779 2784 )
2780 2785
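# Usage sketch for timecount() (hypothetical helper, not part of the
# original module): like bytecount, the first matching row in the table
# above selects the unit and precision.
def _timecountexample():
    assert timecount(2.5) == '2.500 s'
    assert timecount(0.005) == '5.000 ms'
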
2781 2786 _timenesting = [0]
2782 2787
2783 2788 def timed(func):
2784 2789 '''Report the execution time of a function call to stderr.
2785 2790
2786 2791 During development, use as a decorator when you need to measure
2787 2792 the cost of a function, e.g. as follows:
2788 2793
2789 2794 @util.timed
2790 2795 def foo(a, b, c):
2791 2796 pass
2792 2797 '''
2793 2798
2794 2799 def wrapper(*args, **kwargs):
2795 2800 start = time.time()
2796 2801 indent = 2
2797 2802 _timenesting[0] += indent
2798 2803 try:
2799 2804 return func(*args, **kwargs)
2800 2805 finally:
2801 2806 elapsed = time.time() - start
2802 2807 _timenesting[0] -= indent
2803 2808 stderr.write('%s%s: %s\n' %
2804 2809 (' ' * _timenesting[0], func.__name__,
2805 2810 timecount(elapsed)))
2806 2811 return wrapper
2807 2812
2808 2813 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
2809 2814 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
2810 2815
2811 2816 def sizetoint(s):
2812 2817 '''Convert a space specifier to a byte count.
2813 2818
2814 2819 >>> sizetoint('30')
2815 2820 30
2816 2821 >>> sizetoint('2.2kb')
2817 2822 2252
2818 2823 >>> sizetoint('6M')
2819 2824 6291456
2820 2825 '''
2821 2826 t = s.strip().lower()
2822 2827 try:
2823 2828 for k, u in _sizeunits:
2824 2829 if t.endswith(k):
2825 2830 return int(float(t[:-len(k)]) * u)
2826 2831 return int(t)
2827 2832 except ValueError:
2828 2833 raise error.ParseError(_("couldn't parse size: %s") % s)
2829 2834
2830 2835 class hooks(object):
2831 2836 '''A collection of hook functions that can be used to extend a
2832 2837 function's behavior. Hooks are called in lexicographic order,
2833 2838 based on the names of their sources.'''
2834 2839
2835 2840 def __init__(self):
2836 2841 self._hooks = []
2837 2842
2838 2843 def add(self, source, hook):
2839 2844 self._hooks.append((source, hook))
2840 2845
2841 2846 def __call__(self, *args):
2842 2847 self._hooks.sort(key=lambda x: x[0])
2843 2848 results = []
2844 2849 for source, hook in self._hooks:
2845 2850 results.append(hook(*args))
2846 2851 return results
2847 2852
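# Usage sketch for hooks (hypothetical helper, not part of the original
# module): hooks run in lexicographic order of their source names, not
# in registration order.
def _hooksexample():
    calls = []
    h = hooks()
    h.add('zzz', lambda: calls.append('z'))
    h.add('aaa', lambda: calls.append('a'))
    h()
    assert calls == ['a', 'z']
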
2848 2853 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%s'):
2849 2854 '''Yields lines for a nicely formatted stacktrace.
2850 2855 Skips the 'skip' last entries.
2851 2856 Each file+linenumber is formatted according to fileline.
2852 2857 Each line is formatted according to line.
2853 2858 If line is None, it yields:
2854 2859 length of longest filepath+line number,
2855 2860 filepath+linenumber,
2856 2861 function
2857 2862
2858 2863     Not to be used in production code, but very convenient while developing.
2859 2864 '''
2860 2865 entries = [(fileline % (fn, ln), func)
2861 2866 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]]
2862 2867 if entries:
2863 2868 fnmax = max(len(entry[0]) for entry in entries)
2864 2869 for fnln, func in entries:
2865 2870 if line is None:
2866 2871 yield (fnmax, fnln, func)
2867 2872 else:
2868 2873 yield line % (fnmax, fnln, func)
2869 2874
2870 2875 def debugstacktrace(msg='stacktrace', skip=0, f=stderr, otherf=stdout):
2871 2876 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
2872 2877 Skips the 'skip' last entries. By default it will flush stdout first.
2873 2878     It can be used everywhere and intentionally does not require a ui object.
2874 2879     Not to be used in production code, but very convenient while developing.
2875 2880 '''
2876 2881 if otherf:
2877 2882 otherf.flush()
2878 2883 f.write('%s at:\n' % msg)
2879 2884 for line in getstackframes(skip + 1):
2880 2885 f.write(line)
2881 2886 f.flush()
2882 2887
2883 2888 class dirs(object):
2884 2889 '''a multiset of directory names from a dirstate or manifest'''
2885 2890
2886 2891 def __init__(self, map, skip=None):
2887 2892 self._dirs = {}
2888 2893 addpath = self.addpath
2889 2894 if safehasattr(map, 'iteritems') and skip is not None:
2890 2895 for f, s in map.iteritems():
2891 2896 if s[0] != skip:
2892 2897 addpath(f)
2893 2898 else:
2894 2899 for f in map:
2895 2900 addpath(f)
2896 2901
2897 2902 def addpath(self, path):
2898 2903 dirs = self._dirs
2899 2904 for base in finddirs(path):
2900 2905 if base in dirs:
2901 2906 dirs[base] += 1
2902 2907 return
2903 2908 dirs[base] = 1
2904 2909
2905 2910 def delpath(self, path):
2906 2911 dirs = self._dirs
2907 2912 for base in finddirs(path):
2908 2913 if dirs[base] > 1:
2909 2914 dirs[base] -= 1
2910 2915 return
2911 2916 del dirs[base]
2912 2917
2913 2918 def __iter__(self):
2914 2919 return self._dirs.iterkeys()
2915 2920
2916 2921 def __contains__(self, d):
2917 2922 return d in self._dirs
2918 2923
2919 2924 if safehasattr(parsers, 'dirs'):
2920 2925 dirs = parsers.dirs
2921 2926
2922 2927 def finddirs(path):
2923 2928 pos = path.rfind('/')
2924 2929 while pos != -1:
2925 2930 yield path[:pos]
2926 2931 pos = path.rfind('/', 0, pos)
2927 2932
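# Usage sketch for finddirs() (hypothetical helper, not part of the
# original module): ancestors are yielded from the deepest directory up,
# excluding the path itself.
def _finddirsexample():
    assert list(finddirs('a/b/c')) == ['a/b', 'a']
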
2928 2933 class ctxmanager(object):
2929 2934 '''A context manager for use in 'with' blocks to allow multiple
2930 2935 contexts to be entered at once. This is both safer and more
2931 2936 flexible than contextlib.nested.
2932 2937
2933 2938 Once Mercurial supports Python 2.7+, this will become mostly
2934 2939 unnecessary.
2935 2940 '''
2936 2941
2937 2942 def __init__(self, *args):
2938 2943 '''Accepts a list of no-argument functions that return context
2939 2944 managers. These will be invoked at __call__ time.'''
2940 2945 self._pending = args
2941 2946 self._atexit = []
2942 2947
2943 2948 def __enter__(self):
2944 2949 return self
2945 2950
2946 2951 def enter(self):
2947 2952 '''Create and enter context managers in the order in which they were
2948 2953 passed to the constructor.'''
2949 2954 values = []
2950 2955 for func in self._pending:
2951 2956 obj = func()
2952 2957 values.append(obj.__enter__())
2953 2958 self._atexit.append(obj.__exit__)
2954 2959 del self._pending
2955 2960 return values
2956 2961
2957 2962 def atexit(self, func, *args, **kwargs):
2958 2963 '''Add a function to call when this context manager exits. The
2959 2964 ordering of multiple atexit calls is unspecified, save that
2960 2965 they will happen before any __exit__ functions.'''
2961 2966 def wrapper(exc_type, exc_val, exc_tb):
2962 2967 func(*args, **kwargs)
2963 2968 self._atexit.append(wrapper)
2964 2969 return func
2965 2970
2966 2971 def __exit__(self, exc_type, exc_val, exc_tb):
2967 2972 '''Context managers are exited in the reverse order from which
2968 2973 they were created.'''
2969 2974 received = exc_type is not None
2970 2975 suppressed = False
2971 2976 pending = None
2972 2977 self._atexit.reverse()
2973 2978 for exitfunc in self._atexit:
2974 2979 try:
2975 2980 if exitfunc(exc_type, exc_val, exc_tb):
2976 2981 suppressed = True
2977 2982 exc_type = None
2978 2983 exc_val = None
2979 2984 exc_tb = None
2980 2985 except BaseException:
2982 2987                 exc_type, exc_val, exc_tb = pending = sys.exc_info()
2983 2988 del self._atexit
2984 2989 if pending:
2985 2990 raise exc_val
2986 2991 return received and suppressed
2987 2992
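# Usage sketch for ctxmanager (hypothetical helper, not part of the
# original module): two managers entered in order and exited in reverse,
# mirroring what contextlib.nested used to do.
def _ctxmanagerexample():
    import contextlib
    events = []
    @contextlib.contextmanager
    def mgr(name):
        events.append('enter ' + name)
        yield name
        events.append('exit ' + name)
    with ctxmanager(lambda: mgr('a'), lambda: mgr('b')) as c:
        assert c.enter() == ['a', 'b']
    assert events == ['enter a', 'enter b', 'exit b', 'exit a']
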
2988 2993 # compression code
2989 2994
2990 2995 SERVERROLE = 'server'
2991 2996 CLIENTROLE = 'client'
2992 2997
2993 2998 compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
2994 2999 (u'name', u'serverpriority',
2995 3000 u'clientpriority'))
2996 3001
2997 3002 class compressormanager(object):
2998 3003 """Holds registrations of various compression engines.
2999 3004
3000 3005 This class essentially abstracts the differences between compression
3001 3006 engines to allow new compression formats to be added easily, possibly from
3002 3007 extensions.
3003 3008
3004 3009 Compressors are registered against the global instance by calling its
3005 3010 ``register()`` method.
3006 3011 """
3007 3012 def __init__(self):
3008 3013 self._engines = {}
3009 3014 # Bundle spec human name to engine name.
3010 3015 self._bundlenames = {}
3011 3016 # Internal bundle identifier to engine name.
3012 3017 self._bundletypes = {}
3013 3018 # Revlog header to engine name.
3014 3019 self._revlogheaders = {}
3015 3020 # Wire proto identifier to engine name.
3016 3021 self._wiretypes = {}
3017 3022
3018 3023 def __getitem__(self, key):
3019 3024 return self._engines[key]
3020 3025
3021 3026 def __contains__(self, key):
3022 3027 return key in self._engines
3023 3028
3024 3029 def __iter__(self):
3025 3030 return iter(self._engines.keys())
3026 3031
3027 3032 def register(self, engine):
3028 3033 """Register a compression engine with the manager.
3029 3034
3030 3035 The argument must be a ``compressionengine`` instance.
3031 3036 """
3032 3037 if not isinstance(engine, compressionengine):
3033 3038 raise ValueError(_('argument must be a compressionengine'))
3034 3039
3035 3040 name = engine.name()
3036 3041
3037 3042 if name in self._engines:
3038 3043 raise error.Abort(_('compression engine %s already registered') %
3039 3044 name)
3040 3045
3041 3046 bundleinfo = engine.bundletype()
3042 3047 if bundleinfo:
3043 3048 bundlename, bundletype = bundleinfo
3044 3049
3045 3050 if bundlename in self._bundlenames:
3046 3051 raise error.Abort(_('bundle name %s already registered') %
3047 3052 bundlename)
3048 3053 if bundletype in self._bundletypes:
3049 3054 raise error.Abort(_('bundle type %s already registered by %s') %
3050 3055 (bundletype, self._bundletypes[bundletype]))
3051 3056
3052 3057             # Only register an external-facing name if one was declared.
3053 3058 if bundlename:
3054 3059 self._bundlenames[bundlename] = name
3055 3060
3056 3061 self._bundletypes[bundletype] = name
3057 3062
3058 3063 wiresupport = engine.wireprotosupport()
3059 3064 if wiresupport:
3060 3065 wiretype = wiresupport.name
3061 3066 if wiretype in self._wiretypes:
3062 3067 raise error.Abort(_('wire protocol compression %s already '
3063 3068 'registered by %s') %
3064 3069 (wiretype, self._wiretypes[wiretype]))
3065 3070
3066 3071 self._wiretypes[wiretype] = name
3067 3072
3068 3073 revlogheader = engine.revlogheader()
3069 3074 if revlogheader and revlogheader in self._revlogheaders:
3070 3075 raise error.Abort(_('revlog header %s already registered by %s') %
3071 3076 (revlogheader, self._revlogheaders[revlogheader]))
3072 3077
3073 3078 if revlogheader:
3074 3079 self._revlogheaders[revlogheader] = name
3075 3080
3076 3081 self._engines[name] = engine
3077 3082
3078 3083 @property
3079 3084 def supportedbundlenames(self):
3080 3085 return set(self._bundlenames.keys())
3081 3086
3082 3087 @property
3083 3088 def supportedbundletypes(self):
3084 3089 return set(self._bundletypes.keys())
3085 3090
3086 3091 def forbundlename(self, bundlename):
3087 3092 """Obtain a compression engine registered to a bundle name.
3088 3093
3089 3094 Will raise KeyError if the bundle type isn't registered.
3090 3095
3091 3096 Will abort if the engine is known but not available.
3092 3097 """
3093 3098 engine = self._engines[self._bundlenames[bundlename]]
3094 3099 if not engine.available():
3095 3100 raise error.Abort(_('compression engine %s could not be loaded') %
3096 3101 engine.name())
3097 3102 return engine
3098 3103
3099 3104 def forbundletype(self, bundletype):
3100 3105 """Obtain a compression engine registered to a bundle type.
3101 3106
3102 3107 Will raise KeyError if the bundle type isn't registered.
3103 3108
3104 3109 Will abort if the engine is known but not available.
3105 3110 """
3106 3111 engine = self._engines[self._bundletypes[bundletype]]
3107 3112 if not engine.available():
3108 3113 raise error.Abort(_('compression engine %s could not be loaded') %
3109 3114 engine.name())
3110 3115 return engine
3111 3116
3112 3117 def supportedwireengines(self, role, onlyavailable=True):
3113 3118 """Obtain compression engines that support the wire protocol.
3114 3119
3115 3120 Returns a list of engines in prioritized order, most desired first.
3116 3121
3117 3122 If ``onlyavailable`` is set, filter out engines that can't be
3118 3123 loaded.
3119 3124 """
3120 3125 assert role in (SERVERROLE, CLIENTROLE)
3121 3126
3122 3127 attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'
3123 3128
3124 3129 engines = [self._engines[e] for e in self._wiretypes.values()]
3125 3130 if onlyavailable:
3126 3131 engines = [e for e in engines if e.available()]
3127 3132
3128 3133 def getkey(e):
3129 3134 # Sort first by priority, highest first. In case of tie, sort
3130 3135 # alphabetically. This is arbitrary, but ensures output is
3131 3136 # stable.
3132 3137 w = e.wireprotosupport()
3133 3138 return -1 * getattr(w, attr), w.name
3134 3139
3135 3140         return sorted(engines, key=getkey)
3136 3141
3137 3142 def forwiretype(self, wiretype):
3138 3143 engine = self._engines[self._wiretypes[wiretype]]
3139 3144 if not engine.available():
3140 3145 raise error.Abort(_('compression engine %s could not be loaded') %
3141 3146 engine.name())
3142 3147 return engine
3143 3148
3144 3149 def forrevlogheader(self, header):
3145 3150 """Obtain a compression engine registered to a revlog header.
3146 3151
3147 3152 Will raise KeyError if the revlog header value isn't registered.
3148 3153 """
3149 3154 return self._engines[self._revlogheaders[header]]
3150 3155
3151 3156 compengines = compressormanager()
3152 3157
3153 3158 class compressionengine(object):
3154 3159 """Base class for compression engines.
3155 3160
3156 3161 Compression engines must implement the interface defined by this class.
3157 3162 """
3158 3163 def name(self):
3159 3164 """Returns the name of the compression engine.
3160 3165
3161 3166 This is the key the engine is registered under.
3162 3167
3163 3168 This method must be implemented.
3164 3169 """
3165 3170 raise NotImplementedError()
3166 3171
3167 3172 def available(self):
3168 3173 """Whether the compression engine is available.
3169 3174
3170 3175 The intent of this method is to allow optional compression engines
3171 3176 that may not be available in all installations (such as engines relying
3172 3177 on C extensions that may not be present).
3173 3178 """
3174 3179 return True
3175 3180
3176 3181 def bundletype(self):
3177 3182 """Describes bundle identifiers for this engine.
3178 3183
3179 3184 If this compression engine isn't supported for bundles, returns None.
3180 3185
3181 3186 If this engine can be used for bundles, returns a 2-tuple of strings of
3182 3187 the user-facing "bundle spec" compression name and an internal
3183 3188 identifier used to denote the compression format within bundles. To
3184 3189 exclude the name from external usage, set the first element to ``None``.
3185 3190
3186 3191 If bundle compression is supported, the class must also implement
3187 3192 ``compressstream`` and `decompressorreader``.
3188 3193 """
3189 3194 return None
3190 3195
3191 3196 def wireprotosupport(self):
3192 3197 """Declare support for this compression format on the wire protocol.
3193 3198
3194 3199 If this compression engine isn't supported for compressing wire
3195 3200 protocol payloads, returns None.
3196 3201
3197 3202 Otherwise, returns ``compenginewireprotosupport`` with the following
3198 3203 fields:
3199 3204
3200 3205 * String format identifier
3201 3206 * Integer priority for the server
3202 3207 * Integer priority for the client
3203 3208
3204 3209 The integer priorities are used to order the advertisement of format
3205 3210 support by server and client. The highest integer is advertised
3206 3211 first. Integers with non-positive values aren't advertised.
3207 3212
3208 3213 The priority values are somewhat arbitrary and only used for default
3209 3214 ordering. The relative order can be changed via config options.
3210 3215
3211 3216 If wire protocol compression is supported, the class must also implement
3212 3217 ``compressstream`` and ``decompressorreader``.
3213 3218 """
3214 3219 return None
3215 3220
3216 3221 def revlogheader(self):
3217 3222 """Header added to revlog chunks that identifies this engine.
3218 3223
3219 3224 If this engine can be used to compress revlogs, this method should
3220 3225 return the bytes used to identify chunks compressed with this engine.
3221 3226 Else, the method should return ``None`` to indicate it does not
3222 3227 participate in revlog compression.
3223 3228 """
3224 3229 return None
3225 3230
3226 3231 def compressstream(self, it, opts=None):
3227 3232 """Compress an iterator of chunks.
3228 3233
3229 3234 The method receives an iterator (ideally a generator) of chunks of
3230 3235 bytes to be compressed. It returns an iterator (ideally a generator)
3231 3236 of chunks of bytes representing the compressed output.
3232 3237
3233 3238 Optionally accepts an argument defining how to perform compression.
3234 3239 Each engine treats this argument differently.
3235 3240 """
3236 3241 raise NotImplementedError()
3237 3242
3238 3243 def decompressorreader(self, fh):
3239 3244 """Perform decompression on a file object.
3240 3245
3241 3246 Argument is an object with a ``read(size)`` method that returns
3242 3247 compressed data. Return value is an object with a ``read(size)``
3243 3248 method that returns uncompressed data.
3244 3249 """
3245 3250 raise NotImplementedError()
3246 3251
3247 3252 def revlogcompressor(self, opts=None):
3248 3253 """Obtain an object that can be used to compress revlog entries.
3249 3254
3250 3255 The object has a ``compress(data)`` method that compresses binary
3251 3256 data. This method returns compressed binary data or ``None`` if
3252 3257 the data could not be compressed (too small, not compressible, etc).
3253 3258 The returned data should have a header uniquely identifying this
3254 3259 compression format so decompression can be routed to this engine.
3255 3260 This header should be identified by the ``revlogheader()`` return
3256 3261 value.
3257 3262
3258 3263 The object has a ``decompress(data)`` method that decompresses
3259 3264 data. The method will only be called if ``data`` begins with
3260 3265 ``revlogheader()``. The method should return the raw, uncompressed
3261 3266 data or raise a ``RevlogError``.
3262 3267
3263 3268 The object is reusable but is not thread safe.
3264 3269 """
3265 3270 raise NotImplementedError()
3266 3271
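# Hypothetical engine sketch (illustrative only, deliberately not
# registered): the smallest shape that satisfies the interface above for
# bundle compression. The name, internal identifier, and the optional
# ``lzma`` module it guards on are assumptions, not part of this module;
# the real engines below show the production pattern.
class _examplelzmaengine(compressionengine):
    def name(self):
        return 'example-lzma'

    def available(self):
        # Optional engines should degrade gracefully when their backing
        # module is missing instead of failing at import time.
        try:
            import lzma
            lzma.LZMACompressor  # force attribute lookup
            return True
        except ImportError:
            return False

    def bundletype(self):
        # A ``None`` first element reserves an internal identifier while
        # keeping the name out of user-facing bundle specs.
        return None, '_XZexample'

    def compressstream(self, it, opts=None):
        import lzma
        z = lzma.LZMACompressor()
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data
        yield z.flush()

    def decompressorreader(self, fh):
        import lzma

        def gen():
            d = lzma.LZMADecompressor()
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())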
3267 3272 class _zlibengine(compressionengine):
3268 3273 def name(self):
3269 3274 return 'zlib'
3270 3275
3271 3276 def bundletype(self):
3272 3277 return 'gzip', 'GZ'
3273 3278
3274 3279 def wireprotosupport(self):
3275 3280 return compewireprotosupport('zlib', 20, 20)
3276 3281
3277 3282 def revlogheader(self):
3278 3283 return 'x'
3279 3284
3280 3285 def compressstream(self, it, opts=None):
3281 3286 opts = opts or {}
3282 3287
3283 3288 z = zlib.compressobj(opts.get('level', -1))
3284 3289 for chunk in it:
3285 3290 data = z.compress(chunk)
3286 3291 # Not all calls to compress emit data. It is cheaper to inspect
3287 3292 # here than to feed empty chunks through the generator.
3288 3293 if data:
3289 3294 yield data
3290 3295
3291 3296 yield z.flush()
3292 3297
3293 3298 def decompressorreader(self, fh):
3294 3299 def gen():
3295 3300 d = zlib.decompressobj()
3296 3301 for chunk in filechunkiter(fh):
3297 3302 while chunk:
3298 3303 # Limit output size to limit memory.
3299 3304 yield d.decompress(chunk, 2 ** 18)
3300 3305 chunk = d.unconsumed_tail
3301 3306
3302 3307 return chunkbuffer(gen())
3303 3308
3304 3309 class zlibrevlogcompressor(object):
3305 3310 def compress(self, data):
3306 3311 insize = len(data)
3307 3312 # Caller handles empty input case.
3308 3313 assert insize > 0
3309 3314
3310 3315 if insize < 44:
3311 3316 return None
3312 3317
3313 3318 elif insize <= 1000000:
3314 3319 compressed = zlib.compress(data)
3315 3320 if len(compressed) < insize:
3316 3321 return compressed
3317 3322 return None
3318 3323
3319 3324 # zlib makes an internal copy of the input buffer, doubling
3320 3325 # memory usage for large inputs. So do streaming compression
3321 3326 # on large inputs.
3322 3327 else:
3323 3328 z = zlib.compressobj()
3324 3329 parts = []
3325 3330 pos = 0
3326 3331 while pos < insize:
3327 3332 pos2 = pos + 2**20
3328 3333 parts.append(z.compress(data[pos:pos2]))
3329 3334 pos = pos2
3330 3335 parts.append(z.flush())
3331 3336
3332 3337 if sum(map(len, parts)) < insize:
3333 3338 return ''.join(parts)
3334 3339 return None
3335 3340
3336 3341 def decompress(self, data):
3337 3342 try:
3338 3343 return zlib.decompress(data)
3339 3344 except zlib.error as e:
3340 3345 raise error.RevlogError(_('revlog decompress error: %s') %
3341 3346 str(e))
3342 3347
3343 3348 def revlogcompressor(self, opts=None):
3344 3349 return self.zlibrevlogcompressor()
3345 3350
3346 3351 compengines.register(_zlibengine())
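# Hypothetical usage sketch (illustrative only): round-tripping data through
# the zlib engine's stream APIs. Item lookup by name on the manager is an
# assumption here (its storage is a name-keyed dict). The final line shows
# the revlogcompressor() contract documented above: compress() returns
# ``None`` when the input isn't worth compressing (under 44 bytes here).
def _examplezlibroundtrip(data):
    eng = compengines['zlib']
    compressed = ''.join(eng.compressstream(iter([data])))
    reader = eng.decompressorreader(stringio(compressed))
    assert reader.read(len(data)) == data
    return eng.revlogcompressor().compress('tiny') is None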
3347 3352
3348 3353 class _bz2engine(compressionengine):
3349 3354 def name(self):
3350 3355 return 'bz2'
3351 3356
3352 3357 def bundletype(self):
3353 3358 return 'bzip2', 'BZ'
3354 3359
3355 3360 # We declare a protocol name but don't advertise by default because
3356 3361 # it is slow.
3357 3362 def wireprotosupport(self):
3358 3363 return compewireprotosupport('bzip2', 0, 0)
3359 3364
3360 3365 def compressstream(self, it, opts=None):
3361 3366 opts = opts or {}
3362 3367 z = bz2.BZ2Compressor(opts.get('level', 9))
3363 3368 for chunk in it:
3364 3369 data = z.compress(chunk)
3365 3370 if data:
3366 3371 yield data
3367 3372
3368 3373 yield z.flush()
3369 3374
3370 3375 def decompressorreader(self, fh):
3371 3376 def gen():
3372 3377 d = bz2.BZ2Decompressor()
3373 3378 for chunk in filechunkiter(fh):
3374 3379 yield d.decompress(chunk)
3375 3380
3376 3381 return chunkbuffer(gen())
3377 3382
3378 3383 compengines.register(_bz2engine())
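# Hypothetical usage sketch (illustrative only): deriving a user-facing
# bundle-spec name -> engine map from bundletype(), skipping engines that
# suppress their name with ``None`` (like the truncated-bz2 engine below).
# Iterating the manager for engine names and indexing it by name are
# assumptions based on its dict-backed storage.
def _examplebundlespecs():
    specs = {}
    for name in compengines:
        engine = compengines[name]
        bundletype = engine.bundletype()
        if bundletype and bundletype[0] is not None and engine.available():
            specs[bundletype[0]] = engine
    return specs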
3379 3384
3380 3385 class _truncatedbz2engine(compressionengine):
3381 3386 def name(self):
3382 3387 return 'bz2truncated'
3383 3388
3384 3389 def bundletype(self):
3385 3390 return None, '_truncatedBZ'
3386 3391
3387 3392 # We don't implement compressstream because it is hackily handled elsewhere.
3388 3393
3389 3394 def decompressorreader(self, fh):
3390 3395 def gen():
3391 3396 # The input stream doesn't have the 'BZ' header. So add it back.
3392 3397 d = bz2.BZ2Decompressor()
3393 3398 d.decompress('BZ')
3394 3399 for chunk in filechunkiter(fh):
3395 3400 yield d.decompress(chunk)
3396 3401
3397 3402 return chunkbuffer(gen())
3398 3403
3399 3404 compengines.register(_truncatedbz2engine())
3400 3405
3401 3406 class _noopengine(compressionengine):
3402 3407 def name(self):
3403 3408 return 'none'
3404 3409
3405 3410 def bundletype(self):
3406 3411 return 'none', 'UN'
3407 3412
3408 3413 # Clients always support uncompressed payloads. Servers don't advertise
3409 3414 # it by default because, unless you are on a fast network, uncompressed
3410 3415 # payloads can easily saturate the pipe.
3411 3416 def wireprotosupport(self):
3412 3417 return compewireprotosupport('none', 0, 10)
3413 3418
3414 3419 # We don't implement revlogheader because it is handled specially
3415 3420 # in the revlog class.
3416 3421
3417 3422 def compressstream(self, it, opts=None):
3418 3423 return it
3419 3424
3420 3425 def decompressorreader(self, fh):
3421 3426 return fh
3422 3427
3423 3428 class nooprevlogcompressor(object):
3424 3429 def compress(self, data):
3425 3430 return None
3426 3431
3427 3432 def revlogcompressor(self, opts=None):
3428 3433 return self.nooprevlogcompressor()
3429 3434
3430 3435 compengines.register(_noopengine())
3431 3436
3432 3437 class _zstdengine(compressionengine):
3433 3438 def name(self):
3434 3439 return 'zstd'
3435 3440
3436 3441 @propertycache
3437 3442 def _module(self):
3438 3443 # Not all installs have the zstd module available. So defer importing
3439 3444 # until first access.
3440 3445 try:
3441 3446 from . import zstd
3442 3447 # Force delayed import.
3443 3448 zstd.__version__
3444 3449 return zstd
3445 3450 except ImportError:
3446 3451 return None
3447 3452
3448 3453 def available(self):
3449 3454 return bool(self._module)
3450 3455
3451 3456 def bundletype(self):
3452 3457 return 'zstd', 'ZS'
3453 3458
3454 3459 def wireprotosupport(self):
3455 3460 return compewireprotosupport('zstd', 50, 50)
3456 3461
3457 3462 def revlogheader(self):
3458 3463 return '\x28'
3459 3464
3460 3465 def compressstream(self, it, opts=None):
3461 3466 opts = opts or {}
3462 3467 # zstd level 3 is almost always significantly faster than zlib
3463 3468 # while providing no worse compression. It strikes a good balance
3464 3469 # between speed and compression.
3465 3470 level = opts.get('level', 3)
3466 3471
3467 3472 zstd = self._module
3468 3473 z = zstd.ZstdCompressor(level=level).compressobj()
3469 3474 for chunk in it:
3470 3475 data = z.compress(chunk)
3471 3476 if data:
3472 3477 yield data
3473 3478
3474 3479 yield z.flush()
3475 3480
3476 3481 def decompressorreader(self, fh):
3477 3482 zstd = self._module
3478 3483 dctx = zstd.ZstdDecompressor()
3479 3484 return chunkbuffer(dctx.read_from(fh))
3480 3485
3481 3486 class zstdrevlogcompressor(object):
3482 3487 def __init__(self, zstd, level=3):
3483 3488 # Writing the content size adds a few bytes to the output. However,
3484 3489 # it allows decompression to be more optimal since we can
3485 3490 # pre-allocate a buffer to hold the result.
3486 3491 self._cctx = zstd.ZstdCompressor(level=level,
3487 3492 write_content_size=True)
3488 3493 self._dctx = zstd.ZstdDecompressor()
3489 3494 self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
3490 3495 self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
3491 3496
3492 3497 def compress(self, data):
3493 3498 insize = len(data)
3494 3499 # Caller handles empty input case.
3495 3500 assert insize > 0
3496 3501
3497 3502 if insize < 50:
3498 3503 return None
3499 3504
3500 3505 elif insize <= 1000000:
3501 3506 compressed = self._cctx.compress(data)
3502 3507 if len(compressed) < insize:
3503 3508 return compressed
3504 3509 return None
3505 3510 else:
3506 3511 z = self._cctx.compressobj()
3507 3512 chunks = []
3508 3513 pos = 0
3509 3514 while pos < insize:
3510 3515 pos2 = pos + self._compinsize
3511 3516 chunk = z.compress(data[pos:pos2])
3512 3517 if chunk:
3513 3518 chunks.append(chunk)
3514 3519 pos = pos2
3515 3520 chunks.append(z.flush())
3516 3521
3517 3522 if sum(map(len, chunks)) < insize:
3518 3523 return ''.join(chunks)
3519 3524 return None
3520 3525
3521 3526 def decompress(self, data):
3522 3527 insize = len(data)
3523 3528
3524 3529 try:
3525 3530 # This was measured to be faster than other streaming
3526 3531 # decompressors.
3527 3532 dobj = self._dctx.decompressobj()
3528 3533 chunks = []
3529 3534 pos = 0
3530 3535 while pos < insize:
3531 3536 pos2 = pos + self._decompinsize
3532 3537 chunk = dobj.decompress(data[pos:pos2])
3533 3538 if chunk:
3534 3539 chunks.append(chunk)
3535 3540 pos = pos2
3536 3541 # Frame should be exhausted, so no finish() API.
3537 3542
3538 3543 return ''.join(chunks)
3539 3544 except Exception as e:
3540 3545 raise error.RevlogError(_('revlog decompress error: %s') %
3541 3546 str(e))
3542 3547
3543 3548 def revlogcompressor(self, opts=None):
3544 3549 opts = opts or {}
3545 3550 return self.zstdrevlogcompressor(self._module,
3546 3551 level=opts.get('level', 3))
3547 3552
3548 3553 compengines.register(_zstdengine())
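# Hypothetical usage sketch (illustrative only): choosing the client's
# preferred wire-protocol engine from the priorities declared above, so
# zstd (50) beats zlib (20) and uncompressed (10) when available, while
# bzip2's non-positive priority keeps it from ever being chosen. Manager
# iteration/lookup and the ``clientpriority`` attribute name are assumed
# from the field list in the wireprotosupport() docstring.
def _exampleclientengine():
    best = None
    for name in compengines:
        engine = compengines[name]
        if not engine.available():
            continue
        support = engine.wireprotosupport()
        if support is None or support.clientpriority <= 0:
            continue
        if best is None or support.clientpriority > best[0]:
            best = (support.clientpriority, engine)
    return best and best[1]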
3549 3554
3550 3555 # convenient shortcut
3551 3556 dst = debugstacktrace