##// END OF EJS Templates
util: teach stringmatcher to handle forced case insensitive matches...
Matt Harbison -
r30773:c390b40f default
parent child Browse files
Show More
@@ -1,3349 +1,3369 b''
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import
17 17
18 18 import bz2
19 19 import calendar
20 20 import collections
21 21 import datetime
22 22 import errno
23 23 import gc
24 24 import hashlib
25 25 import imp
26 26 import os
27 27 import platform as pyplatform
28 28 import re as remod
29 29 import shutil
30 30 import signal
31 31 import socket
32 32 import stat
33 33 import string
34 34 import subprocess
35 35 import sys
36 36 import tempfile
37 37 import textwrap
38 38 import time
39 39 import traceback
40 40 import zlib
41 41
42 42 from . import (
43 43 encoding,
44 44 error,
45 45 i18n,
46 46 osutil,
47 47 parsers,
48 48 pycompat,
49 49 )
50 50
# Forward Python 2/3 compatibility aliases from pycompat so the rest of the
# codebase can access them uniformly as util.<name>.
empty = pycompat.empty
httplib = pycompat.httplib
httpserver = pycompat.httpserver
pickle = pycompat.pickle
queue = pycompat.queue
socketserver = pycompat.socketserver
stderr = pycompat.stderr
stdin = pycompat.stdin
stdout = pycompat.stdout
stringio = pycompat.stringio
urlerr = pycompat.urlerr
urlparse = pycompat.urlparse
urlreq = pycompat.urlreq
xmlrpclib = pycompat.xmlrpclib
65 65
if pycompat.osname == 'nt':
    from . import windows as platform
    # wrap stdout with the Windows-specific writer
    stdout = platform.winstdout(pycompat.stdout)
else:
    from . import posix as platform

_ = i18n._

# Re-export the platform-specific implementations under stable names so
# callers can use util.<name> regardless of the operating system.
bindunixsocket = platform.bindunixsocket
cachestat = platform.cachestat
checkexec = platform.checkexec
checklink = platform.checklink
copymode = platform.copymode
executablepath = platform.executablepath
expandglobs = platform.expandglobs
explainexit = platform.explainexit
findexe = platform.findexe
gethgcmd = platform.gethgcmd
getuser = platform.getuser
getpid = os.getpid
groupmembers = platform.groupmembers
groupname = platform.groupname
hidewindow = platform.hidewindow
isexec = platform.isexec
isowner = platform.isowner
localpath = platform.localpath
lookupreg = platform.lookupreg
makedir = platform.makedir
nlinks = platform.nlinks
normpath = platform.normpath
normcase = platform.normcase
normcasespec = platform.normcasespec
normcasefallback = platform.normcasefallback
openhardlinks = platform.openhardlinks
oslink = platform.oslink
parsepatchoutput = platform.parsepatchoutput
pconvert = platform.pconvert
poll = platform.poll
popen = platform.popen
posixfile = platform.posixfile
quotecommand = platform.quotecommand
readpipe = platform.readpipe
rename = platform.rename
removedirs = platform.removedirs
samedevice = platform.samedevice
samefile = platform.samefile
samestat = platform.samestat
setbinary = platform.setbinary
setflags = platform.setflags
setsignalhandler = platform.setsignalhandler
shellquote = platform.shellquote
spawndetached = platform.spawndetached
split = platform.split
sshargs = platform.sshargs
# prefer the C implementation from osutil when it is available
statfiles = getattr(osutil, 'statfiles', platform.statfiles)
statisexec = platform.statisexec
statislink = platform.statislink
testpid = platform.testpid
umask = platform.umask
unlink = platform.unlink
unlinkpath = platform.unlinkpath
username = platform.username

# Python compatibility

# unique sentinel meaning "no value" (used by safehasattr and the LRU cache)
_notset = object()

# disable Python's problematic floating point timestamps (issue4836)
# (Python hypocritically says you shouldn't change this behavior in
# libraries, and sure enough Mercurial is not a library.)
os.stat_float_times(False)
137 137
def safehasattr(thing, attr):
    """True if ``thing`` has attribute ``attr``, even when its value is falsy."""
    marker = object()
    return getattr(thing, attr, marker) is not marker
140 140
def bitsfrom(container):
    """OR together every value in ``container`` into a single bitmask."""
    mask = 0
    for flag in container:
        mask |= flag
    return mask
146 146
# Map of supported digest name -> hashlib constructor.
DIGESTS = {
    'md5': hashlib.md5,
    'sha1': hashlib.sha1,
    'sha512': hashlib.sha512,
}
# List of digest types from strongest to weakest
DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']

# sanity check: every entry in the strength ordering must be a known digest
for k in DIGESTS_BY_STRENGTH:
    assert k in DIGESTS

class digester(object):
    """helper to compute digests.

    This helper can be used to compute one or more digests given their name.

    >>> d = digester(['md5', 'sha1'])
    >>> d.update('foo')
    >>> [k for k in sorted(d)]
    ['md5', 'sha1']
    >>> d['md5']
    'acbd18db4cc2f85cedef654fccc4a4d8'
    >>> d['sha1']
    '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
    >>> digester.preferred(['md5', 'sha1'])
    'sha1'
    """

    def __init__(self, digests, s=''):
        self._hashes = {}
        for k in digests:
            if k not in DIGESTS:
                raise Abort(_('unknown digest type: %s') % k)
            self._hashes[k] = DIGESTS[k]()
        if s:
            self.update(s)

    def update(self, data):
        """Feed ``data`` into every tracked digest."""
        for h in self._hashes.values():
            h.update(data)

    def __getitem__(self, key):
        """Return the hex digest accumulated so far for ``key``."""
        if key not in DIGESTS:
            # bug fix: report the requested 'key'; the previous code used the
            # stale module-level loop variable 'k', producing a wrong name in
            # the error message
            raise Abort(_('unknown digest type: %s') % key)
        return self._hashes[key].hexdigest()

    def __iter__(self):
        return iter(self._hashes)

    @staticmethod
    def preferred(supported):
        """returns the strongest digest type in both supported and DIGESTS."""

        for k in DIGESTS_BY_STRENGTH:
            if k in supported:
                return k
        return None
204 204
class digestchecker(object):
    """file handle wrapper that additionally checks content against a given
    size and digests.

    d = digestchecker(fh, size, {'md5': '...'})

    When multiple digests are given, all of them are validated.
    """

    def __init__(self, fh, size, digests):
        self._fh = fh
        self._size = size
        self._got = 0
        self._digests = dict(digests)
        self._digester = digester(self._digests.keys())

    def read(self, length=-1):
        """Read from the wrapped handle, feeding the digests as we go."""
        data = self._fh.read(length)
        self._digester.update(data)
        self._got += len(data)
        return data

    def validate(self):
        """Abort if the observed size or any expected digest disagrees."""
        if self._got != self._size:
            raise Abort(_('size mismatch: expected %d, got %d') %
                        (self._size, self._got))
        for name, expected in self._digests.items():
            actual = self._digester[name]
            if expected != actual:
                # i18n: first parameter is a digest name
                raise Abort(_('%s mismatch: expected %s, got %s') %
                            (name, expected, actual))
236 236
# Python 2 has a builtin 'buffer'; provide an equivalent shim elsewhere that
# returns a slice-from-offset view of any sliceable object.
try:
    buffer = buffer
except NameError:
    if not pycompat.ispy3:
        def buffer(sliceable, offset=0):
            return sliceable[offset:]
    else:
        def buffer(sliceable, offset=0):
            # memoryview gives a view without copying the underlying bytes
            return memoryview(sliceable)[offset:]

# pass close_fds=True to subprocess calls only on POSIX
closefds = pycompat.osname == 'posix'

# read size used by bufferedinputpipe._fillbuffer below
_chunksize = 4096
250 250
class bufferedinputpipe(object):
    """a manually buffered input pipe

    Python will not let us use buffered IO and lazy reading with 'polling' at
    the same time. We cannot probe the buffer state and select will not detect
    that data are ready to read if they are already buffered.

    This class let us work around that by implementing its own buffering
    (allowing efficient readline) while offering a way to know if the buffer is
    empty from the output (allowing collaboration of the buffer with polling).

    This class lives in the 'util' module because it makes use of the 'os'
    module from the python stdlib.
    """

    def __init__(self, input):
        self._input = input
        self._buffer = []     # list of buffered chunks, newest last
        self._eof = False     # set once os.read returns empty
        self._lenbuf = 0      # total length of all chunks in self._buffer

    @property
    def hasbuffer(self):
        """True is any data is currently buffered

        This will be used externally a pre-step for polling IO. If there is
        already data then no polling should be set in place."""
        return bool(self._buffer)

    @property
    def closed(self):
        return self._input.closed

    def fileno(self):
        return self._input.fileno()

    def close(self):
        return self._input.close()

    def read(self, size):
        """Read up to ``size`` bytes, blocking until enough data or EOF."""
        while (not self._eof) and (self._lenbuf < size):
            self._fillbuffer()
        return self._frombuffer(size)

    def readline(self, *args, **kwargs):
        """Read one line (terminated by '\\n' or EOF) from the pipe."""
        if 1 < len(self._buffer):
            # this should not happen because both read and readline end with a
            # _frombuffer call that collapse it.
            self._buffer = [''.join(self._buffer)]
            self._lenbuf = len(self._buffer[0])
        # lfi is the index of the newline within the newest chunk, -1 if none
        lfi = -1
        if self._buffer:
            lfi = self._buffer[-1].find('\n')
        while (not self._eof) and lfi < 0:
            self._fillbuffer()
            if self._buffer:
                lfi = self._buffer[-1].find('\n')
        size = lfi + 1
        if lfi < 0: # end of file
            size = self._lenbuf
        elif 1 < len(self._buffer):
            # we need to take previous chunks into account
            size += self._lenbuf - len(self._buffer[-1])
        return self._frombuffer(size)

    def _frombuffer(self, size):
        """return at most 'size' data from the buffer

        The data are removed from the buffer."""
        if size == 0 or not self._buffer:
            return ''
        buf = self._buffer[0]
        if 1 < len(self._buffer):
            # collapse all chunks so a single slice suffices
            buf = ''.join(self._buffer)

        data = buf[:size]
        buf = buf[len(data):]
        if buf:
            self._buffer = [buf]
            self._lenbuf = len(buf)
        else:
            self._buffer = []
            self._lenbuf = 0
        return data

    def _fillbuffer(self):
        """read data to the buffer"""
        data = os.read(self._input.fileno(), _chunksize)
        if not data:
            self._eof = True
        else:
            self._lenbuf += len(data)
            self._buffer.append(data)
344 344
def popen2(cmd, env=None, newlines=False):
    """Run ``cmd`` through the shell; return the child's (stdin, stdout)."""
    # Setting bufsize to -1 lets the system decide the buffer size.
    # The default for bufsize is 0, meaning unbuffered. This leads to
    # poor performance on Mac OS X: http://bugs.python.org/issue4194
    child = subprocess.Popen(cmd, shell=True, bufsize=-1,
                             close_fds=closefds,
                             stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                             universal_newlines=newlines,
                             env=env)
    return child.stdin, child.stdout
355 355
def popen3(cmd, env=None, newlines=False):
    """Like popen4(), but without exposing the Popen object itself."""
    pipes = popen4(cmd, env, newlines)
    return pipes[0], pipes[1], pipes[2]
359 359
def popen4(cmd, env=None, newlines=False, bufsize=-1):
    """Spawn ``cmd`` via the shell; return (stdin, stdout, stderr, proc)."""
    child = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
                             close_fds=closefds,
                             stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             universal_newlines=newlines,
                             env=env)
    return child.stdin, child.stdout, child.stderr, child
368 368
def version():
    """Return version information if available."""
    try:
        from . import __version__
    except ImportError:
        return 'unknown'
    return __version__.version
376 376
def versiontuple(v=None, n=4):
    """Parses a Mercurial version string into an N-tuple.

    The version string to be parsed is specified with the ``v`` argument.
    If it isn't defined, the current Mercurial version string will be parsed.

    ``n`` can be 2, 3, or 4. Here is how some version strings map to
    returned values:

    >>> v = '3.6.1+190-df9b73d2d444'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, 1)
    >>> versiontuple(v, 4)
    (3, 6, 1, '190-df9b73d2d444')

    >>> versiontuple('3.6.1+190-df9b73d2d444+20151118')
    (3, 6, 1, '190-df9b73d2d444+20151118')

    >>> v = '3.6'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, None)
    >>> versiontuple(v, 4)
    (3, 6, None, None)

    >>> v = '3.9-rc'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc')

    >>> v = '3.9-rc+2-02a8fea4289b'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc+2-02a8fea4289b')
    """
    if not v:
        v = version()
    # split the dotted numeric part from the first '+'/'-' suffix, if any
    pieces = remod.split('[\+-]', v, 1)
    if len(pieces) == 1:
        vparts, extra = pieces[0], None
    else:
        vparts, extra = pieces

    vints = []
    for piece in vparts.split('.'):
        try:
            vints.append(int(piece))
        except ValueError:
            # stop at the first non-numeric component
            break
    # pad so (3, 6) becomes (3, 6, None)
    while len(vints) < 3:
        vints.append(None)

    if n == 2:
        return (vints[0], vints[1])
    if n == 3:
        return (vints[0], vints[1], vints[2])
    if n == 4:
        return (vints[0], vints[1], vints[2], extra)
445 445
# used by parsedate
# Formats are tried in order; the first one that strptime accepts wins.
defaultdateformats = (
    '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
    '%Y-%m-%dT%H:%M',    #   without seconds
    '%Y-%m-%dT%H%M%S',   # another awful but legal variant without :
    '%Y-%m-%dT%H%M',     #   without seconds
    '%Y-%m-%d %H:%M:%S', # our common legal variant
    '%Y-%m-%d %H:%M',    #   without seconds
    '%Y-%m-%d %H%M%S',   # without :
    '%Y-%m-%d %H%M',     #   without seconds
    '%Y-%m-%d %I:%M:%S%p',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %I:%M%p',
    '%Y-%m-%d',
    '%m-%d',
    '%m/%d',
    '%m/%d/%y',
    '%m/%d/%Y',
    '%a %b %d %H:%M:%S %Y',
    '%a %b %d %I:%M:%S%p %Y',
    '%a, %d %b %Y %H:%M:%S',  #  GNU coreutils "/bin/date --rfc-2822"
    '%b %d %H:%M:%S %Y',
    '%b %d %I:%M:%S%p %Y',
    '%b %d %H:%M:%S',
    '%b %d %I:%M:%S%p',
    '%b %d %H:%M',
    '%b %d %I:%M%p',
    '%b %d %Y',
    '%b %d',
    '%H:%M:%S',
    '%I:%M:%S%p',
    '%H:%M',
    '%I:%M%p',
)

# additional, more permissive formats (year/month only, month name only)
extendeddateformats = defaultdateformats + (
    "%Y",
    "%Y-%m",
    "%b",
    "%b %Y",
    )
487 487
def cachefunc(func):
    '''cache the result of function calls'''
    # XXX doesn't handle keywords args
    argcount = func.__code__.co_argcount
    if argcount == 0:
        # zero-argument case: a one-slot list holds the single result
        hold = []
        def f():
            if not hold:
                hold.append(func())
            return hold[0]
        return f
    cache = {}
    if argcount == 1:
        # we gain a small amount of time because
        # we don't need to pack/unpack the list
        def f(arg):
            try:
                return cache[arg]
            except KeyError:
                cache[arg] = func(arg)
                return cache[arg]
    else:
        def f(*args):
            try:
                return cache[args]
            except KeyError:
                cache[args] = func(*args)
                return cache[args]

    return f
513 513
class sortdict(dict):
    '''a simple sorted dictionary

    Keys iterate in insertion order; re-setting an existing key moves it
    to the end.
    '''
    def __init__(self, data=None):
        # _list tracks key order; the dict itself holds the values
        self._list = []
        if data:
            self.update(data)
    def copy(self):
        return sortdict(self)
    def __setitem__(self, key, val):
        if key in self:
            # move an existing key to the end, like a fresh insertion
            self._list.remove(key)
        self._list.append(key)
        dict.__setitem__(self, key, val)
    def __iter__(self):
        return self._list.__iter__()
    def update(self, src):
        if isinstance(src, dict):
            src = src.iteritems()
        for k, v in src:
            self[k] = v
    def clear(self):
        dict.clear(self)
        self._list = []
    def items(self):
        return [(k, self[k]) for k in self._list]
    def __delitem__(self, key):
        dict.__delitem__(self, key)
        self._list.remove(key)
    def pop(self, key, *args, **kwargs):
        # bug fix: return the popped value per the dict.pop contract; the
        # previous code computed it and silently discarded it
        value = dict.pop(self, key, *args, **kwargs)
        try:
            self._list.remove(key)
        except ValueError:
            # key was absent and a default was returned; nothing to remove
            pass
        return value
    def keys(self):
        return self._list
    def iterkeys(self):
        return self._list.__iter__()
    def iteritems(self):
        for k in self._list:
            yield k, self[k]
    def insert(self, index, key, val):
        '''insert key at the given position in the ordering'''
        self._list.insert(index, key)
        dict.__setitem__(self, key, val)
    def __repr__(self):
        if not self:
            return '%s()' % self.__class__.__name__
        return '%s(%r)' % (self.__class__.__name__, self.items())
562 562
class _lrucachenode(object):
    """One node of the doubly linked list backing lrucachedict.

    Carries the neighbouring nodes plus the key/value pair for the
    dictionary entry it represents.
    """
    __slots__ = (u'next', u'prev', u'key', u'value')

    def __init__(self):
        # not linked into any list yet, and holding no entry
        self.prev = None
        self.next = None
        self.value = None
        self.key = _notset

    def markempty(self):
        """Mark the node as no longer holding a cache entry."""
        self.key = _notset
581 581
class lrucachedict(object):
    """Dict that caches most recent accesses and sets.

    The dict consists of an actual backing dict - indexed by original
    key - and a doubly linked circular list defining the order of entries in
    the cache.

    The head node is the newest entry in the cache. If the cache is full,
    we recycle head.prev and make it the new head. Cache accesses result in
    the node being moved to before the existing head and being marked as the
    new head node.
    """
    def __init__(self, max):
        # backing store: key -> _lrucachenode
        self._cache = {}

        # circular doubly-linked list; starts as one self-linked empty node
        self._head = head = _lrucachenode()
        head.prev = head
        head.next = head
        self._size = 1
        self._capacity = max

    def __len__(self):
        return len(self._cache)

    def __contains__(self, k):
        return k in self._cache

    def __iter__(self):
        # We don't have to iterate in cache order, but why not.
        n = self._head
        for i in range(len(self._cache)):
            yield n.key
            n = n.next

    def __getitem__(self, k):
        node = self._cache[k]
        self._movetohead(node)
        return node.value

    def __setitem__(self, k, v):
        node = self._cache.get(k)
        # Replace existing value and mark as newest.
        if node is not None:
            node.value = v
            self._movetohead(node)
            return

        if self._size < self._capacity:
            node = self._addcapacity()
        else:
            # Grab the last/oldest item.
            node = self._head.prev

        # At capacity. Kill the old entry.
        if node.key is not _notset:
            del self._cache[node.key]

        node.key = k
        node.value = v
        self._cache[k] = node
        # And mark it as newest entry. No need to adjust order since it
        # is already self._head.prev.
        self._head = node

    def __delitem__(self, k):
        node = self._cache.pop(k)
        node.markempty()

        # Temporarily mark as newest item before re-adjusting head to make
        # this node the oldest item.
        self._movetohead(node)
        self._head = node.next

    # Additional dict methods.

    def get(self, k, default=None):
        """Return the value for ``k`` or ``default`` if absent.

        NOTE: unlike __getitem__, this does not refresh the entry's
        recency (no _movetohead call).
        """
        try:
            return self._cache[k].value
        except KeyError:
            return default

    def clear(self):
        """Empty the cache; allocated nodes are kept for reuse."""
        n = self._head
        while n.key is not _notset:
            n.markempty()
            n = n.next

        self._cache.clear()

    def copy(self):
        """Return a new lrucachedict with the same entries and ordering."""
        result = lrucachedict(self._capacity)
        n = self._head.prev
        # Iterate in oldest-to-newest order, so the copy has the right ordering
        for i in range(len(self._cache)):
            result[n.key] = n.value
            n = n.prev
        return result

    def _movetohead(self, node):
        """Mark a node as the newest, making it the new head.

        When a node is accessed, it becomes the freshest entry in the LRU
        list, which is denoted by self._head.

        Visually, let's make ``N`` the new head node (* denotes head):

            previous/oldest <-> head <-> next/next newest

            ----<->--- A* ---<->-----
            |                       |
            E <-> D <-> N <-> C <-> B

        To:

            ----<->--- N* ---<->-----
            |                       |
            E <-> D <-> C <-> B <-> A

        This requires the following moves:

           C.next = D  (node.prev.next = node.next)
           D.prev = C  (node.next.prev = node.prev)
           E.next = N  (head.prev.next = node)
           N.prev = E  (node.prev = head.prev)
           N.next = A  (node.next = head)
           A.prev = N  (head.prev = node)
        """
        head = self._head
        # C.next = D
        node.prev.next = node.next
        # D.prev = C
        node.next.prev = node.prev
        # N.prev = E
        node.prev = head.prev
        # N.next = A
        # It is tempting to do just "head" here, however if node is
        # adjacent to head, this will do bad things.
        node.next = head.prev.next
        # E.next = N
        node.next.prev = node
        # A.prev = N
        node.prev.next = node

        self._head = node

    def _addcapacity(self):
        """Add a node to the circular linked list.

        The new node is inserted before the head node.
        """
        head = self._head
        node = _lrucachenode()
        head.prev.next = node
        node.prev = head.prev
        node.next = head
        head.prev = node
        self._size += 1
        return node
740 740
def lrucachefunc(func):
    '''cache most recent results of function calls'''
    cache = {}
    order = collections.deque()
    maxsize = 20
    if func.__code__.co_argcount == 1:
        def f(arg):
            if arg in cache:
                # refresh recency
                order.remove(arg)
            else:
                if len(cache) > maxsize:
                    # evict the least recently used entry
                    del cache[order.popleft()]
                cache[arg] = func(arg)
            order.append(arg)
            return cache[arg]
    else:
        def f(*args):
            if args in cache:
                order.remove(args)
            else:
                if len(cache) > maxsize:
                    del cache[order.popleft()]
                cache[args] = func(*args)
            order.append(args)
            return cache[args]

    return f
767 767
class propertycache(object):
    """Descriptor that computes a value once and stores it on the instance.

    The result is written into the instance __dict__ under the wrapped
    function's name via cachevalue().
    """
    def __init__(self, func):
        self.func = func
        self.name = func.__name__

    def __get__(self, obj, type=None):
        value = self.func(obj)
        self.cachevalue(obj, value)
        return value

    def cachevalue(self, obj, value):
        # __dict__ assignment required to bypass __setattr__ (eg: repoview)
        obj.__dict__[self.name] = value
780 780
def pipefilter(s, cmd):
    '''filter string S through command CMD, returning its output'''
    proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    out, _err = proc.communicate(s)
    return out
787 787
def tempfilter(s, cmd):
    '''filter string S through a pair of temporary files with CMD.
    CMD is used as a template to create the real command to be run,
    with the strings INFILE and OUTFILE replaced by the real names of
    the temporary files generated.'''
    inname = outname = None
    try:
        # write the input to a temp file the command can read
        infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
        infile = os.fdopen(infd, 'wb')
        infile.write(s)
        infile.close()
        # reserve an output file name for the command to fill
        outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
        os.close(outfd)
        cmd = cmd.replace('INFILE', inname)
        cmd = cmd.replace('OUTFILE', outname)
        code = os.system(cmd)
        if pycompat.sysplatform == 'OpenVMS' and code & 1:
            # odd status means success on OpenVMS
            code = 0
        if code:
            raise Abort(_("command '%s' failed: %s") %
                        (cmd, explainexit(code)))
        return readfile(outname)
    finally:
        # best-effort cleanup of both temp files
        for name in (inname, outname):
            try:
                if name:
                    os.unlink(name)
            except OSError:
                pass
821 821
# maps a filter-spec prefix to the function implementing that filter style;
# consulted by filter() below
filtertable = {
    'tempfile:': tempfilter,
    'pipe:': pipefilter,
    }
826 826
def filter(s, cmd):
    "filter a string through a command that transforms its input to its output"
    for prefix, filterfn in filtertable.iteritems():
        if cmd.startswith(prefix):
            return filterfn(s, cmd[len(prefix):].lstrip())
    # no recognized prefix: default to a pipe filter
    return pipefilter(s, cmd)
833 833
def binary(s):
    """return true if a string is binary data"""
    # empty/None input is never considered binary
    if not s:
        return False
    return '\0' in s
837 837
def increasingchunks(source, min=1024, max=65536):
    '''return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max'''
    def log2(x):
        # floor(log2(x)); returns 0 for x == 0
        if not x:
            return 0
        bits = 0
        while x:
            x >>= 1
            bits += 1
        return bits - 1

    pending = []
    pendinglen = 0
    for piece in source:
        pending.append(piece)
        pendinglen += len(piece)
        if pendinglen < min:
            continue
        if min < max:
            # grow the threshold: at least double it, or jump to the
            # largest power of two not above what we just accumulated
            min = min << 1
            nmin = 1 << log2(pendinglen)
            if nmin > min:
                min = nmin
            if min > max:
                min = max
        yield ''.join(pending)
        pendinglen = 0
        pending = []
    if pending:
        # flush whatever remains, even if below the threshold
        yield ''.join(pending)
868 868
# commonly used alias so this module's helpers can raise without importing
# 'error' at each call site
Abort = error.Abort
870 870
def always(fn):
    """Matcher predicate that accepts any input."""
    return True
873 873
def never(fn):
    """Matcher predicate that rejects any input."""
    return False
876 876
def nogc(func):
    """disable garbage collector

    Python's garbage collector triggers a GC each time a certain number of
    container objects (the number being defined by gc.get_threshold()) are
    allocated even when marked not to be tracked by the collector. Tracking has
    no effect on when GCs are triggered, only on what objects the GC looks
    into. As a workaround, disable GC while building complex (huge)
    containers.

    This garbage collector issue have been fixed in 2.7.
    """
    if sys.version_info >= (2, 7):
        # nothing to work around on 2.7+; return the function unwrapped
        return func

    def wrapper(*args, **kwargs):
        wasenabled = gc.isenabled()
        gc.disable()
        try:
            return func(*args, **kwargs)
        finally:
            if wasenabled:
                gc.enable()
    return wrapper
900 900
def pathto(root, n1, n2):
    '''return the relative path from one place to another.
    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    '''
    if not n1:
        return localpath(n2)
    if os.path.isabs(n1):
        if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
            # different drives (Windows): no relative path exists, so
            # anchor n2 at root instead
            return os.path.join(root, localpath(n2))
        n2 = '/'.join((pconvert(root), n2))
    a, b = splitpath(n1), n2.split('/')
    a.reverse()
    b.reverse()
    # strip the common leading components of both paths
    while a and b and a[-1] == b[-1]:
        a.pop()
        b.pop()
    b.reverse()
    # climb out of what remains of n1, then descend into n2
    return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
926 926
def mainfrozen():
    """return True if we are a frozen executable.

    The code supports py2exe (most common, Windows only) and tools/freeze
    (portable, not much used).
    """
    # any one of these markers identifies a frozen interpreter
    return (safehasattr(sys, "frozen") or # new py2exe
            safehasattr(sys, "importers") or # old py2exe
            imp.is_frozen(u"__main__")) # tools/freeze
936 936
# the location of data files matching the source code
if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
    # executable version (py2exe) doesn't support __file__
    datapath = os.path.dirname(pycompat.sysexecutable)
else:
    datapath = os.path.dirname(__file__)

# normalize the path to bytes for consistency with the rest of the code
if not isinstance(datapath, bytes):
    datapath = pycompat.fsencode(datapath)

i18n.setdatapath(datapath)
948 948
# lazily-computed path of the 'hg' executable; filled by _sethgexecutable()
_hgexecutable = None

def hgexecutable():
    """return location of the 'hg' executable.

    Defaults to $HG or 'hg' in the search path.
    """
    if _hgexecutable is None:
        hg = encoding.environ.get('HG')
        mainmod = sys.modules['__main__']
        if hg:
            _sethgexecutable(hg)
        elif mainfrozen():
            if getattr(sys, 'frozen', None) == 'macosx_app':
                # Env variable set by py2app
                _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
            else:
                _sethgexecutable(pycompat.sysexecutable)
        elif os.path.basename(getattr(mainmod, '__file__', '')) == 'hg':
            # running directly from the 'hg' script
            _sethgexecutable(mainmod.__file__)
        else:
            # fall back to searching PATH, then to argv[0]
            exe = findexe('hg') or os.path.basename(sys.argv[0])
            _sethgexecutable(exe)
    return _hgexecutable
973 973
def _sethgexecutable(path):
    """set location of the 'hg' executable"""
    # updates the module-level cache consumed by hgexecutable()
    global _hgexecutable
    _hgexecutable = path
978 978
979 979 def _isstdout(f):
980 980 fileno = getattr(f, 'fileno', None)
981 981 return fileno and fileno() == sys.__stdout__.fileno()
982 982
def shellenviron(environ=None):
    """return environ with optional override, useful for shelling out"""
    def _tostr(val):
        'convert python object into string that is useful to shell'
        # shells have no booleans/None; canonicalize them to '1'/'0'
        if val is True:
            return '1'
        if val is None or val is False:
            return '0'
        return str(val)
    env = dict(encoding.environ)
    if environ:
        env.update((k, _tostr(v)) for k, v in environ.iteritems())
    env['HG'] = hgexecutable()
    return env
997 997
def system(cmd, environ=None, cwd=None, onerr=None, errprefix=None, out=None):
    '''enhanced shell command execution.
    run with environment maybe modified, maybe in different dir.

    if command fails and onerr is None, return status, else raise onerr
    object as exception.

    if out is specified, it is assumed to be a file-like object that has a
    write() method. stdout and stderr will be redirected to out.'''
    try:
        # flush our own stdout so child output is not interleaved oddly
        stdout.flush()
    except Exception:
        pass
    origcmd = cmd
    cmd = quotecommand(cmd)
    if pycompat.sysplatform == 'plan9' and (sys.version_info[0] == 2
                                            and sys.version_info[1] < 7):
        # subprocess kludge to work around issues in half-baked Python
        # ports, notably bichued/python:
        if not cwd is None:
            os.chdir(cwd)
        rc = os.system(cmd)
    else:
        env = shellenviron(environ)
        if out is None or _isstdout(out):
            # child can write directly to our stdout
            rc = subprocess.call(cmd, shell=True, close_fds=closefds,
                                 env=env, cwd=cwd)
        else:
            # capture combined stdout+stderr and forward it line by line
            proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                                    env=env, cwd=cwd, stdout=subprocess.PIPE,
                                    stderr=subprocess.STDOUT)
            for line in iter(proc.stdout.readline, ''):
                out.write(line)
            proc.wait()
            rc = proc.returncode
    if pycompat.sysplatform == 'OpenVMS' and rc & 1:
        # odd status means success on OpenVMS
        rc = 0
    if rc and onerr:
        errmsg = '%s %s' % (os.path.basename(origcmd.split(None, 1)[0]),
                            explainexit(rc)[0])
        if errprefix:
            errmsg = '%s: %s' % (errprefix, errmsg)
        raise onerr(errmsg)
    return rc
1042 1042
def checksignature(func):
    '''wrap a function with code to check for calling errors'''
    def wrapped(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError:
            # a traceback of depth one means the TypeError was raised at
            # the call site itself, i.e. the arguments didn't match
            tb = sys.exc_info()[2]
            if len(traceback.extract_tb(tb)) == 1:
                raise error.SignatureError
            raise

    return wrapped
1054 1054
def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
    '''copy a file, preserving mode and optionally other stat info like
    atime/mtime

    checkambig argument is used with filestat, and is useful only if
    destination file is guarded by any lock (e.g. repo.lock or
    repo.wlock).

    copystat and checkambig should be exclusive.
    '''
    assert not (copystat and checkambig)
    oldstat = None
    if os.path.lexists(dest):
        if checkambig:
            # remember the previous stat so we can detect timestamp
            # ambiguity after the copy (see filestat.isambig)
            oldstat = checkambig and filestat(dest)
        unlink(dest)
    # hardlinks are problematic on CIFS, quietly ignore this flag
    # until we find a way to work around it cleanly (issue4546)
    if False and hardlink:
        try:
            oslink(src, dest)
            return
        except (IOError, OSError):
            pass # fall back to normal copy
    if os.path.islink(src):
        # recreate the symlink rather than copying its target's contents
        os.symlink(os.readlink(src), dest)
        # copytime is ignored for symlinks, but in general copytime isn't needed
        # for them anyway
    else:
        try:
            shutil.copyfile(src, dest)
            if copystat:
                # copystat also copies mode
                shutil.copystat(src, dest)
            else:
                shutil.copymode(src, dest)
            if oldstat and oldstat.stat:
                newstat = filestat(dest)
                if newstat.isambig(oldstat):
                    # stat of copied file is ambiguous to original one
                    advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
                    os.utime(dest, (advanced, advanced))
        except shutil.Error as inst:
            raise Abort(str(inst))
1099 1099
def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
    """Copy a directory tree using hardlinks if possible.

    Returns a (hardlink, num) pair: whether hardlinking ended up being
    used, and how many files were copied. progress is called with a
    topic and a running count, and finally with position None.
    """
    num = 0

    if hardlink is None:
        # only attempt hardlinks when src and dst live on the same device
        hardlink = (os.stat(src).st_dev ==
                    os.stat(os.path.dirname(dst)).st_dev)
    if hardlink:
        topic = _('linking')
    else:
        topic = _('copying')

    if os.path.isdir(src):
        os.mkdir(dst)
        for name, kind in osutil.listdir(src):
            srcname = os.path.join(src, name)
            dstname = os.path.join(dst, name)
            def nprog(t, pos):
                # shift child progress by the files already processed here
                if pos is not None:
                    return progress(t, pos + num)
            hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
            num += n
    else:
        if hardlink:
            try:
                oslink(src, dst)
            except (IOError, OSError):
                # linking failed once; fall back to copying for the rest
                hardlink = False
                shutil.copy(src, dst)
        else:
            shutil.copy(src, dst)
        num += 1
        progress(topic, num)
    progress(topic, None)

    return hardlink, num
1136 1136
# filenames reserved by Windows (DOS device names), compared lowercased
_winreservednames = '''con prn aux nul
com1 com2 com3 com4 com5 com6 com7 com8 com9
lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
# characters that may not appear anywhere in a Windows filename
_winreservedchars = ':*?"<>|'
def checkwinfilename(path):
    r'''Check that the base-relative path is a valid filename on Windows.
    Returns None if the path is ok, or a UI string describing the problem.

    >>> checkwinfilename("just/a/normal/path")
    >>> checkwinfilename("foo/bar/con.xml")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename("foo/con.xml/bar")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename("foo/bar/xml.con")
    >>> checkwinfilename("foo/bar/AUX/bla.txt")
    "filename contains 'AUX', which is reserved on Windows"
    >>> checkwinfilename("foo/bar/bla:.txt")
    "filename contains ':', which is reserved on Windows"
    >>> checkwinfilename("foo/bar/b\07la.txt")
    "filename contains '\\x07', which is invalid on Windows"
    >>> checkwinfilename("foo/bar/bla ")
    "filename ends with ' ', which is not allowed on Windows"
    >>> checkwinfilename("../bar")
    >>> checkwinfilename("foo\\")
    "filename ends with '\\', which is invalid on Windows"
    >>> checkwinfilename("foo\\/bar")
    "directory name ends with '\\', which is invalid on Windows"
    '''
    if path.endswith('\\'):
        return _("filename ends with '\\', which is invalid on Windows")
    if '\\/' in path:
        return _("directory name ends with '\\', which is invalid on Windows")
    # examine each path component, treating '\' and '/' alike
    for part in path.replace('\\', '/').split('/'):
        if not part:
            continue
        for char in part:
            if char in _winreservedchars:
                return _("filename contains '%s', which is reserved "
                         "on Windows") % char
            if ord(char) <= 31:
                return _("filename contains %r, which is invalid "
                         "on Windows") % char
        # the reserved device names also shadow any extension (con.xml)
        stem = part.split('.')[0]
        if stem and stem.lower() in _winreservednames:
            return _("filename contains '%s', which is reserved "
                     "on Windows") % stem
        last = part[-1]
        if last in '. ' and part not in ('.', '..'):
            return _("filename ends with '%s', which is not allowed "
                     "on Windows") % last
1187 1187
# pick the filename validator for the local OS: Windows needs the full
# reserved-name checks; otherwise defer to the platform implementation
if pycompat.osname == 'nt':
    checkosfilename = checkwinfilename
else:
    checkosfilename = platform.checkosfilename
1192 1192
def makelock(info, pathname):
    """Create a lock at pathname whose content/target is info.

    Prefers a symlink with info as its target; falls back to exclusively
    creating a regular file containing info when symlinks are unsupported
    (AttributeError) or fail with a non-EEXIST OSError.
    """
    try:
        return os.symlink(info, pathname)
    except OSError as why:
        # an existing lock must be reported to the caller
        if why.errno == errno.EEXIST:
            raise
    except AttributeError: # no symlink in os
        pass

    # O_EXCL makes the fallback creation fail if the lock already exists
    ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
    os.write(ld, info)
    os.close(ld)
1205 1205
def readlock(pathname):
    """Return the info stored in the lock at pathname.

    Reads the symlink target when the lock is a symlink, otherwise falls
    back to reading the regular lock file's contents.
    """
    try:
        return os.readlink(pathname)
    except OSError as why:
        # EINVAL: not a symlink; ENOSYS: symlinks unsupported here
        if why.errno not in (errno.EINVAL, errno.ENOSYS):
            raise
    except AttributeError: # no symlink in os
        pass
    fp = posixfile(pathname)
    r = fp.read()
    fp.close()
    return r
1218 1218
def fstat(fp):
    '''stat file object that may not have fileno method.'''
    fileno = getattr(fp, 'fileno', None)
    if fileno is None:
        # no file descriptor available; stat by name instead
        return os.stat(fp.name)
    return os.fstat(fileno())
1225 1225
1226 1226 # File system features
1227 1227
def fscasesensitive(path):
    """
    Return true if the given path is on a case-sensitive filesystem

    Requires a path (like /foo/.hg) ending with a foldable final
    directory component.
    """
    s1 = os.lstat(path)
    d, b = os.path.split(path)
    # build a sibling name differing only in case
    b2 = b.upper()
    if b == b2:
        b2 = b.lower()
    if b == b2:
        return True # no evidence against case sensitivity
    p2 = os.path.join(d, b2)
    try:
        # if the case-folded name resolves to the very same entry, the
        # filesystem ignored the case difference
        s2 = os.lstat(p2)
        if s2 == s1:
            return False
        return True
    except OSError:
        # folded name doesn't exist: the filesystem distinguished them
        return True
1250 1250
try:
    import re2
    # tri-state flag: None = re2 present but not yet validated (checked
    # lazily in _re._checkre2), False = unavailable/unusable
    _re2 = None
except ImportError:
    _re2 = False
1256 1256
class _re(object):
    # Facade over the regexp engine: uses the faster re2 library when it
    # is importable and working, falling back to the stdlib re module.
    def _checkre2(self):
        global _re2
        try:
            # check if match works, see issue3964
            _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
        except ImportError:
            _re2 = False

    def compile(self, pat, flags=0):
        '''Compile a regular expression, using re2 if possible

        For best performance, use only re2-compatible regexp features. The
        only flags from the re module that are re2-compatible are
        IGNORECASE and MULTILINE.'''
        if _re2 is None:
            self._checkre2()
        if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
            # re2 takes flags inline in the pattern rather than as an arg
            if flags & remod.IGNORECASE:
                pat = '(?i)' + pat
            if flags & remod.MULTILINE:
                pat = '(?m)' + pat
            try:
                return re2.compile(pat)
            except re2.error:
                # pattern uses a feature re2 lacks; use stdlib re instead
                pass
        return remod.compile(pat, flags)

    @propertycache
    def escape(self):
        '''Return the version of escape corresponding to self.compile.

        This is imperfect because whether re2 or re is used for a particular
        function depends on the flags, etc, but it's the best we can do.
        '''
        global _re2
        if _re2 is None:
            self._checkre2()
        if _re2:
            return re2.escape
        else:
            return remod.escape
1299 1299
1300 1300 re = _re()
1301 1301
# maps a directory path to {normcase(entry): entry} for its contents
_fspathcache = {}
def fspath(name, root):
    '''Get name in the case stored in the filesystem

    The name should be relative to root, and be normcase-ed for efficiency.

    Note that this function is unnecessary, and should not be
    called, for case-sensitive filesystems (simply because it's expensive).

    The root should be normcase-ed, too.
    '''
    def _makefspathcacheentry(dir):
        return dict((normcase(n), n) for n in os.listdir(dir))

    seps = pycompat.ossep
    if pycompat.osaltsep:
        seps = seps + pycompat.osaltsep
    # Protect backslashes. This gets silly very quickly.
    # (str.replace returns a new string; the old code discarded it)
    seps = seps.replace('\\', '\\\\')
    pattern = remod.compile(r'([^%s]+)|([%s]+)' % (seps, seps))
    dir = os.path.normpath(root)
    result = []
    for part, sep in pattern.findall(name):
        if sep:
            # separator runs are passed through unchanged
            result.append(sep)
            continue

        if dir not in _fspathcache:
            _fspathcache[dir] = _makefspathcacheentry(dir)
        contents = _fspathcache[dir]

        found = contents.get(part)
        if not found:
            # retry "once per directory" per "dirstate.walk" which
            # may take place for each patches of "hg qpush", for example
            _fspathcache[dir] = contents = _makefspathcacheentry(dir)
            found = contents.get(part)

        result.append(found or part)
        dir = os.path.join(dir, part)

    return ''.join(result)
1344 1344
def checknlink(testfile):
    '''check whether hardlink count reporting works properly'''

    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    f1 = testfile + ".hgtmp1"
    if os.path.lexists(f1):
        return False
    try:
        posixfile(f1, 'w').close()
    except IOError:
        # couldn't even create the probe file; clean up and give up
        try:
            os.unlink(f1)
        except OSError:
            pass
        return False

    f2 = testfile + ".hgtmp2"
    fd = None
    try:
        oslink(f1, f2)
        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        fd = posixfile(f2)
        return nlinks(f2) > 1
    except OSError:
        return False
    finally:
        # always remove both probe files, ignoring races
        if fd is not None:
            fd.close()
        for f in (f1, f2):
            try:
                os.unlink(f)
            except OSError:
                pass
1380 1380
def endswithsep(path):
    '''Check path ends with os.sep or os.altsep.'''
    sep = pycompat.ossep
    altsep = pycompat.osaltsep
    # altsep may be empty/None on platforms with a single separator
    return path.endswith(sep) or altsep and path.endswith(altsep)
1385 1385
def splitpath(path):
    '''Split path by os.sep.

    Returns the list of components.
    Note that this function does not use os.altsep because this is
    an alternative of simple "xxx.split(os.sep)".
    It is recommended to use os.path.normpath() before using this
    function if need.'''
    return path.split(pycompat.ossep)
1393 1393
def gui():
    '''Are we running in a GUI?'''
    if pycompat.sysplatform == 'darwin':
        if 'SSH_CONNECTION' in encoding.environ:
            # handle SSH access to a box where the user is logged in
            return False
        elif getattr(osutil, 'isgui', None):
            # check if a CoreGraphics session is available
            return osutil.isgui()
        else:
            # pure build; use a safe default
            return True
    else:
        # elsewhere: Windows always counts as GUI, Unix needs a display
        return pycompat.osname == "nt" or encoding.environ.get("DISPLAY")
1408 1408
def mktempcopy(name, emptyok=False, createmode=None):
    """Create a temporary file with the same contents from name

    The permission bits are copied from the original file.

    If the temporary file is going to be truncated immediately, you
    can use emptyok=True as an optimization.

    Returns the name of the temporary file.
    """
    d, fn = os.path.split(name)
    fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, dir=d)
    os.close(fd)
    # Temporary files are created with mode 0600, which is usually not
    # what we want. If the original file already exists, just copy
    # its mode. Otherwise, manually obey umask.
    copymode(name, temp, createmode)
    if emptyok:
        return temp
    try:
        try:
            ifp = posixfile(name, "rb")
        except IOError as inst:
            if inst.errno == errno.ENOENT:
                # nothing to copy; the empty temp file is the result
                return temp
            # attach the filename for a more useful error message
            if not getattr(inst, 'filename', None):
                inst.filename = name
            raise
        ofp = posixfile(temp, "wb")
        for chunk in filechunkiter(ifp):
            ofp.write(chunk)
        ifp.close()
        ofp.close()
    except: # re-raises
        # don't leave a stale temp file behind on failure
        try: os.unlink(temp)
        except OSError: pass
        raise
    return temp
1447 1447
class filestat(object):
    """help to exactly detect change of a file

    'stat' attribute is result of 'os.stat()' if specified 'path'
    exists. Otherwise, it is None. This can avoid preparative
    'exists()' examination on client side of this class.
    """
    def __init__(self, path):
        try:
            self.stat = os.stat(path)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            # missing file is represented by stat = None
            self.stat = None

    __hash__ = object.__hash__

    def __eq__(self, old):
        try:
            # if ambiguity between stat of new and old file is
            # avoided, comparison of size, ctime and mtime is enough
            # to exactly detect change of a file regardless of platform
            return (self.stat.st_size == old.stat.st_size and
                    self.stat.st_ctime == old.stat.st_ctime and
                    self.stat.st_mtime == old.stat.st_mtime)
        except AttributeError:
            # either side has stat = None (missing file); note that two
            # missing files therefore compare unequal here
            return False

    def isambig(self, old):
        """Examine whether new (= self) stat is ambiguous against old one

        "S[N]" below means stat of a file at N-th change:

        - S[n-1].ctime < S[n].ctime: can detect change of a file
        - S[n-1].ctime == S[n].ctime
          - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
          - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
          - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
        - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)

        Case (*2) above means that a file was changed twice or more at
        same time in sec (= S[n-1].ctime), and comparison of timestamp
        is ambiguous.

        Base idea to avoid such ambiguity is "advance mtime 1 sec, if
        timestamp is ambiguous".

        But advancing mtime only in case (*2) doesn't work as
        expected, because naturally advanced S[n].mtime in case (*1)
        might be equal to manually advanced S[n-1 or earlier].mtime.

        Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
        treated as ambiguous regardless of mtime, to avoid overlooking
        by confliction between such mtime.

        Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
        S[n].mtime", even if size of a file isn't changed.
        """
        try:
            return (self.stat.st_ctime == old.stat.st_ctime)
        except AttributeError:
            # either stat is None (missing file): not ambiguous
            return False

    def avoidambig(self, path, old):
        """Change file stat of specified path to avoid ambiguity

        'old' should be previous filestat of 'path'.

        This skips avoiding ambiguity, if a process doesn't have
        appropriate privileges for 'path'.
        """
        # bump mtime by one second, wrapping at 32-bit signed max
        advanced = (old.stat.st_mtime + 1) & 0x7fffffff
        try:
            os.utime(path, (advanced, advanced))
        except OSError as inst:
            if inst.errno == errno.EPERM:
                # utime() on the file created by another user causes EPERM,
                # if a process doesn't have appropriate privileges
                return
            raise

    def __ne__(self, other):
        # defined explicitly: Python 2 doesn't derive __ne__ from __eq__
        return not self == other
1531 1531
class atomictempfile(object):
    '''writable file object that atomically updates a file

    All writes will go to a temporary copy of the original file. Call
    close() when you are done writing, and atomictempfile will rename
    the temporary copy to the original name, making the changes
    visible. If the object is destroyed without being closed, all your
    writes are discarded.

    checkambig argument of constructor is used with filestat, and is
    useful only if target file is guarded by any lock (e.g. repo.lock
    or repo.wlock).
    '''
    def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
        self.__name = name # permanent name
        # the temp copy starts out with the original's contents unless
        # opened for plain writing ('w' implies truncation anyway)
        self._tempname = mktempcopy(name, emptyok=('w' in mode),
                                    createmode=createmode)
        self._fp = posixfile(self._tempname, mode)
        self._checkambig = checkambig

        # delegated methods
        self.read = self._fp.read
        self.write = self._fp.write
        self.seek = self._fp.seek
        self.tell = self._fp.tell
        self.fileno = self._fp.fileno

    def close(self):
        # rename the temp copy over the target, fixing up the mtime if
        # the rename makes the timestamps ambiguous (see filestat)
        if not self._fp.closed:
            self._fp.close()
            filename = localpath(self.__name)
            oldstat = self._checkambig and filestat(filename)
            if oldstat and oldstat.stat:
                rename(self._tempname, filename)
                newstat = filestat(filename)
                if newstat.isambig(oldstat):
                    # stat of changed file is ambiguous to original one
                    advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
                    os.utime(filename, (advanced, advanced))
            else:
                rename(self._tempname, filename)

    def discard(self):
        # throw away all writes: remove the temp file without renaming
        if not self._fp.closed:
            try:
                os.unlink(self._tempname)
            except OSError:
                pass
            self._fp.close()

    def __del__(self):
        if safehasattr(self, '_fp'): # constructor actually did something
            self.discard()

    def __enter__(self):
        return self

    def __exit__(self, exctype, excvalue, traceback):
        # commit on clean exit, discard if the with-block raised
        if exctype is not None:
            self.discard()
        else:
            self.close()
1594 1594
def makedirs(name, mode=None, notindexed=False):
    """recursive directory creation with parent mode inheritance

    Newly created directories are marked as "not to be indexed by
    the content indexing service", if ``notindexed`` is specified
    for "write" mode access.
    """
    try:
        makedir(name, notindexed)
    except OSError as err:
        # EEXIST means some entry with that name exists; return silently
        # (no check that the existing entry is actually a directory)
        if err.errno == errno.EEXIST:
            return
        if err.errno != errno.ENOENT or not name:
            raise
        # missing parent: create it recursively, then retry
        parent = os.path.dirname(os.path.abspath(name))
        if parent == name:
            raise
        makedirs(parent, mode, notindexed)
        try:
            makedir(name, notindexed)
        except OSError as err:
            # Catch EEXIST to handle races
            if err.errno == errno.EEXIST:
                return
            raise
    if mode is not None:
        # apply mode explicitly, bypassing the umask
        os.chmod(name, mode)
1622 1622
def readfile(path):
    """Return the entire binary contents of path."""
    fp = open(path, 'rb')
    try:
        return fp.read()
    finally:
        fp.close()
1626 1626
def writefile(path, text):
    """Replace the contents of path with text (binary)."""
    fp = open(path, 'wb')
    try:
        fp.write(text)
    finally:
        fp.close()
1630 1630
def appendfile(path, text):
    """Append text (binary) to the end of path, creating it if needed."""
    fp = open(path, 'ab')
    try:
        fp.write(text)
    finally:
        fp.close()
1634 1634
class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
        """in_iter is the iterator that's iterating over the input chunks.
        targetsize is how big a buffer to try to maintain."""
        def splitbig(chunks):
            # re-yield any chunk larger than 1MB in 256k pieces so the
            # queue never has to hold one huge string
            for chunk in chunks:
                clen = len(chunk)
                if clen <= 2**20:
                    yield chunk
                    continue
                for start in range(0, clen, 2**18):
                    yield chunk[start:start + 2**18]
        self.iter = splitbig(in_iter)
        self._queue = collections.deque()
        self._chunkoffset = 0  # consumed prefix length of self._queue[0]

    def read(self, l=None):
        """Read L bytes of data from the iterator of chunks of data.
        Returns less than L bytes if the iterator runs dry.

        If size parameter is omitted, read everything"""
        if l is None:
            return ''.join(self.iter)

        pending = self._queue
        pieces = []
        remaining = l
        while remaining > 0:
            if not pending:
                # refill: pull roughly 256k more from the source iterator
                budget = 2**18
                for chunk in self.iter:
                    pending.append(chunk)
                    budget -= len(chunk)
                    if budget <= 0:
                        break
                if not pending:
                    break  # source exhausted

            # Peek at the head instead of popleft()/appendleft(): for a
            # partial read this avoids extra deque mutations and avoids
            # building a new string for the chunk's unread remainder.
            chunk = pending[0]
            offset = self._chunkoffset

            if offset == 0 and remaining >= len(chunk):
                # consume the whole chunk; offset stays at 0
                remaining -= len(chunk)
                pending.popleft()
                pieces.append(chunk)
                continue

            unread = len(chunk) - offset
            if remaining >= unread:
                # consume the rest of a partially-read chunk; the
                # offset != 0 guarantee above keeps chunk[offset:] from
                # being a pointless full copy
                remaining -= unread
                pending.popleft()
                pieces.append(chunk[offset:])
                self._chunkoffset = 0
            else:
                # take only part of the chunk and remember where we stopped
                pieces.append(chunk[offset:offset + remaining])
                self._chunkoffset = offset + remaining
                remaining -= unread

        return ''.join(pieces)
1715 1715
def filechunkiter(f, size=131072, limit=None):
    """Create a generator that produces the data in the file size
    (default 131072) bytes at a time, up to optional limit (default is
    to read all data). Chunks may be less than size bytes if the
    chunk is the last chunk in the file, or the file is a socket or
    some other type of file that sometimes reads less data than is
    requested."""
    assert size >= 0
    assert limit is None or limit >= 0
    while True:
        nbytes = size if limit is None else min(limit, size)
        # an exhausted limit yields nbytes == 0, which also stops the loop
        s = nbytes and f.read(nbytes)
        if not s:
            break
        if limit:
            limit -= len(s)
        yield s
1736 1736
def makedate(timestamp=None):
    '''Return a unix timestamp (or the current time) as a (unixtime,
    offset) tuple based off the local timezone.'''
    if timestamp is None:
        timestamp = time.time()
    if timestamp < 0:
        raise Abort(_("negative timestamp: %d") % timestamp,
                    hint=_("check your clock"))
    # derive the local UTC offset by comparing the naive UTC and local
    # renderings of the same instant
    utc = datetime.datetime.utcfromtimestamp(timestamp)
    local = datetime.datetime.fromtimestamp(timestamp)
    delta = utc - local
    return timestamp, delta.days * 86400 + delta.seconds
1749 1749
def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
    """represent a (unixtime, offset) tuple as a localized time.
    unixtime is seconds since the epoch, and offset is the time zone's
    number of seconds away from UTC.

    >>> datestr((0, 0))
    'Thu Jan 01 00:00:00 1970 +0000'
    >>> datestr((42, 0))
    'Thu Jan 01 00:00:42 1970 +0000'
    >>> datestr((-42, 0))
    'Wed Dec 31 23:59:18 1969 +0000'
    >>> datestr((0x7fffffff, 0))
    'Tue Jan 19 03:14:07 2038 +0000'
    >>> datestr((-0x80000000, 0))
    'Fri Dec 13 20:45:52 1901 +0000'
    """
    timestamp, tz = date or makedate()
    if "%1" in format or "%2" in format or "%z" in format:
        # expand our %z/%1/%2 extensions into a "+HHMM" style suffix;
        # positive tz means west of UTC, hence the '-' sign
        sign = "-" if tz > 0 else "+"
        hours, mins = divmod(abs(tz) // 60, 60)
        format = format.replace("%z", "%1%2")
        format = format.replace("%1", "%c%02d" % (sign, hours))
        format = format.replace("%2", "%02d" % mins)
    when = timestamp - tz
    # clamp to the range of a signed 32-bit time_t
    if when > 0x7fffffff:
        when = 0x7fffffff
    elif when < -0x80000000:
        when = -0x80000000
    # Never use time.gmtime() and datetime.datetime.fromtimestamp()
    # because they use the gmtime() system call which is buggy on Windows
    # for negative values.
    t = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=when)
    return t.strftime(format)
1785 1785
def shortdate(date=None):
    """turn (timestamp, tzoff) tuple into ISO 8601 date."""
    return datestr(date, format='%Y-%m-%d')
1789 1789
def parsetimezone(s):
    """find a trailing timezone, if any, in string, and return a
    (offset, remainder) pair"""
    def tooffset(signch, hours, minutes):
        # offset is seconds west of UTC, hence the negation for '+'
        seconds = (hours * 60 + minutes) * 60
        return -seconds if signch == "+" else seconds

    # named UTC aliases
    if s.endswith("GMT") or s.endswith("UTC"):
        return 0, s[:-3].rstrip()

    # Unix-style timezones [+-]hhmm
    if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
        return (tooffset(s[-5], int(s[-4:-2]), int(s[-2:])),
                s[:-5].rstrip())

    # ISO8601 trailing Z
    if s.endswith("Z") and s[-2:-1].isdigit():
        return 0, s[:-1]

    # ISO8601-style [+-]hh:mm
    if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
        s[-5:-3].isdigit() and s[-2:].isdigit()):
        return tooffset(s[-6], int(s[-5:-3]), int(s[-2:])), s[:-6]

    # no recognizable timezone suffix
    return None, s
1817 1817
def strdate(string, format, defaults=[]):
    """parse a localized time string and return a (unixtime, offset) tuple.
    if the string cannot be parsed, ValueError is raised."""
    # NOTE: unixtime = localunixtime + offset
    offset, date = parsetimezone(string)

    # fill in any element missing from the format with its default,
    # walking from most to least specific
    usenow = False # default to using biased defaults
    for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
        if any(("%" + p) in format for p in part):
            # We've found a specific time element, less specific time
            # elements are relative to today
            usenow = True
        else:
            date += "@" + defaults[part][usenow]
            format += "@%" + part[0]

    timetuple = time.strptime(date, format)
    localunixtime = int(calendar.timegm(timetuple))
    if offset is None:
        # local timezone
        unixtime = int(time.mktime(timetuple))
        offset = unixtime - localunixtime
    else:
        unixtime = localunixtime + offset
    return unixtime, offset
1845 1845
def parsedate(date, formats=None, bias=None):
    """parse a localized date/time and return a (unixtime, offset) tuple.

    The date may be a "unixtime offset" string or in one of the specified
    formats. If the date already is a (unixtime, offset) tuple, it is returned.

    >>> parsedate(' today ') == parsedate(\
                                  datetime.date.today().strftime('%b %d'))
    True
    >>> parsedate( 'yesterday ') == parsedate((datetime.date.today() -\
                                               datetime.timedelta(days=1)\
                                              ).strftime('%b %d'))
    True
    >>> now, tz = makedate()
    >>> strnow, strtz = parsedate('now')
    >>> (strnow - now) < 1
    True
    >>> tz == strtz
    True
    """
    if bias is None:
        bias = {}
    if not date:
        return 0, 0
    if isinstance(date, tuple) and len(date) == 2:
        # already parsed
        return date
    if not formats:
        formats = defaultdateformats
    date = date.strip()

    # handle the symbolic dates (localized forms included)
    if date == 'now' or date == _('now'):
        return makedate()
    if date == 'today' or date == _('today'):
        date = datetime.date.today().strftime('%b %d')
    elif date == 'yesterday' or date == _('yesterday'):
        date = (datetime.date.today() -
                datetime.timedelta(days=1)).strftime('%b %d')

    try:
        # fast path: internal "unixtime offset" representation
        when, offset = map(int, date.split(' '))
    except ValueError:
        # fill out defaults
        now = makedate()
        defaults = {}
        for part in ("d", "mb", "yY", "HI", "M", "S"):
            # this piece is for rounding the specific end of unknowns
            b = bias.get(part)
            if b is None:
                if part[0] in "HMS":
                    b = "00"
                else:
                    b = "0"

            # this piece is for matching the generic end to today's date
            n = datestr(now, "%" + part[0])

            defaults[part] = (b, n)

        # try each candidate format until one parses
        for format in formats:
            try:
                when, offset = strdate(date, format, defaults)
            except (ValueError, OverflowError):
                pass
            else:
                break
        else:
            raise Abort(_('invalid date: %r') % date)
    # validate explicit (probably user-specified) date and
    # time zone offset. values must fit in signed 32 bits for
    # current 32-bit linux runtimes. timezones go from UTC-12
    # to UTC+14
    if when < -0x80000000 or when > 0x7fffffff:
        raise Abort(_('date exceeds 32 bits: %d') % when)
    if offset < -50400 or offset > 43200:
        raise Abort(_('impossible time zone offset: %d') % offset)
    return when, offset
1922 1922
def matchdate(date):
    """Return a function that matches a given date match specifier

    Formats include:

    '{date}' match a given date to the accuracy provided

    '<{date}' on or before a given date

    '>{date}' on or after a given date

    >>> p1 = parsedate("10:29:59")
    >>> p2 = parsedate("10:30:00")
    >>> p3 = parsedate("10:30:59")
    >>> p4 = parsedate("10:31:00")
    >>> p5 = parsedate("Sep 15 10:30:00 1999")
    >>> f = matchdate("10:30")
    >>> f(p1[0])
    False
    >>> f(p2[0])
    True
    >>> f(p3[0])
    True
    >>> f(p4[0])
    False
    >>> f(p5[0])
    False
    """

    def lower(date):
        # round unspecified fields down (month/day default to 1)
        d = {'mb': "1", 'd': "1"}
        return parsedate(date, extendeddateformats, d)[0]

    def upper(date):
        # round unspecified fields up; try the longest month lengths
        # first and fall back for shorter months
        d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
        for days in ("31", "30", "29"):
            try:
                d["d"] = days
                return parsedate(date, extendeddateformats, d)[0]
            except Abort:
                pass
        d["d"] = "28"
        return parsedate(date, extendeddateformats, d)[0]

    date = date.strip()

    if not date:
        raise Abort(_("dates cannot consist entirely of whitespace"))
    elif date[0] == "<":
        if not date[1:]:
            raise Abort(_("invalid day spec, use '<DATE'"))
        when = upper(date[1:])
        return lambda x: x <= when
    elif date[0] == ">":
        if not date[1:]:
            raise Abort(_("invalid day spec, use '>DATE'"))
        when = lower(date[1:])
        return lambda x: x >= when
    elif date[0] == "-":
        # "-N": within the last N days
        try:
            days = int(date[1:])
        except ValueError:
            raise Abort(_("invalid day spec: %s") % date[1:])
        if days < 0:
            raise Abort(_("%s must be nonnegative (see 'hg help dates')")
                % date[1:])
        when = makedate()[0] - days * 3600 * 24
        return lambda x: x >= when
    elif " to " in date:
        # explicit inclusive range
        a, b = date.split(" to ")
        start, stop = lower(a), upper(b)
        return lambda x: x >= start and x <= stop
    else:
        # plain date: match anywhere within its span of accuracy
        start, stop = lower(date), upper(date)
        return lambda x: x >= start and x <= stop
1998 1998
def stringmatcher(pattern, casesensitive=True):
    """
    accepts a string, possibly starting with 're:' or 'literal:' prefix.
    returns the matcher name, pattern, and matcher function.
    missing or unknown prefixes are treated as literal matches.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test('abcdefg', 'abc', 'def', 'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test('re:a.+b', 'nomatch', 'fooadef', 'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test('literal:re:foobar', 'foobar', 're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test('foo:bar', 'foo', 'bar', 'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest('re:A.+b', 'nomatch', 'fooadef', 'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest('ABCDEFG', 'abc', 'def', 'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    if pattern.startswith('re:'):
        pattern = pattern[3:]
        try:
            flags = 0 if casesensitive else remod.I
            regex = remod.compile(pattern, flags)
        except remod.error as e:
            raise error.ParseError(_('invalid regular expression: %s')
                                   % e)
        return 're', pattern, regex.search
    elif pattern.startswith('literal:'):
        pattern = pattern[8:]

    if casesensitive:
        match = pattern.__eq__
    else:
        # compare through encoding.lower() so locale-aware case folding
        # is applied to both sides
        ipat = encoding.lower(pattern)
        match = lambda s: ipat == encoding.lower(s)
    return 'literal', pattern, match
2037 2057
def shortuser(user):
    """Return a short representation of a user name or email address."""
    # drop everything from the first '@' on, keep what follows the
    # first '<', then cut at the first space and the first dot
    at = user.find('@')
    if at >= 0:
        user = user[:at]
    lt = user.find('<')
    if lt >= 0:
        user = user[lt + 1:]
    for sep in (' ', '.'):
        idx = user.find(sep)
        if idx >= 0:
            user = user[:idx]
    return user
2053 2073
def emailuser(user):
    """Return the user portion of an email address."""
    # strip any domain part, then any 'Real Name <' prefix
    user = user.split('@', 1)[0]
    head, sep, tail = user.partition('<')
    if sep:
        return tail
    return user
2063 2083
def email(author):
    '''get email of author.'''
    # take the text between '<' and '>'; with no brackets, the whole
    # string is assumed to be the address
    start = author.find('<') + 1
    end = author.find('>')
    if end == -1:
        end = None
    return author[start:end]
2070 2090
def ellipsis(text, maxlength=400):
    """Trim string to at most maxlength (default: 400) columns in display."""
    # encoding.trim is display-column aware and appends '...' when trimming
    return encoding.trim(text, maxlength, ellipsis='...')
2074 2094
def unitcountfn(*unittable):
    '''return a function that renders a readable count of some quantity'''

    def go(count):
        # use the first (multiplier, divisor, format) row whose
        # threshold the count reaches
        for multiplier, divisor, fmt in unittable:
            if count < divisor * multiplier:
                continue
            return fmt % (count / float(divisor))
        # nothing matched: fall back to the smallest unit
        return unittable[-1][2] % count

    return go
2085 2105
# render a byte count using the unit that yields three significant digits
# (e.g. 1.23 GB, 12.3 MB, 123 KB), falling back to plain bytes
bytecount = unitcountfn(
    (100, 1 << 30, _('%.0f GB')),
    (10, 1 << 30, _('%.1f GB')),
    (1, 1 << 30, _('%.2f GB')),
    (100, 1 << 20, _('%.0f MB')),
    (10, 1 << 20, _('%.1f MB')),
    (1, 1 << 20, _('%.2f MB')),
    (100, 1 << 10, _('%.0f KB')),
    (10, 1 << 10, _('%.1f KB')),
    (1, 1 << 10, _('%.2f KB')),
    (1, 1, _('%.0f bytes')),
    )
2098 2118
def uirepr(s):
    # Avoid double backslash in Windows path repr()
    rendered = repr(s)
    return rendered.replace('\\\\', '\\')
2102 2122
# delay import of textwrap
def MBTextWrapper(**kwargs):
    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each characters.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in East Asian area, but as narrow in other.

        This requires use decision to determine width of such characters.
        """
        def _cutdown(self, ucstr, space_left):
            # split ucstr at the point where space_left display columns
            # are consumed; returns (fits, remainder)
            l = 0
            colwidth = encoding.ucolwidth
            for i in xrange(len(ucstr)):
                l += colwidth(ucstr[i])
                if space_left < l:
                    return (ucstr[:i], ucstr[i:])
            return ucstr, ''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                cut, res = self._cutdown(reversed_chunks[-1], space_left)
                cur_line.append(cut)
                reversed_chunks[-1] = res
            elif not cur_line:
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            colwidth = encoding.ucolwidth

            lines = []
            if self.width <= 0:
                raise ValueError("invalid width %r (must be > 0)" % self.width)

            # Arrange in reverse order so items can be efficiently popped
            # from a stack of chucks.
            chunks.reverse()

            while chunks:

                # Start the list of chunks that will make up the current line.
                # cur_len is just the length of all the chunks in cur_line.
                cur_line = []
                cur_len = 0

                # Figure out which static string will prefix this line.
                if lines:
                    indent = self.subsequent_indent
                else:
                    indent = self.initial_indent

                # Maximum width for this line.
                width = self.width - len(indent)

                # First chunk on line is whitespace -- drop it, unless this
                # is the very beginning of the text (i.e. no lines started yet).
                if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                    del chunks[-1]

                while chunks:
                    l = colwidth(chunks[-1])

                    # Can at least squeeze this chunk onto the current line.
                    if cur_len + l <= width:
                        cur_line.append(chunks.pop())
                        cur_len += l

                    # Nope, this line is full.
                    else:
                        break

                # The current line is full, and the next chunk is too big to
                # fit on *any* line (not just this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # If the last chunk on this line is all whitespace, drop it.
                if (self.drop_whitespace and
                    cur_line and cur_line[-1].strip() == ''):
                    del cur_line[-1]

                # Convert current line back to a string and store it in list
                # of all lines (return value).
                if cur_line:
                    lines.append(indent + ''.join(cur_line))

            return lines

    # rebind the module-level name to the class itself, so later calls
    # construct tw directly without re-running this factory body
    global MBTextWrapper
    MBTextWrapper = tw
    return tw(**kwargs)
2206 2226
def wrap(line, width, initindent='', hangindent=''):
    """Word-wrap a byte string to the given display width.

    initindent prefixes the first output line, hangindent the following
    ones. Wrapping is done on the unicode form (via MBTextWrapper) so
    display-column widths are respected; the result is encoded back to
    the local encoding.
    """
    maxindent = max(len(hangindent), len(initindent))
    if width <= maxindent:
        # adjust for weird terminal size
        width = max(78, maxindent + 1)
    line = line.decode(encoding.encoding, encoding.encodingmode)
    initindent = initindent.decode(encoding.encoding, encoding.encodingmode)
    hangindent = hangindent.decode(encoding.encoding, encoding.encodingmode)
    wrapper = MBTextWrapper(width=width,
                            initial_indent=initindent,
                            subsequent_indent=hangindent)
    return wrapper.fill(line).encode(encoding.encoding)
2219 2239
if (pyplatform.python_implementation() == 'CPython' and
    sys.version_info < (3, 0)):
    # There is an issue in CPython that some IO methods do not handle EINTR
    # correctly. The following table shows what CPython version (and functions)
    # are affected (buggy: has the EINTR bug, okay: otherwise):
    #
    #                | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
    #   --------------------------------------------------
    #    fp.__iter__ | buggy   | buggy           | okay
    #    fp.read*    | buggy   | okay [1]        | okay
    #
    # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
    #
    # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
    # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
    #
    # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
    # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
    # CPython 2, because CPython 2 maintains an internal readahead buffer for
    # fp.__iter__ but not other fp.read* methods.
    #
    # On modern systems like Linux, the "read" syscall cannot be interrupted
    # when reading "fast" files like on-disk files. So the EINTR issue only
    # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
    # files approximately as "fast" files and use the fast (unsafe) code path,
    # to minimize the performance impact.
    if sys.version_info >= (2, 7, 4):
        # fp.readline deals with EINTR correctly, use it as a workaround.
        def _safeiterfile(fp):
            # line iteration that is safe against EINTR
            return iter(fp.readline, '')
    else:
        # fp.read* are broken too, manually deal with EINTR in a stupid way.
        # note: this may block longer than necessary because of bufsize.
        def _safeiterfile(fp, bufsize=4096):
            # read raw chunks with os.read (retrying on EINTR) and
            # reassemble them into lines
            fd = fp.fileno()
            line = ''
            while True:
                try:
                    buf = os.read(fd, bufsize)
                except OSError as ex:
                    # os.read only raises EINTR before any data is read
                    if ex.errno == errno.EINTR:
                        continue
                    else:
                        raise
                line += buf
                if '\n' in buf:
                    splitted = line.splitlines(True)
                    line = ''
                    for l in splitted:
                        if l[-1] == '\n':
                            yield l
                        else:
                            # keep the trailing partial line for next round
                            line = l
                if not buf:
                    break
            if line:
                yield line

    def iterfile(fp):
        # regular on-disk files take the fast path; pipes/sockets/ttys
        # go through the EINTR-safe iterator
        fastpath = True
        if type(fp) is file:
            fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
        if fastpath:
            return fp
        else:
            return _safeiterfile(fp)
else:
    # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
    def iterfile(fp):
        return fp
2291 2311
def iterlines(iterator):
    """Yield the individual lines of every chunk produced by iterator."""
    for chunk in iterator:
        lines = chunk.splitlines()
        for line in lines:
            yield line
2296 2316
def expandpath(path):
    """Expand environment variables, then ~user constructs, in path."""
    expanded = os.path.expandvars(path)
    return os.path.expanduser(expanded)
2299 2319
def hgcmd():
    """Return the command used to execute current hg

    This is different from hgexecutable() because on Windows we want
    to avoid things opening new shell windows like batch files, so we
    get either the python call or current executable.
    """
    # frozen builds (py2exe/py2app etc.) embed the interpreter
    if mainfrozen():
        if getattr(sys, 'frozen', None) == 'macosx_app':
            # Env variable set by py2app
            return [encoding.environ['EXECUTABLEPATH']]
        else:
            return [pycompat.sysexecutable]
    return gethgcmd()
2314 2334
def rundetached(args, condfn):
    """Execute the argument list in a detached process.

    condfn is a callable which is called repeatedly and should return
    True once the child process is known to have started successfully.
    At this point, the child process PID is returned. If the child
    process fails to start or finishes before condfn() evaluates to
    True, return -1.
    """
    # Windows case is easier because the child process is either
    # successfully starting and validating the condition or exiting
    # on failure. We just poll on its PID. On Unix, if the child
    # process fails to start, it will be left in a zombie state until
    # the parent wait on it, which we cannot do since we expect a long
    # running process on success. Instead we listen for SIGCHLD telling
    # us our child process terminated.
    terminated = set()
    def handler(signum, frame):
        # reap the child immediately so it cannot linger as a zombie
        terminated.add(os.wait())
    prevhandler = None
    SIGCHLD = getattr(signal, 'SIGCHLD', None)
    if SIGCHLD is not None:
        prevhandler = signal.signal(SIGCHLD, handler)
    try:
        pid = spawndetached(args)
        while not condfn():
            # re-check condfn after learning of the child's death to
            # avoid a startup/termination race
            if ((pid in terminated or not testpid(pid))
                and not condfn()):
                return -1
            time.sleep(0.1)
        return pid
    finally:
        if prevhandler is not None:
            # restore the caller's SIGCHLD disposition
            signal.signal(signal.SIGCHLD, prevhandler)
2349 2369
def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
    """Return the result of interpolating items in the mapping into string s.

    prefix is a single character string, or a two character string with
    a backslash as the first character if the prefix needs to be escaped in
    a regular expression.

    fn is an optional function that will be applied to the replacement text
    just before replacement.

    escape_prefix is an optional flag that allows using doubled prefix for
    its escaping.
    """
    fn = fn or (lambda s: s)
    patterns = '|'.join(mapping.keys())
    if escape_prefix:
        patterns += '|' + prefix
        if len(prefix) > 1:
            prefix_char = prefix[1:]
        else:
            prefix_char = prefix
        # work on a copy: registering the escape entry must not mutate
        # the caller's mapping
        mapping = mapping.copy()
        mapping[prefix_char] = prefix_char
    r = remod.compile(r'%s(%s)' % (prefix, patterns))
    # group()[1:] strips the single (possibly escaped) prefix character
    return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2374 2394
def getport(port):
    """Return the port for a given network service.

    If port is an integer, it's returned as is. If it's a string, it's
    looked up using socket.getservbyname(). If there's no matching
    service, error.Abort is raised.
    """
    try:
        return int(port)
    except ValueError:
        # not numeric: fall through to a service-name lookup
        try:
            return socket.getservbyname(port)
        except socket.error:
            raise Abort(_("no port number associated with service '%s'")
                        % port)
2391 2411
# canonical spellings accepted as boolean configuration values
_booleans = dict.fromkeys(('1', 'yes', 'true', 'on', 'always'), True)
_booleans.update(dict.fromkeys(('0', 'no', 'false', 'off', 'never'), False))

def parsebool(s):
    """Parse s into a boolean.

    If s is not a valid boolean, returns None.
    """
    return _booleans.get(s.lower())
2402 2422
2403 2423 _hextochr = dict((a + b, chr(int(a + b, 16)))
2404 2424 for a in string.hexdigits for b in string.hexdigits)
2405 2425
class url(object):
    r"""Reliable URL parser.

    This parses URLs and provides attributes for the following
    components:

    <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>

    Missing components are set to None. The only exception is
    fragment, which is set to '' if present but empty.

    If parsefragment is False, fragment is included in query. If
    parsequery is False, query is included in path. If both are
    False, both fragment and query are included in path.

    See http://www.ietf.org/rfc/rfc2396.txt for more information.

    Note that for backward compatibility reasons, bundle URLs do not
    take host names. That means 'bundle://../' has a path of '../'.

    Examples:

    >>> url('http://www.ietf.org/rfc/rfc2396.txt')
    <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
    >>> url('ssh://[::1]:2200//home/joe/repo')
    <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
    >>> url('file:///home/joe/repo')
    <url scheme: 'file', path: '/home/joe/repo'>
    >>> url('file:///c:/temp/foo/')
    <url scheme: 'file', path: 'c:/temp/foo/'>
    >>> url('bundle:foo')
    <url scheme: 'bundle', path: 'foo'>
    >>> url('bundle://../foo')
    <url scheme: 'bundle', path: '../foo'>
    >>> url(r'c:\foo\bar')
    <url path: 'c:\\foo\\bar'>
    >>> url(r'\\blah\blah\blah')
    <url path: '\\\\blah\\blah\\blah'>
    >>> url(r'\\blah\blah\blah#baz')
    <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
    >>> url(r'file:///C:\users\me')
    <url scheme: 'file', path: 'C:\\users\\me'>

    Authentication credentials:

    >>> url('ssh://joe:xyz@x/repo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
    >>> url('ssh://joe@x/repo')
    <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>

    Query strings and fragments:

    >>> url('http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url('http://host/a?b#c', parsequery=False, parsefragment=False)
    <url scheme: 'http', host: 'host', path: 'a?b#c'>

    Empty path:

    >>> url('')
    <url path: ''>
    >>> url('#a')
    <url path: '', fragment: 'a'>
    >>> url('http://host/')
    <url scheme: 'http', host: 'host', path: ''>
    >>> url('http://host/#a')
    <url scheme: 'http', host: 'host', path: '', fragment: 'a'>

    Only scheme:

    >>> url('http:')
    <url scheme: 'http'>
    """

    # characters left unquoted when re-serializing user/passwd components
    _safechars = "!~*'()+"
    # characters left unquoted when re-serializing path/fragment components
    _safepchars = "/!~*'()+:\\"
    _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match

    def __init__(self, path, parsequery=True, parsefragment=True):
        # We slowly chomp away at path until we have only the path left
        self.scheme = self.user = self.passwd = self.host = None
        self.port = self.path = self.query = self.fragment = None
        self._localpath = True
        self._hostport = ''
        self._origpath = path

        if parsefragment and '#' in path:
            path, self.fragment = path.split('#', 1)

        # special case for Windows drive letters and UNC paths
        if hasdriveletter(path) or path.startswith('\\\\'):
            self.path = path
            return

        # For compatibility reasons, we can't handle bundle paths as
        # normal URLS
        if path.startswith('bundle:'):
            self.scheme = 'bundle'
            path = path[7:]
            if path.startswith('//'):
                path = path[2:]
            self.path = path
            return

        if self._matchscheme(path):
            parts = path.split(':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = ''
                return
        else:
            if self._localpath:
                self.path = path
                return

        if parsequery and '?' in path:
            path, self.query = path.split('?', 1)
            if not path:
                path = None
            if not self.query:
                self.query = None

        # // is required to specify a host/authority
        if path and path.startswith('//'):
            parts = path[2:].split('/', 1)
            if len(parts) > 1:
                self.host, path = parts
            else:
                self.host = parts[0]
                path = None
            if not self.host:
                self.host = None
                # path of file:///d is /d
                # path of file:///d:/ is d:/, not /d:/
                if path and not hasdriveletter(path):
                    path = '/' + path

        if self.host and '@' in self.host:
            self.user, self.host = self.host.rsplit('@', 1)
            if ':' in self.user:
                self.user, self.passwd = self.user.split(':', 1)
            if not self.host:
                self.host = None

        # Don't split on colons in IPv6 addresses without ports
        if (self.host and ':' in self.host and
            not (self.host.startswith('[') and self.host.endswith(']'))):
            self._hostport = self.host
            self.host, self.port = self.host.rsplit(':', 1)
            if not self.host:
                self.host = None

        if (self.host and self.scheme == 'file' and
            self.host not in ('localhost', '127.0.0.1', '[::1]')):
            raise Abort(_('file:// URLs can only refer to localhost'))

        self.path = path

        # leave the query string escaped
        for a in ('user', 'passwd', 'host', 'port',
                  'path', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, pycompat.urlunquote(v))

    def __repr__(self):
        attrs = []
        for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
                  'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                attrs.append('%s: %r' % (a, v))
        return '<url %s>' % ', '.join(attrs)

    def __str__(self):
        r"""Join the URL's components back into a URL string.

        Examples:

        >>> str(url('http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
        'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
        >>> str(url('http://user:pw@host:80/?foo=bar&baz=42'))
        'http://user:pw@host:80/?foo=bar&baz=42'
        >>> str(url('http://user:pw@host:80/?foo=bar%3dbaz'))
        'http://user:pw@host:80/?foo=bar%3dbaz'
        >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> str(url('http://localhost:80//'))
        'http://localhost:80//'
        >>> str(url('http://localhost:80/'))
        'http://localhost:80/'
        >>> str(url('http://localhost:80'))
        'http://localhost:80/'
        >>> str(url('bundle:foo'))
        'bundle:foo'
        >>> str(url('bundle://../foo'))
        'bundle:../foo'
        >>> str(url('path'))
        'path'
        >>> str(url('file:///tmp/foo/bar'))
        'file:///tmp/foo/bar'
        >>> str(url('file:///c:/tmp/foo/bar'))
        'file:///c:/tmp/foo/bar'
        >>> print url(r'bundle:foo\bar')
        bundle:foo\bar
        >>> print url(r'file:///D:\data\hg')
        file:///D:\data\hg
        """
        if self._localpath:
            s = self.path
            if self.scheme == 'bundle':
                s = 'bundle:' + s
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if self.user or self.passwd or self.host:
            s += '//'
        elif self.scheme and (not self.path or self.path.startswith('/')
                              or hasdriveletter(self.path)):
            s += '//'
            if hasdriveletter(self.path):
                s += '/'
        if self.user:
            s += urlreq.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urlreq.quote(self.host)
            else:
                # bracketed IPv6 literal: quote nothing
                s += self.host
        if self.port:
            s += ':' + urlreq.quote(self.port)
        if self.host:
            s += '/'
        if self.path:
            # TODO: similar to the query string, we should not unescape the
            # path when we store it, the path might contain '%2f' = '/',
            # which we should *not* escape.
            s += urlreq.quote(self.path, safe=self._safepchars)
        if self.query:
            # we store the query in escaped form.
            s += '?' + self.query
        if self.fragment is not None:
            s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
        return s

    def authinfo(self):
        # serialize the URL without credentials, then restore them
        user, passwd = self.user, self.passwd
        try:
            self.user, self.passwd = None, None
            s = str(self)
        finally:
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        # authinfo[1] is passed to urllib2 password manager, and its
        # URIs must not contain credentials. The host is passed in the
        # URIs list because Python < 2.4.3 uses only that to search for
        # a password.
        return (s, (None, (s, self.host),
                    self.user, self.passwd or ''))

    def isabs(self):
        if self.scheme and self.scheme != 'file':
            return True # remote URL
        if hasdriveletter(self.path):
            return True # absolute for our purposes - can't be joined()
        if self.path.startswith(r'\\'):
            return True # Windows UNC path
        if self.path.startswith('/'):
            return True # POSIX-style
        return False

    def localpath(self):
        if self.scheme == 'file' or self.scheme == 'bundle':
            path = self.path or '/'
            # For Windows, we need to promote hosts containing drive
            # letters to paths with drive letters.
            if hasdriveletter(self._hostport):
                path = self._hostport + '/' + self.path
            elif (self.host is not None and self.path
                  and not hasdriveletter(path)):
                path = '/' + path
            return path
        return self._origpath

    def islocal(self):
        '''whether localpath will return something that posixfile can open'''
        return (not self.scheme or self.scheme == 'file'
                or self.scheme == 'bundle')
2706 2726
def hasscheme(path):
    """Report whether path carries a URL scheme (e.g. 'http:')."""
    u = url(path)
    return bool(u.scheme)
2709 2729
def hasdriveletter(path):
    """Report whether path starts with a Windows drive letter ('c:...')."""
    if not path:
        # preserve the falsy input itself ('' or None) like the
        # short-circuiting original expression
        return path
    first, sep = path[0:1], path[1:2]
    return sep == ':' and first.isalpha()
2712 2732
def urllocalpath(path):
    """Return the local filesystem path for path, parsed as a raw URL."""
    u = url(path, parsequery=False, parsefragment=False)
    return u.localpath()
2715 2735
def hidepassword(u):
    '''hide user credential in a url string'''
    parsed = url(u)
    if parsed.passwd:
        # mask rather than drop, so the URL shape stays recognizable
        parsed.passwd = '***'
    return str(parsed)
2722 2742
def removeauth(u):
    '''remove all authentication information from a url string'''
    parsed = url(u)
    parsed.user = None
    parsed.passwd = None
    return str(parsed)
2728 2748
def isatty(fp):
    '''Return whether *fp* is attached to a terminal.

    Objects that do not implement isatty() are reported as non-ttys.
    '''
    try:
        result = fp.isatty()
    except AttributeError:
        # Not file-like enough to ask; treat as a non-tty.
        result = False
    return result
2734 2754
# Table-driven formatter turning a duration (in seconds) into a string,
# with units ranging from seconds down to nanoseconds; row semantics
# (threshold, divisor, format) are defined by unitcountfn.
timecount = unitcountfn(
    (1, 1e3, _('%.0f s')),
    (100, 1, _('%.1f s')),
    (10, 1, _('%.2f s')),
    (1, 1, _('%.3f s')),
    (100, 0.001, _('%.1f ms')),
    (10, 0.001, _('%.2f ms')),
    (1, 0.001, _('%.3f ms')),
    (100, 0.000001, _('%.1f us')),
    (10, 0.000001, _('%.2f us')),
    (1, 0.000001, _('%.3f us')),
    (100, 0.000000001, _('%.1f ns')),
    (10, 0.000000001, _('%.2f ns')),
    (1, 0.000000001, _('%.3f ns')),
    )
2750 2770
# Current indentation depth of nested @timed calls, in spaces.
_timenesting = [0]

def timed(func):
    '''Report the execution time of a function call to stderr.

    During development, use as a decorator when you need to measure
    the cost of a function, e.g. as follows:

    @util.timed
    def foo(a, b, c):
        pass

    Nested timed calls are indented to reflect the call hierarchy.
    '''

    def wrapper(*args, **kwargs):
        indent = 2
        begin = time.time()
        _timenesting[0] += indent
        try:
            return func(*args, **kwargs)
        finally:
            duration = time.time() - begin
            _timenesting[0] -= indent
            stderr.write('%s%s: %s\n'
                         % (' ' * _timenesting[0], func.__name__,
                            timecount(duration)))
    return wrapper
2777 2797
2778 2798 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
2779 2799 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
2780 2800
2781 2801 def sizetoint(s):
2782 2802 '''Convert a space specifier to a byte count.
2783 2803
2784 2804 >>> sizetoint('30')
2785 2805 30
2786 2806 >>> sizetoint('2.2kb')
2787 2807 2252
2788 2808 >>> sizetoint('6M')
2789 2809 6291456
2790 2810 '''
2791 2811 t = s.strip().lower()
2792 2812 try:
2793 2813 for k, u in _sizeunits:
2794 2814 if t.endswith(k):
2795 2815 return int(float(t[:-len(k)]) * u)
2796 2816 return int(t)
2797 2817 except ValueError:
2798 2818 raise error.ParseError(_("couldn't parse size: %s") % s)
2799 2819
class hooks(object):
    '''A collection of hook functions that can be used to extend a
    function's behavior. Hooks are called in lexicographic order,
    based on the names of their sources.'''

    def __init__(self):
        # list of (source, hookfn); left unsorted until call time
        self._hooks = []

    def add(self, source, hook):
        self._hooks.append((source, hook))

    def __call__(self, *args):
        # Sort by source name so invocation order is deterministic;
        # list.sort is stable, so equal names keep registration order.
        self._hooks.sort(key=lambda entry: entry[0])
        return [hookfn(*args) for _source, hookfn in self._hooks]
2817 2837
def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%s'):
    '''Yields lines for a nicely formatted stacktrace.
    Skips the 'skip' last entries.
    Each file+linenumber is formatted according to fileline.
    Each line is formatted according to line.
    If line is None, it yields:
      length of longest filepath+line number,
      filepath+linenumber,
      function

    Not be used in production code but very convenient while developing.
    '''
    # Slice off this frame itself plus the 'skip' innermost entries.
    entries = [(fileline % (fn, ln), func)
               for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]]
    if entries:
        # Width of the widest "file:line" column, used for alignment.
        fnmax = max(len(entry[0]) for entry in entries)
        for fnln, func in entries:
            if line is None:
                yield (fnmax, fnln, func)
            else:
                yield line % (fnmax, fnln, func)
2839 2859
def debugstacktrace(msg='stacktrace', skip=0, f=stderr, otherf=stdout):
    '''Write *msg* plus the current stack trace to *f* (stderr by default).

    The 'skip' innermost frames are omitted. 'otherf' (stdout by default)
    is flushed first so interleaved output stays ordered. Requires no ui
    object; meant as a development aid rather than production code.
    '''
    if otherf:
        otherf.flush()
    f.write('%s at:\n' % msg)
    # skip + 1 hides this helper's own frame as well.
    for entry in getstackframes(skip + 1):
        f.write(entry)
    f.flush()
2852 2872
class dirs(object):
    '''a multiset of directory names from a dirstate or manifest'''

    def __init__(self, map, skip=None):
        # directory name -> number of tracked entries residing under it
        self._dirs = {}
        addpath = self.addpath
        # Dirstate-style mapping with a state to skip: only count files
        # whose one-character state differs from 'skip'. Otherwise treat
        # 'map' as a plain iterable of file names. (Python 2 iteritems.)
        if safehasattr(map, 'iteritems') and skip is not None:
            for f, s in map.iteritems():
                if s[0] != skip:
                    addpath(f)
        else:
            for f in map:
                addpath(f)

    def addpath(self, path):
        dirs = self._dirs
        for base in finddirs(path):
            if base in dirs:
                # finddirs yields longest prefix first; once an ancestor
                # exists, every shorter prefix is already counted, so
                # bumping this one is enough.
                dirs[base] += 1
                return
            dirs[base] = 1

    def delpath(self, path):
        # Mirror image of addpath: decrement until a count would reach
        # zero, then delete that entry and all shorter prefixes' turns
        # are handled by continuing the loop.
        dirs = self._dirs
        for base in finddirs(path):
            if dirs[base] > 1:
                dirs[base] -= 1
                return
            del dirs[base]

    def __iter__(self):
        # Python 2 idiom; yields directory names only.
        return self._dirs.iterkeys()

    def __contains__(self, d):
        return d in self._dirs
2888 2908
# Prefer the C implementation of 'dirs' when the parsers module provides one.
if safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs
2891 2911
def finddirs(path):
    '''Yield every ancestor directory of *path*, longest first.

    "a/b/c" yields "a/b" then "a"; a path without '/' yields nothing.
    '''
    sep = path.rfind('/')
    while sep != -1:
        yield path[:sep]
        sep = path.rfind('/', 0, sep)
2897 2917
class ctxmanager(object):
    '''A context manager for use in 'with' blocks to allow multiple
    contexts to be entered at once. This is both safer and more
    flexible than contextlib.nested.

    Once Mercurial supports Python 2.7+, this will become mostly
    unnecessary.
    '''

    def __init__(self, *args):
        '''Accepts a list of no-argument functions that return context
        managers. These will be invoked at __call__ time.'''
        self._pending = args
        self._atexit = []

    def __enter__(self):
        return self

    def enter(self):
        '''Create and enter context managers in the order in which they were
        passed to the constructor.

        Returns the list of values produced by the managers' __enter__
        methods, in the same order. Their __exit__ methods are queued for
        invocation when this ctxmanager exits.
        '''
        values = []
        for func in self._pending:
            obj = func()
            values.append(obj.__enter__())
            self._atexit.append(obj.__exit__)
        del self._pending
        return values

    def atexit(self, func, *args, **kwargs):
        '''Add a function to call when this context manager exits. The
        ordering of multiple atexit calls is unspecified, save that
        they will happen before any __exit__ functions.'''
        def wrapper(exc_type, exc_val, exc_tb):
            func(*args, **kwargs)
        self._atexit.append(wrapper)
        return func

    def __exit__(self, exc_type, exc_val, exc_tb):
        '''Context managers are exited in the reverse order from which
        they were created.

        If an inner exit function suppresses the active exception, later
        exit functions see no exception. An exception raised by an exit
        function replaces the one currently being propagated.
        '''
        received = exc_type is not None
        suppressed = False
        pending = None
        self._atexit.reverse()
        for exitfunc in self._atexit:
            try:
                if exitfunc(exc_type, exc_val, exc_tb):
                    suppressed = True
                    exc_type = None
                    exc_val = None
                    exc_tb = None
            except BaseException:
                # An exit function failed: remember its exception and hand
                # it to the remaining exit functions.
                # (Fixed: a redundant duplicate 'pending = sys.exc_info()'
                # assignment has been removed.)
                exc_type, exc_val, exc_tb = pending = sys.exc_info()
        del self._atexit
        if pending:
            raise exc_val
        return received and suppressed
2957 2977
# compression code

# Roles used when querying wire-protocol compression support.
SERVERROLE = 'server'
CLIENTROLE = 'client'

# Describes an engine's wire-protocol support: its format identifier and
# its advertisement priority for each role (higher is advertised first).
compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
                                               (u'name', u'serverpriority',
                                                u'clientpriority'))
2966 2986
class compressormanager(object):
    """Holds registrations of various compression engines.

    This class essentially abstracts the differences between compression
    engines to allow new compression formats to be added easily, possibly from
    extensions.

    Compressors are registered against the global instance by calling its
    ``register()`` method.
    """
    def __init__(self):
        # Engine name to compressionengine instance.
        self._engines = {}
        # Bundle spec human name to engine name.
        self._bundlenames = {}
        # Internal bundle identifier to engine name.
        self._bundletypes = {}
        # Wire proto identifier to engine name.
        self._wiretypes = {}

    def __getitem__(self, key):
        return self._engines[key]

    def __contains__(self, key):
        return key in self._engines

    def __iter__(self):
        # Iterates engine names, not engine instances.
        return iter(self._engines.keys())

    def register(self, engine):
        """Register a compression engine with the manager.

        The argument must be a ``compressionengine`` instance.

        Raises error.Abort when the engine's name, bundle identifiers, or
        wire-protocol identifier collide with an earlier registration.
        """
        if not isinstance(engine, compressionengine):
            raise ValueError(_('argument must be a compressionengine'))

        name = engine.name()

        if name in self._engines:
            raise error.Abort(_('compression engine %s already registered') %
                              name)

        bundleinfo = engine.bundletype()
        if bundleinfo:
            bundlename, bundletype = bundleinfo

            if bundlename in self._bundlenames:
                raise error.Abort(_('bundle name %s already registered') %
                                  bundlename)
            if bundletype in self._bundletypes:
                raise error.Abort(_('bundle type %s already registered by %s') %
                                  (bundletype, self._bundletypes[bundletype]))

            # No external facing name declared.
            if bundlename:
                self._bundlenames[bundlename] = name

            self._bundletypes[bundletype] = name

        wiresupport = engine.wireprotosupport()
        if wiresupport:
            wiretype = wiresupport.name
            if wiretype in self._wiretypes:
                raise error.Abort(_('wire protocol compression %s already '
                                    'registered by %s') %
                                  (wiretype, self._wiretypes[wiretype]))

            self._wiretypes[wiretype] = name

        # All identifier checks passed; make the engine discoverable.
        self._engines[name] = engine

    @property
    def supportedbundlenames(self):
        return set(self._bundlenames.keys())

    @property
    def supportedbundletypes(self):
        return set(self._bundletypes.keys())

    def forbundlename(self, bundlename):
        """Obtain a compression engine registered to a bundle name.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundlenames[bundlename]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundletypes[bundletype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'

        engines = [self._engines[e] for e in self._wiretypes.values()]
        if onlyavailable:
            engines = [e for e in engines if e.available()]

        def getkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            w = e.wireprotosupport()
            return -1 * getattr(w, attr), w.name

        return list(sorted(engines, key=getkey))

    def forwiretype(self, wiretype):
        # Like forbundletype, but keyed by the wire-protocol identifier.
        engine = self._engines[self._wiretypes[wiretype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

# Global instance against which all engines register themselves.
compengines = compressormanager()
3105 3125
class compressionengine(object):
    """Abstract interface that every compression engine implements.

    Subclasses are registered with the global compression engine manager
    and describe how they plug into bundles and the wire protocol.
    """

    def name(self):
        """Return the key under which the engine is registered.

        Subclasses must override this.
        """
        raise NotImplementedError()

    def available(self):
        """Report whether this engine can be used in this installation.

        Engines relying on optional modules (such as C extensions that may
        not be present) override this to return False when their backing
        module is missing.
        """
        return True

    def bundletype(self):
        """Describe how this engine is identified in bundles.

        Returns None when the engine cannot compress bundles. Otherwise
        returns a 2-tuple of strings: the user-facing "bundle spec"
        compression name (or ``None`` to exclude the name from external
        usage) and the internal identifier denoting the compression format
        within bundles.

        Engines returning a value must also implement ``compressstream``
        and ``decompressorreader``.
        """
        return None

    def wireprotosupport(self):
        """Describe this engine's wire-protocol participation.

        Returns None when the engine cannot compress wire-protocol
        payloads. Otherwise returns a ``compenginewireprotosupport`` tuple
        with the string format identifier plus the integer server and
        client advertisement priorities: the highest integer is advertised
        first, non-positive values aren't advertised at all, and the
        relative (somewhat arbitrary) default order can be changed via
        config options.

        Engines returning a value must also implement ``compressstream``
        and ``decompressorreader``.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of byte chunks.

        Receives an iterator (ideally a generator) of uncompressed byte
        chunks and returns an iterator (ideally a generator) of compressed
        byte chunks. ``opts`` optionally defines how to perform the
        compression; each engine treats it differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Wrap a file object that produces compressed data.

        ``fh`` must expose a ``read(size)`` method returning compressed
        bytes; the return value exposes a ``read(size)`` method returning
        uncompressed bytes.
        """
        raise NotImplementedError()
3189 3209
class _zlibengine(compressionengine):
    '''zlib/gzip compression, always available via the standard library.'''

    def name(self):
        return 'zlib'

    def bundletype(self):
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        # Medium priority for both server and client.
        return compewireprotosupport('zlib', 20, 20)

    def compressstream(self, it, opts=None):
        opts = opts or {}

        compressor = zlib.compressobj(opts.get('level', -1))
        for chunk in it:
            compressed = compressor.compress(chunk)
            # Not every compress() call emits data; skipping empty results
            # here is cheaper than pushing empty chunks through generators.
            if compressed:
                yield compressed

        yield compressor.flush()

    def decompressorreader(self, fh):
        def gen():
            decompressor = zlib.decompressobj()
            for chunk in filechunkiter(fh):
                while chunk:
                    # Decompress in bounded pieces to limit memory usage.
                    yield decompressor.decompress(chunk, 2 ** 18)
                    chunk = decompressor.unconsumed_tail

        return chunkbuffer(gen())

compengines.register(_zlibengine())
3225 3245
class _bz2engine(compressionengine):
    '''bzip2 compression via the standard library bz2 module.'''

    def name(self):
        return 'bz2'

    def bundletype(self):
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        opts = opts or {}
        compressor = bz2.BZ2Compressor(opts.get('level', 9))
        for chunk in it:
            compressed = compressor.compress(chunk)
            if compressed:
                yield compressed

        yield compressor.flush()

    def decompressorreader(self, fh):
        def gen():
            decompressor = bz2.BZ2Decompressor()
            for chunk in filechunkiter(fh):
                yield decompressor.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_bz2engine())
3257 3277
class _truncatedbz2engine(compressionengine):
    '''Decompression-only engine for headerless bzip2 streams.'''

    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        # No user-facing spec name: this format is internal only.
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled elsewhere.

    def decompressorreader(self, fh):
        def gen():
            # The input stream doesn't have the 'BZ' header. So add it back.
            decompressor = bz2.BZ2Decompressor()
            decompressor.decompress('BZ')
            for chunk in filechunkiter(fh):
                yield decompressor.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_truncatedbz2engine())
3278 3298
class _noopengine(compressionengine):
    '''Pass-through "compression" that leaves data untouched.'''

    def name(self):
        return 'none'

    def bundletype(self):
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't because
    # unless you are on a fast network, uncompressed payloads can easily
    # saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    def compressstream(self, it, opts=None):
        # Nothing to do: the input chunks already are the output.
        return it

    def decompressorreader(self, fh):
        return fh

compengines.register(_noopengine())
3299 3319
class _zstdengine(compressionengine):
    '''zstandard compression; usable only when the zstd module imports.'''

    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
            return zstd
        except ImportError:
            return None

    def available(self):
        return bool(self._module)

    def bundletype(self):
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def compressstream(self, it, opts=None):
        opts = opts or {}
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = opts.get('level', 3)

        zstd = self._module
        compressor = zstd.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            compressed = compressor.compress(chunk)
            if compressed:
                yield compressed

        yield compressor.flush()

    def decompressorreader(self, fh):
        zstd = self._module
        dctx = zstd.ZstdDecompressor()
        return chunkbuffer(dctx.read_from(fh))

compengines.register(_zstdengine())
3347 3367
# convenient shortcut for interactive debugging sessions
dst = debugstacktrace
General Comments 0
You need to be logged in to leave comments. Login now