util: add tryunlink function...
Ryan McElroy
r31540:6d5b77ab default
@@ -1,3580 +1,3588 @@
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import
17 17
18 18 import bz2
19 19 import calendar
20 20 import codecs
21 21 import collections
22 22 import datetime
23 23 import errno
24 24 import gc
25 25 import hashlib
26 26 import imp
27 27 import os
28 28 import platform as pyplatform
29 29 import re as remod
30 30 import shutil
31 31 import signal
32 32 import socket
33 33 import stat
34 34 import string
35 35 import subprocess
36 36 import sys
37 37 import tempfile
38 38 import textwrap
39 39 import time
40 40 import traceback
41 41 import zlib
42 42
43 43 from . import (
44 44 encoding,
45 45 error,
46 46 i18n,
47 47 osutil,
48 48 parsers,
49 49 pycompat,
50 50 )
51 51
52 52 empty = pycompat.empty
53 53 httplib = pycompat.httplib
54 54 httpserver = pycompat.httpserver
55 55 pickle = pycompat.pickle
56 56 queue = pycompat.queue
57 57 socketserver = pycompat.socketserver
58 58 stderr = pycompat.stderr
59 59 stdin = pycompat.stdin
60 60 stdout = pycompat.stdout
61 61 stringio = pycompat.stringio
62 62 urlerr = pycompat.urlerr
63 63 urlparse = pycompat.urlparse
64 64 urlreq = pycompat.urlreq
65 65 xmlrpclib = pycompat.xmlrpclib
66 66
67 67 def isatty(fp):
68 68 try:
69 69 return fp.isatty()
70 70 except AttributeError:
71 71 return False
72 72
73 73 # glibc determines buffering on first write to stdout - if we replace a TTY
74 74 # destined stdout with a pipe destined stdout (e.g. pager), we want line
75 75 # buffering
76 76 if isatty(stdout):
77 77 stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)
78 78
79 79 if pycompat.osname == 'nt':
80 80 from . import windows as platform
81 81 stdout = platform.winstdout(stdout)
82 82 else:
83 83 from . import posix as platform
84 84
85 85 _ = i18n._
86 86
87 87 bindunixsocket = platform.bindunixsocket
88 88 cachestat = platform.cachestat
89 89 checkexec = platform.checkexec
90 90 checklink = platform.checklink
91 91 copymode = platform.copymode
92 92 executablepath = platform.executablepath
93 93 expandglobs = platform.expandglobs
94 94 explainexit = platform.explainexit
95 95 findexe = platform.findexe
96 96 gethgcmd = platform.gethgcmd
97 97 getuser = platform.getuser
98 98 getpid = os.getpid
99 99 groupmembers = platform.groupmembers
100 100 groupname = platform.groupname
101 101 hidewindow = platform.hidewindow
102 102 isexec = platform.isexec
103 103 isowner = platform.isowner
104 104 localpath = platform.localpath
105 105 lookupreg = platform.lookupreg
106 106 makedir = platform.makedir
107 107 nlinks = platform.nlinks
108 108 normpath = platform.normpath
109 109 normcase = platform.normcase
110 110 normcasespec = platform.normcasespec
111 111 normcasefallback = platform.normcasefallback
112 112 openhardlinks = platform.openhardlinks
113 113 oslink = platform.oslink
114 114 parsepatchoutput = platform.parsepatchoutput
115 115 pconvert = platform.pconvert
116 116 poll = platform.poll
117 117 popen = platform.popen
118 118 posixfile = platform.posixfile
119 119 quotecommand = platform.quotecommand
120 120 readpipe = platform.readpipe
121 121 rename = platform.rename
122 122 removedirs = platform.removedirs
123 123 samedevice = platform.samedevice
124 124 samefile = platform.samefile
125 125 samestat = platform.samestat
126 126 setbinary = platform.setbinary
127 127 setflags = platform.setflags
128 128 setsignalhandler = platform.setsignalhandler
129 129 shellquote = platform.shellquote
130 130 spawndetached = platform.spawndetached
131 131 split = platform.split
132 132 sshargs = platform.sshargs
133 133 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
134 134 statisexec = platform.statisexec
135 135 statislink = platform.statislink
136 136 testpid = platform.testpid
137 137 umask = platform.umask
138 138 unlink = platform.unlink
139 139 username = platform.username
140 140
141 141 # Python compatibility
142 142
143 143 _notset = object()
144 144
145 145 # disable Python's problematic floating point timestamps (issue4836)
146 146 # (Python hypocritically says you shouldn't change this behavior in
147 147 # libraries, and sure enough Mercurial is not a library.)
148 148 os.stat_float_times(False)
149 149
150 150 def safehasattr(thing, attr):
151 151 return getattr(thing, attr, _notset) is not _notset
152 152
153 153 def bitsfrom(container):
154 154 bits = 0
155 155 for bit in container:
156 156 bits |= bit
157 157 return bits
158 158
159 159 DIGESTS = {
160 160 'md5': hashlib.md5,
161 161 'sha1': hashlib.sha1,
162 162 'sha512': hashlib.sha512,
163 163 }
164 164 # List of digest types from strongest to weakest
165 165 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
166 166
167 167 for k in DIGESTS_BY_STRENGTH:
168 168 assert k in DIGESTS
169 169
170 170 class digester(object):
171 171 """helper to compute digests.
172 172
173 173 This helper can be used to compute one or more digests given their name.
174 174
175 175 >>> d = digester(['md5', 'sha1'])
176 176 >>> d.update('foo')
177 177 >>> [k for k in sorted(d)]
178 178 ['md5', 'sha1']
179 179 >>> d['md5']
180 180 'acbd18db4cc2f85cedef654fccc4a4d8'
181 181 >>> d['sha1']
182 182 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
183 183 >>> digester.preferred(['md5', 'sha1'])
184 184 'sha1'
185 185 """
186 186
187 187 def __init__(self, digests, s=''):
188 188 self._hashes = {}
189 189 for k in digests:
190 190 if k not in DIGESTS:
191 191 raise Abort(_('unknown digest type: %s') % k)
192 192 self._hashes[k] = DIGESTS[k]()
193 193 if s:
194 194 self.update(s)
195 195
196 196 def update(self, data):
197 197 for h in self._hashes.values():
198 198 h.update(data)
199 199
200 200 def __getitem__(self, key):
201 201 if key not in DIGESTS:
202 202 raise Abort(_('unknown digest type: %s') % key)
203 203 return self._hashes[key].hexdigest()
204 204
205 205 def __iter__(self):
206 206 return iter(self._hashes)
207 207
208 208 @staticmethod
209 209 def preferred(supported):
210 210 """returns the strongest digest type in both supported and DIGESTS."""
211 211
212 212 for k in DIGESTS_BY_STRENGTH:
213 213 if k in supported:
214 214 return k
215 215 return None
216 216
217 217 class digestchecker(object):
218 218 """file handle wrapper that additionally checks content against a given
219 219 size and digests.
220 220
221 221 d = digestchecker(fh, size, {'md5': '...'})
222 222
223 223 When multiple digests are given, all of them are validated.
224 224 """
225 225
226 226 def __init__(self, fh, size, digests):
227 227 self._fh = fh
228 228 self._size = size
229 229 self._got = 0
230 230 self._digests = dict(digests)
231 231 self._digester = digester(self._digests.keys())
232 232
233 233 def read(self, length=-1):
234 234 content = self._fh.read(length)
235 235 self._digester.update(content)
236 236 self._got += len(content)
237 237 return content
238 238
239 239 def validate(self):
240 240 if self._size != self._got:
241 241 raise Abort(_('size mismatch: expected %d, got %d') %
242 242 (self._size, self._got))
243 243 for k, v in self._digests.items():
244 244 if v != self._digester[k]:
245 245 # i18n: first parameter is a digest name
246 246 raise Abort(_('%s mismatch: expected %s, got %s') %
247 247 (k, v, self._digester[k]))
248 248
249 249 try:
250 250 buffer = buffer
251 251 except NameError:
252 252 if not pycompat.ispy3:
253 253 def buffer(sliceable, offset=0, length=None):
254 254 if length is not None:
255 255 return sliceable[offset:offset + length]
256 256 return sliceable[offset:]
257 257 else:
258 258 def buffer(sliceable, offset=0, length=None):
259 259 if length is not None:
260 260 return memoryview(sliceable)[offset:offset + length]
261 261 return memoryview(sliceable)[offset:]
262 262
263 263 closefds = pycompat.osname == 'posix'
264 264
265 265 _chunksize = 4096
266 266
267 267 class bufferedinputpipe(object):
268 268 """a manually buffered input pipe
269 269
270 270 Python will not let us use buffered IO and lazy reading with 'polling' at
271 271 the same time. We cannot probe the buffer state and select will not detect
272 272 that data are ready to read if they are already buffered.
273 273
274 274 This class lets us work around that by implementing its own buffering
275 275 (allowing efficient readline) while offering a way to know if the buffer is
276 276 empty from the outside (allowing collaboration of the buffer with polling).
277 277
278 278 This class lives in the 'util' module because it makes use of the 'os'
279 279 module from the python stdlib.
280 280 """
281 281
282 282 def __init__(self, input):
283 283 self._input = input
284 284 self._buffer = []
285 285 self._eof = False
286 286 self._lenbuf = 0
287 287
288 288 @property
289 289 def hasbuffer(self):
290 290 """True is any data is currently buffered
291 291
292 292 This will be used externally as a pre-step for polling IO. If there is
293 293 already data then no polling should be set in place."""
294 294 return bool(self._buffer)
295 295
296 296 @property
297 297 def closed(self):
298 298 return self._input.closed
299 299
300 300 def fileno(self):
301 301 return self._input.fileno()
302 302
303 303 def close(self):
304 304 return self._input.close()
305 305
306 306 def read(self, size):
307 307 while (not self._eof) and (self._lenbuf < size):
308 308 self._fillbuffer()
309 309 return self._frombuffer(size)
310 310
311 311 def readline(self, *args, **kwargs):
312 312 if 1 < len(self._buffer):
313 313 # this should not happen because both read and readline end with a
314 314 # _frombuffer call that collapses it.
315 315 self._buffer = [''.join(self._buffer)]
316 316 self._lenbuf = len(self._buffer[0])
317 317 lfi = -1
318 318 if self._buffer:
319 319 lfi = self._buffer[-1].find('\n')
320 320 while (not self._eof) and lfi < 0:
321 321 self._fillbuffer()
322 322 if self._buffer:
323 323 lfi = self._buffer[-1].find('\n')
324 324 size = lfi + 1
325 325 if lfi < 0: # end of file
326 326 size = self._lenbuf
327 327 elif 1 < len(self._buffer):
328 328 # we need to take previous chunks into account
329 329 size += self._lenbuf - len(self._buffer[-1])
330 330 return self._frombuffer(size)
331 331
332 332 def _frombuffer(self, size):
333 333 """return at most 'size' data from the buffer
334 334
335 335 The data are removed from the buffer."""
336 336 if size == 0 or not self._buffer:
337 337 return ''
338 338 buf = self._buffer[0]
339 339 if 1 < len(self._buffer):
340 340 buf = ''.join(self._buffer)
341 341
342 342 data = buf[:size]
343 343 buf = buf[len(data):]
344 344 if buf:
345 345 self._buffer = [buf]
346 346 self._lenbuf = len(buf)
347 347 else:
348 348 self._buffer = []
349 349 self._lenbuf = 0
350 350 return data
351 351
352 352 def _fillbuffer(self):
353 353 """read data to the buffer"""
354 354 data = os.read(self._input.fileno(), _chunksize)
355 355 if not data:
356 356 self._eof = True
357 357 else:
358 358 self._lenbuf += len(data)
359 359 self._buffer.append(data)
360 360
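# Illustrative sketch (hypothetical, not part of this changeset): how the
# class above is meant to cooperate with polling. The helper name and the
# 'cat' command are invented for illustration.
def _bufferedinputpipe_example():
    import select
    proc = subprocess.Popen(['cat'], stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE)
    pipe = bufferedinputpipe(proc.stdout)
    proc.stdin.write('hello\n')
    proc.stdin.flush()
    # only poll when nothing is buffered yet; hasbuffer tells us that
    if not pipe.hasbuffer:
        select.select([pipe], [], [])  # works because pipe exposes fileno()
    return pipe.readline()             # -> 'hello\n'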
361 361 def popen2(cmd, env=None, newlines=False):
362 362 # Setting bufsize to -1 lets the system decide the buffer size.
363 363 # The default for bufsize is 0, meaning unbuffered. This leads to
364 364 # poor performance on Mac OS X: http://bugs.python.org/issue4194
365 365 p = subprocess.Popen(cmd, shell=True, bufsize=-1,
366 366 close_fds=closefds,
367 367 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
368 368 universal_newlines=newlines,
369 369 env=env)
370 370 return p.stdin, p.stdout
371 371
372 372 def popen3(cmd, env=None, newlines=False):
373 373 stdin, stdout, stderr, p = popen4(cmd, env, newlines)
374 374 return stdin, stdout, stderr
375 375
376 376 def popen4(cmd, env=None, newlines=False, bufsize=-1):
377 377 p = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
378 378 close_fds=closefds,
379 379 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
380 380 stderr=subprocess.PIPE,
381 381 universal_newlines=newlines,
382 382 env=env)
383 383 return p.stdin, p.stdout, p.stderr, p
384 384
385 385 def version():
386 386 """Return version information if available."""
387 387 try:
388 388 from . import __version__
389 389 return __version__.version
390 390 except ImportError:
391 391 return 'unknown'
392 392
393 393 def versiontuple(v=None, n=4):
394 394 """Parses a Mercurial version string into an N-tuple.
395 395
396 396 The version string to be parsed is specified with the ``v`` argument.
397 397 If it isn't defined, the current Mercurial version string will be parsed.
398 398
399 399 ``n`` can be 2, 3, or 4. Here is how some version strings map to
400 400 returned values:
401 401
402 402 >>> v = '3.6.1+190-df9b73d2d444'
403 403 >>> versiontuple(v, 2)
404 404 (3, 6)
405 405 >>> versiontuple(v, 3)
406 406 (3, 6, 1)
407 407 >>> versiontuple(v, 4)
408 408 (3, 6, 1, '190-df9b73d2d444')
409 409
410 410 >>> versiontuple('3.6.1+190-df9b73d2d444+20151118')
411 411 (3, 6, 1, '190-df9b73d2d444+20151118')
412 412
413 413 >>> v = '3.6'
414 414 >>> versiontuple(v, 2)
415 415 (3, 6)
416 416 >>> versiontuple(v, 3)
417 417 (3, 6, None)
418 418 >>> versiontuple(v, 4)
419 419 (3, 6, None, None)
420 420
421 421 >>> v = '3.9-rc'
422 422 >>> versiontuple(v, 2)
423 423 (3, 9)
424 424 >>> versiontuple(v, 3)
425 425 (3, 9, None)
426 426 >>> versiontuple(v, 4)
427 427 (3, 9, None, 'rc')
428 428
429 429 >>> v = '3.9-rc+2-02a8fea4289b'
430 430 >>> versiontuple(v, 2)
431 431 (3, 9)
432 432 >>> versiontuple(v, 3)
433 433 (3, 9, None)
434 434 >>> versiontuple(v, 4)
435 435 (3, 9, None, 'rc+2-02a8fea4289b')
436 436 """
437 437 if not v:
438 438 v = version()
439 439 parts = remod.split('[\+-]', v, 1)
440 440 if len(parts) == 1:
441 441 vparts, extra = parts[0], None
442 442 else:
443 443 vparts, extra = parts
444 444
445 445 vints = []
446 446 for i in vparts.split('.'):
447 447 try:
448 448 vints.append(int(i))
449 449 except ValueError:
450 450 break
451 451 # (3, 6) -> (3, 6, None)
452 452 while len(vints) < 3:
453 453 vints.append(None)
454 454
455 455 if n == 2:
456 456 return (vints[0], vints[1])
457 457 if n == 3:
458 458 return (vints[0], vints[1], vints[2])
459 459 if n == 4:
460 460 return (vints[0], vints[1], vints[2], extra)
461 461
462 462 # used by parsedate
463 463 defaultdateformats = (
464 464 '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
465 465 '%Y-%m-%dT%H:%M', # without seconds
466 466 '%Y-%m-%dT%H%M%S', # another awful but legal variant without :
467 467 '%Y-%m-%dT%H%M', # without seconds
468 468 '%Y-%m-%d %H:%M:%S', # our common legal variant
469 469 '%Y-%m-%d %H:%M', # without seconds
470 470 '%Y-%m-%d %H%M%S', # without :
471 471 '%Y-%m-%d %H%M', # without seconds
472 472 '%Y-%m-%d %I:%M:%S%p',
473 473 '%Y-%m-%d %H:%M',
474 474 '%Y-%m-%d %I:%M%p',
475 475 '%Y-%m-%d',
476 476 '%m-%d',
477 477 '%m/%d',
478 478 '%m/%d/%y',
479 479 '%m/%d/%Y',
480 480 '%a %b %d %H:%M:%S %Y',
481 481 '%a %b %d %I:%M:%S%p %Y',
482 482 '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
483 483 '%b %d %H:%M:%S %Y',
484 484 '%b %d %I:%M:%S%p %Y',
485 485 '%b %d %H:%M:%S',
486 486 '%b %d %I:%M:%S%p',
487 487 '%b %d %H:%M',
488 488 '%b %d %I:%M%p',
489 489 '%b %d %Y',
490 490 '%b %d',
491 491 '%H:%M:%S',
492 492 '%I:%M:%S%p',
493 493 '%H:%M',
494 494 '%I:%M%p',
495 495 )
496 496
497 497 extendeddateformats = defaultdateformats + (
498 498 "%Y",
499 499 "%Y-%m",
500 500 "%b",
501 501 "%b %Y",
502 502 )
503 503
504 504 def cachefunc(func):
505 505 '''cache the result of function calls'''
506 506 # XXX doesn't handle keyword args
507 507 if func.__code__.co_argcount == 0:
508 508 cache = []
509 509 def f():
510 510 if len(cache) == 0:
511 511 cache.append(func())
512 512 return cache[0]
513 513 return f
514 514 cache = {}
515 515 if func.__code__.co_argcount == 1:
516 516 # we gain a small amount of time because
517 517 # we don't need to pack/unpack the list
518 518 def f(arg):
519 519 if arg not in cache:
520 520 cache[arg] = func(arg)
521 521 return cache[arg]
522 522 else:
523 523 def f(*args):
524 524 if args not in cache:
525 525 cache[args] = func(*args)
526 526 return cache[args]
527 527
528 528 return f
529 529
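# Illustrative sketch (hypothetical, not part of this changeset): cachefunc
# memoizes on positional arguments, so the wrapped function body runs only
# once per distinct argument. The helper name is invented.
def _cachefunc_example():
    calls = []
    def expensive(x):
        calls.append(x)
        return x * 2
    cached = cachefunc(expensive)
    cached(2)
    cached(2)                # served from the cache
    assert calls == [2]      # 'expensive' ran only once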
530 530 class sortdict(dict):
531 531 '''a simple sorted dictionary'''
532 532 def __init__(self, data=None):
533 533 self._list = []
534 534 if data:
535 535 self.update(data)
536 536 def copy(self):
537 537 return sortdict(self)
538 538 def __setitem__(self, key, val):
539 539 if key in self:
540 540 self._list.remove(key)
541 541 self._list.append(key)
542 542 dict.__setitem__(self, key, val)
543 543 def __iter__(self):
544 544 return self._list.__iter__()
545 545 def update(self, src):
546 546 if isinstance(src, dict):
547 547 src = src.iteritems()
548 548 for k, v in src:
549 549 self[k] = v
550 550 def clear(self):
551 551 dict.clear(self)
552 552 self._list = []
553 553 def items(self):
554 554 return [(k, self[k]) for k in self._list]
555 555 def __delitem__(self, key):
556 556 dict.__delitem__(self, key)
557 557 self._list.remove(key)
558 558 def pop(self, key, *args, **kwargs):
559 559 try:
560 560 self._list.remove(key)
561 561 except ValueError:
562 562 pass
563 563 return dict.pop(self, key, *args, **kwargs)
564 564 def keys(self):
565 565 return self._list[:]
566 566 def iterkeys(self):
567 567 return self._list.__iter__()
568 568 def iteritems(self):
569 569 for k in self._list:
570 570 yield k, self[k]
571 571 def insert(self, index, key, val):
572 572 self._list.insert(index, key)
573 573 dict.__setitem__(self, key, val)
574 574 def __repr__(self):
575 575 if not self:
576 576 return '%s()' % self.__class__.__name__
577 577 return '%s(%r)' % (self.__class__.__name__, self.items())
578 578
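# Illustrative sketch (hypothetical, not part of this changeset): sortdict
# keeps insertion order, and re-setting an existing key moves it to the end.
def _sortdict_example():
    d = sortdict()
    d['b'] = 1
    d['a'] = 2
    assert d.keys() == ['b', 'a']   # insertion order, not alphabetical
    d['b'] = 3
    assert d.keys() == ['a', 'b']   # 'b' was re-set, so it moved last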
579 579 class _lrucachenode(object):
580 580 """A node in a doubly linked list.
581 581
582 582 Holds a reference to nodes on either side as well as a key-value
583 583 pair for the dictionary entry.
584 584 """
585 585 __slots__ = (u'next', u'prev', u'key', u'value')
586 586
587 587 def __init__(self):
588 588 self.next = None
589 589 self.prev = None
590 590
591 591 self.key = _notset
592 592 self.value = None
593 593
594 594 def markempty(self):
595 595 """Mark the node as emptied."""
596 596 self.key = _notset
597 597
598 598 class lrucachedict(object):
599 599 """Dict that caches most recent accesses and sets.
600 600
601 601 The dict consists of an actual backing dict - indexed by original
602 602 key - and a doubly linked circular list defining the order of entries in
603 603 the cache.
604 604
605 605 The head node is the newest entry in the cache. If the cache is full,
606 606 we recycle head.prev and make it the new head. Cache accesses result in
607 607 the node being moved to before the existing head and being marked as the
608 608 new head node.
609 609 """
610 610 def __init__(self, max):
611 611 self._cache = {}
612 612
613 613 self._head = head = _lrucachenode()
614 614 head.prev = head
615 615 head.next = head
616 616 self._size = 1
617 617 self._capacity = max
618 618
619 619 def __len__(self):
620 620 return len(self._cache)
621 621
622 622 def __contains__(self, k):
623 623 return k in self._cache
624 624
625 625 def __iter__(self):
626 626 # We don't have to iterate in cache order, but why not.
627 627 n = self._head
628 628 for i in range(len(self._cache)):
629 629 yield n.key
630 630 n = n.next
631 631
632 632 def __getitem__(self, k):
633 633 node = self._cache[k]
634 634 self._movetohead(node)
635 635 return node.value
636 636
637 637 def __setitem__(self, k, v):
638 638 node = self._cache.get(k)
639 639 # Replace existing value and mark as newest.
640 640 if node is not None:
641 641 node.value = v
642 642 self._movetohead(node)
643 643 return
644 644
645 645 if self._size < self._capacity:
646 646 node = self._addcapacity()
647 647 else:
648 648 # Grab the last/oldest item.
649 649 node = self._head.prev
650 650
651 651 # At capacity. Kill the old entry.
652 652 if node.key is not _notset:
653 653 del self._cache[node.key]
654 654
655 655 node.key = k
656 656 node.value = v
657 657 self._cache[k] = node
658 658 # And mark it as newest entry. No need to adjust order since it
659 659 # is already self._head.prev.
660 660 self._head = node
661 661
662 662 def __delitem__(self, k):
663 663 node = self._cache.pop(k)
664 664 node.markempty()
665 665
666 666 # Temporarily mark as newest item before re-adjusting head to make
667 667 # this node the oldest item.
668 668 self._movetohead(node)
669 669 self._head = node.next
670 670
671 671 # Additional dict methods.
672 672
673 673 def get(self, k, default=None):
674 674 try:
675 675 return self._cache[k].value
676 676 except KeyError:
677 677 return default
678 678
679 679 def clear(self):
680 680 n = self._head
681 681 while n.key is not _notset:
682 682 n.markempty()
683 683 n = n.next
684 684
685 685 self._cache.clear()
686 686
687 687 def copy(self):
688 688 result = lrucachedict(self._capacity)
689 689 n = self._head.prev
690 690 # Iterate in oldest-to-newest order, so the copy has the right ordering
691 691 for i in range(len(self._cache)):
692 692 result[n.key] = n.value
693 693 n = n.prev
694 694 return result
695 695
696 696 def _movetohead(self, node):
697 697 """Mark a node as the newest, making it the new head.
698 698
699 699 When a node is accessed, it becomes the freshest entry in the LRU
700 700 list, which is denoted by self._head.
701 701
702 702 Visually, let's make ``N`` the new head node (* denotes head):
703 703
704 704 previous/oldest <-> head <-> next/next newest
705 705
706 706 ----<->--- A* ---<->-----
707 707 | |
708 708 E <-> D <-> N <-> C <-> B
709 709
710 710 To:
711 711
712 712 ----<->--- N* ---<->-----
713 713 | |
714 714 E <-> D <-> C <-> B <-> A
715 715
716 716 This requires the following moves:
717 717
718 718 C.next = D (node.prev.next = node.next)
719 719 D.prev = C (node.next.prev = node.prev)
720 720 E.next = N (head.prev.next = node)
721 721 N.prev = E (node.prev = head.prev)
722 722 N.next = A (node.next = head)
723 723 A.prev = N (head.prev = node)
724 724 """
725 725 head = self._head
726 726 # C.next = D
727 727 node.prev.next = node.next
728 728 # D.prev = C
729 729 node.next.prev = node.prev
730 730 # N.prev = E
731 731 node.prev = head.prev
732 732 # N.next = A
733 733 # It is tempting to do just "head" here, however if node is
734 734 # adjacent to head, this will do bad things.
735 735 node.next = head.prev.next
736 736 # E.next = N
737 737 node.next.prev = node
738 738 # A.prev = N
739 739 node.prev.next = node
740 740
741 741 self._head = node
742 742
743 743 def _addcapacity(self):
744 744 """Add a node to the circular linked list.
745 745
746 746 The new node is inserted before the head node.
747 747 """
748 748 head = self._head
749 749 node = _lrucachenode()
750 750 head.prev.next = node
751 751 node.prev = head.prev
752 752 node.next = head
753 753 head.prev = node
754 754 self._size += 1
755 755 return node
756 756
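# Illustrative sketch (hypothetical, not part of this changeset): once the
# cache is at capacity, setting a new key recycles the oldest node, evicting
# the least recently used entry.
def _lrucachedict_example():
    d = lrucachedict(2)
    d['a'] = 1
    d['b'] = 2
    d['a']                   # access refreshes 'a' (it becomes the head)
    d['c'] = 3               # at capacity: evicts 'b', the oldest entry
    assert 'b' not in d
    assert 'a' in d and 'c' in d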
757 757 def lrucachefunc(func):
758 758 '''cache most recent results of function calls'''
759 759 cache = {}
760 760 order = collections.deque()
761 761 if func.__code__.co_argcount == 1:
762 762 def f(arg):
763 763 if arg not in cache:
764 764 if len(cache) > 20:
765 765 del cache[order.popleft()]
766 766 cache[arg] = func(arg)
767 767 else:
768 768 order.remove(arg)
769 769 order.append(arg)
770 770 return cache[arg]
771 771 else:
772 772 def f(*args):
773 773 if args not in cache:
774 774 if len(cache) > 20:
775 775 del cache[order.popleft()]
776 776 cache[args] = func(*args)
777 777 else:
778 778 order.remove(args)
779 779 order.append(args)
780 780 return cache[args]
781 781
782 782 return f
783 783
784 784 class propertycache(object):
785 785 def __init__(self, func):
786 786 self.func = func
787 787 self.name = func.__name__
788 788 def __get__(self, obj, type=None):
789 789 result = self.func(obj)
790 790 self.cachevalue(obj, result)
791 791 return result
792 792
793 793 def cachevalue(self, obj, value):
794 794 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
795 795 obj.__dict__[self.name] = value
796 796
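# Illustrative sketch (hypothetical, not part of this changeset): the
# decorated method runs once per instance; cachevalue() then stores the
# result in the instance __dict__, which shadows the descriptor afterwards.
def _propertycache_example():
    class thing(object):
        computed = 0
        @propertycache
        def answer(self):
            type(self).computed += 1
            return 42
    t = thing()
    assert t.answer == 42 and t.answer == 42
    assert thing.computed == 1    # second access hit t.__dict__, not func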
797 797 def pipefilter(s, cmd):
798 798 '''filter string S through command CMD, returning its output'''
799 799 p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
800 800 stdin=subprocess.PIPE, stdout=subprocess.PIPE)
801 801 pout, perr = p.communicate(s)
802 802 return pout
803 803
804 804 def tempfilter(s, cmd):
805 805 '''filter string S through a pair of temporary files with CMD.
806 806 CMD is used as a template to create the real command to be run,
807 807 with the strings INFILE and OUTFILE replaced by the real names of
808 808 the temporary files generated.'''
809 809 inname, outname = None, None
810 810 try:
811 811 infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
812 812 fp = os.fdopen(infd, pycompat.sysstr('wb'))
813 813 fp.write(s)
814 814 fp.close()
815 815 outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
816 816 os.close(outfd)
817 817 cmd = cmd.replace('INFILE', inname)
818 818 cmd = cmd.replace('OUTFILE', outname)
819 819 code = os.system(cmd)
820 820 if pycompat.sysplatform == 'OpenVMS' and code & 1:
821 821 code = 0
822 822 if code:
823 823 raise Abort(_("command '%s' failed: %s") %
824 824 (cmd, explainexit(code)))
825 825 return readfile(outname)
826 826 finally:
827 827 try:
828 828 if inname:
829 829 os.unlink(inname)
830 830 except OSError:
831 831 pass
832 832 try:
833 833 if outname:
834 834 os.unlink(outname)
835 835 except OSError:
836 836 pass
837 837
838 838 filtertable = {
839 839 'tempfile:': tempfilter,
840 840 'pipe:': pipefilter,
841 841 }
842 842
843 843 def filter(s, cmd):
844 844 "filter a string through a command that transforms its input to its output"
845 845 for name, fn in filtertable.iteritems():
846 846 if cmd.startswith(name):
847 847 return fn(s, cmd[len(name):].lstrip())
848 848 return pipefilter(s, cmd)
849 849
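# Illustrative sketch (hypothetical, not part of this changeset): the
# command prefix picks the filter strategy; anything unprefixed falls
# through to pipefilter. 'tr' and 'sed' are only example shell commands.
def _filter_example():
    filter('some text', 'pipe: tr a-z A-Z')   # explicit pipe filter
    filter('some text', 'tr a-z A-Z')         # same, via the default
    # tempfilter substitutes the INFILE/OUTFILE placeholders itself:
    filter('x', "tempfile: sed 's/x/y/' INFILE > OUTFILE")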
850 850 def binary(s):
851 851 """return true if a string is binary data"""
852 852 return bool(s and '\0' in s)
853 853
854 854 def increasingchunks(source, min=1024, max=65536):
855 855 '''return no less than min bytes per chunk while data remains,
856 856 doubling min after each chunk until it reaches max'''
857 857 def log2(x):
858 858 if not x:
859 859 return 0
860 860 i = 0
861 861 while x:
862 862 x >>= 1
863 863 i += 1
864 864 return i - 1
865 865
866 866 buf = []
867 867 blen = 0
868 868 for chunk in source:
869 869 buf.append(chunk)
870 870 blen += len(chunk)
871 871 if blen >= min:
872 872 if min < max:
873 873 min = min << 1
874 874 nmin = 1 << log2(blen)
875 875 if nmin > min:
876 876 min = nmin
877 877 if min > max:
878 878 min = max
879 879 yield ''.join(buf)
880 880 blen = 0
881 881 buf = []
882 882 if buf:
883 883 yield ''.join(buf)
884 884
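# Illustrative sketch (hypothetical, not part of this changeset): many
# small input chunks come out re-joined, with yielded sizes roughly
# doubling from 'min' until they level off at 'max'.
def _increasingchunks_example():
    source = ('x' * 100 for _ in range(10000))
    sizes = [len(c) for c in increasingchunks(source)]
    # sizes start near 1024 bytes and grow toward the 65536 ceiling
    return sizes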
885 885 Abort = error.Abort
886 886
887 887 def always(fn):
888 888 return True
889 889
890 890 def never(fn):
891 891 return False
892 892
893 893 def nogc(func):
894 894 """disable garbage collector
895 895
896 896 Python's garbage collector triggers a GC each time a certain number of
897 897 container objects (the number being defined by gc.get_threshold()) are
898 898 allocated even when marked not to be tracked by the collector. Tracking has
899 899 no effect on when GCs are triggered, only on what objects the GC looks
900 900 into. As a workaround, disable GC while building complex (huge)
901 901 containers.
902 902
903 903 This garbage collector issue has been fixed in 2.7.
904 904 """
905 905 if sys.version_info >= (2, 7):
906 906 return func
907 907 def wrapper(*args, **kwargs):
908 908 gcenabled = gc.isenabled()
909 909 gc.disable()
910 910 try:
911 911 return func(*args, **kwargs)
912 912 finally:
913 913 if gcenabled:
914 914 gc.enable()
915 915 return wrapper
916 916
917 917 def pathto(root, n1, n2):
918 918 '''return the relative path from one place to another.
919 919 root should use os.sep to separate directories
920 920 n1 should use os.sep to separate directories
921 921 n2 should use "/" to separate directories
922 922 returns an os.sep-separated path.
923 923
924 924 If n1 is a relative path, it's assumed it's
925 925 relative to root.
926 926 n2 should always be relative to root.
927 927 '''
928 928 if not n1:
929 929 return localpath(n2)
930 930 if os.path.isabs(n1):
931 931 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
932 932 return os.path.join(root, localpath(n2))
933 933 n2 = '/'.join((pconvert(root), n2))
934 934 a, b = splitpath(n1), n2.split('/')
935 935 a.reverse()
936 936 b.reverse()
937 937 while a and b and a[-1] == b[-1]:
938 938 a.pop()
939 939 b.pop()
940 940 b.reverse()
941 941 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
942 942
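# Worked sketch (hypothetical, not part of this changeset), assuming a
# POSIX os.sep of '/':
def _pathto_example():
    assert pathto('/repo', '/repo/foo/bar', 'baz/qux') == '../../baz/qux'
    assert pathto('/repo', '', 'baz/qux') == 'baz/qux'  # empty n1 shortcut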
943 943 def mainfrozen():
944 944 """return True if we are a frozen executable.
945 945
946 946 The code supports py2exe (most common, Windows only) and tools/freeze
947 947 (portable, not much used).
948 948 """
949 949 return (safehasattr(sys, "frozen") or # new py2exe
950 950 safehasattr(sys, "importers") or # old py2exe
951 951 imp.is_frozen(u"__main__")) # tools/freeze
952 952
953 953 # the location of data files matching the source code
954 954 if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
955 955 # executable version (py2exe) doesn't support __file__
956 956 datapath = os.path.dirname(pycompat.sysexecutable)
957 957 else:
958 958 datapath = os.path.dirname(pycompat.fsencode(__file__))
959 959
960 960 i18n.setdatapath(datapath)
961 961
962 962 _hgexecutable = None
963 963
964 964 def hgexecutable():
965 965 """return location of the 'hg' executable.
966 966
967 967 Defaults to $HG or 'hg' in the search path.
968 968 """
969 969 if _hgexecutable is None:
970 970 hg = encoding.environ.get('HG')
971 971 mainmod = sys.modules[pycompat.sysstr('__main__')]
972 972 if hg:
973 973 _sethgexecutable(hg)
974 974 elif mainfrozen():
975 975 if getattr(sys, 'frozen', None) == 'macosx_app':
976 976 # Env variable set by py2app
977 977 _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
978 978 else:
979 979 _sethgexecutable(pycompat.sysexecutable)
980 980 elif (os.path.basename(
981 981 pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
982 982 _sethgexecutable(pycompat.fsencode(mainmod.__file__))
983 983 else:
984 984 exe = findexe('hg') or os.path.basename(sys.argv[0])
985 985 _sethgexecutable(exe)
986 986 return _hgexecutable
987 987
988 988 def _sethgexecutable(path):
989 989 """set location of the 'hg' executable"""
990 990 global _hgexecutable
991 991 _hgexecutable = path
992 992
993 993 def _isstdout(f):
994 994 fileno = getattr(f, 'fileno', None)
995 995 return fileno and fileno() == sys.__stdout__.fileno()
996 996
997 997 def shellenviron(environ=None):
998 998 """return environ with optional override, useful for shelling out"""
999 999 def py2shell(val):
1000 1000 'convert python object into string that is useful to shell'
1001 1001 if val is None or val is False:
1002 1002 return '0'
1003 1003 if val is True:
1004 1004 return '1'
1005 1005 return str(val)
1006 1006 env = dict(encoding.environ)
1007 1007 if environ:
1008 1008 env.update((k, py2shell(v)) for k, v in environ.iteritems())
1009 1009 env['HG'] = hgexecutable()
1010 1010 return env
1011 1011
1012 1012 def system(cmd, environ=None, cwd=None, out=None):
1013 1013 '''enhanced shell command execution.
1014 1014 run with environment maybe modified, maybe in different dir.
1015 1015
1016 1016 if out is specified, it is assumed to be a file-like object that has a
1017 1017 write() method. stdout and stderr will be redirected to out.'''
1018 1018 try:
1019 1019 stdout.flush()
1020 1020 except Exception:
1021 1021 pass
1022 1022 cmd = quotecommand(cmd)
1023 1023 if pycompat.sysplatform == 'plan9' and (sys.version_info[0] == 2
1024 1024 and sys.version_info[1] < 7):
1025 1025 # subprocess kludge to work around issues in half-baked Python
1026 1026 # ports, notably bichued/python:
1027 1027 if cwd is not None:
1028 1028 os.chdir(cwd)
1029 1029 rc = os.system(cmd)
1030 1030 else:
1031 1031 env = shellenviron(environ)
1032 1032 if out is None or _isstdout(out):
1033 1033 rc = subprocess.call(cmd, shell=True, close_fds=closefds,
1034 1034 env=env, cwd=cwd)
1035 1035 else:
1036 1036 proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1037 1037 env=env, cwd=cwd, stdout=subprocess.PIPE,
1038 1038 stderr=subprocess.STDOUT)
1039 1039 for line in iter(proc.stdout.readline, ''):
1040 1040 out.write(line)
1041 1041 proc.wait()
1042 1042 rc = proc.returncode
1043 1043 if pycompat.sysplatform == 'OpenVMS' and rc & 1:
1044 1044 rc = 0
1045 1045 return rc
1046 1046
1047 1047 def checksignature(func):
1048 1048 '''wrap a function with code to check for calling errors'''
1049 1049 def check(*args, **kwargs):
1050 1050 try:
1051 1051 return func(*args, **kwargs)
1052 1052 except TypeError:
1053 1053 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1054 1054 raise error.SignatureError
1055 1055 raise
1056 1056
1057 1057 return check
1058 1058
1059 1059 # Hardlinks are problematic on CIFS, do not allow hardlinks
1060 1060 # until we find a way to work around it cleanly (issue4546).
1061 1061 # This is a variable so extensions can opt-in to using them.
1062 1062 allowhardlinks = False
1063 1063
1064 1064 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1065 1065 '''copy a file, preserving mode and optionally other stat info like
1066 1066 atime/mtime
1067 1067
1068 1068 checkambig argument is used with filestat, and is useful only if
1069 1069 destination file is guarded by any lock (e.g. repo.lock or
1070 1070 repo.wlock).
1071 1071
1072 1072 copystat and checkambig should be exclusive.
1073 1073 '''
1074 1074 assert not (copystat and checkambig)
1075 1075 oldstat = None
1076 1076 if os.path.lexists(dest):
1077 1077 if checkambig:
1078 1078 oldstat = checkambig and filestat(dest)
1079 1079 unlink(dest)
1080 1080 if allowhardlinks and hardlink:
1081 1081 try:
1082 1082 oslink(src, dest)
1083 1083 return
1084 1084 except (IOError, OSError):
1085 1085 pass # fall back to normal copy
1086 1086 if os.path.islink(src):
1087 1087 os.symlink(os.readlink(src), dest)
1088 1088 # copytime is ignored for symlinks, but in general copytime isn't needed
1089 1089 # for them anyway
1090 1090 else:
1091 1091 try:
1092 1092 shutil.copyfile(src, dest)
1093 1093 if copystat:
1094 1094 # copystat also copies mode
1095 1095 shutil.copystat(src, dest)
1096 1096 else:
1097 1097 shutil.copymode(src, dest)
1098 1098 if oldstat and oldstat.stat:
1099 1099 newstat = filestat(dest)
1100 1100 if newstat.isambig(oldstat):
1101 1101 # stat of copied file is ambiguous to original one
1102 1102 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1103 1103 os.utime(dest, (advanced, advanced))
1104 1104 except shutil.Error as inst:
1105 1105 raise Abort(str(inst))
1106 1106
1107 1107 def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
1108 1108 """Copy a directory tree using hardlinks if possible."""
1109 1109 num = 0
1110 1110
1111 1111 if hardlink is None:
1112 1112 hardlink = (os.stat(src).st_dev ==
1113 1113 os.stat(os.path.dirname(dst)).st_dev)
1114 1114 if hardlink:
1115 1115 topic = _('linking')
1116 1116 else:
1117 1117 topic = _('copying')
1118 1118
1119 1119 if os.path.isdir(src):
1120 1120 os.mkdir(dst)
1121 1121 for name, kind in osutil.listdir(src):
1122 1122 srcname = os.path.join(src, name)
1123 1123 dstname = os.path.join(dst, name)
1124 1124 def nprog(t, pos):
1125 1125 if pos is not None:
1126 1126 return progress(t, pos + num)
1127 1127 hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
1128 1128 num += n
1129 1129 else:
1130 1130 if hardlink:
1131 1131 try:
1132 1132 oslink(src, dst)
1133 1133 except (IOError, OSError):
1134 1134 hardlink = False
1135 1135 shutil.copy(src, dst)
1136 1136 else:
1137 1137 shutil.copy(src, dst)
1138 1138 num += 1
1139 1139 progress(topic, num)
1140 1140 progress(topic, None)
1141 1141
1142 1142 return hardlink, num
1143 1143
1144 1144 _winreservednames = '''con prn aux nul
1145 1145 com1 com2 com3 com4 com5 com6 com7 com8 com9
1146 1146 lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
1147 1147 _winreservedchars = ':*?"<>|'
1148 1148 def checkwinfilename(path):
1149 1149 r'''Check that the base-relative path is a valid filename on Windows.
1150 1150 Returns None if the path is ok, or a UI string describing the problem.
1151 1151
1152 1152 >>> checkwinfilename("just/a/normal/path")
1153 1153 >>> checkwinfilename("foo/bar/con.xml")
1154 1154 "filename contains 'con', which is reserved on Windows"
1155 1155 >>> checkwinfilename("foo/con.xml/bar")
1156 1156 "filename contains 'con', which is reserved on Windows"
1157 1157 >>> checkwinfilename("foo/bar/xml.con")
1158 1158 >>> checkwinfilename("foo/bar/AUX/bla.txt")
1159 1159 "filename contains 'AUX', which is reserved on Windows"
1160 1160 >>> checkwinfilename("foo/bar/bla:.txt")
1161 1161 "filename contains ':', which is reserved on Windows"
1162 1162 >>> checkwinfilename("foo/bar/b\07la.txt")
1163 1163 "filename contains '\\x07', which is invalid on Windows"
1164 1164 >>> checkwinfilename("foo/bar/bla ")
1165 1165 "filename ends with ' ', which is not allowed on Windows"
1166 1166 >>> checkwinfilename("../bar")
1167 1167 >>> checkwinfilename("foo\\")
1168 1168 "filename ends with '\\', which is invalid on Windows"
1169 1169 >>> checkwinfilename("foo\\/bar")
1170 1170 "directory name ends with '\\', which is invalid on Windows"
1171 1171 '''
1172 1172 if path.endswith('\\'):
1173 1173 return _("filename ends with '\\', which is invalid on Windows")
1174 1174 if '\\/' in path:
1175 1175 return _("directory name ends with '\\', which is invalid on Windows")
1176 1176 for n in path.replace('\\', '/').split('/'):
1177 1177 if not n:
1178 1178 continue
1179 1179 for c in pycompat.bytestr(n):
1180 1180 if c in _winreservedchars:
1181 1181 return _("filename contains '%s', which is reserved "
1182 1182 "on Windows") % c
1183 1183 if ord(c) <= 31:
1184 1184 return _("filename contains %r, which is invalid "
1185 1185 "on Windows") % c
1186 1186 base = n.split('.')[0]
1187 1187 if base and base.lower() in _winreservednames:
1188 1188 return _("filename contains '%s', which is reserved "
1189 1189 "on Windows") % base
1190 1190 t = n[-1]
1191 1191 if t in '. ' and n not in '..':
1192 1192 return _("filename ends with '%s', which is not allowed "
1193 1193 "on Windows") % t
1194 1194
1195 1195 if pycompat.osname == 'nt':
1196 1196 checkosfilename = checkwinfilename
1197 1197 timer = time.clock
1198 1198 else:
1199 1199 checkosfilename = platform.checkosfilename
1200 1200 timer = time.time
1201 1201
1202 1202 if safehasattr(time, "perf_counter"):
1203 1203 timer = time.perf_counter
1204 1204
1205 1205 def makelock(info, pathname):
1206 1206 try:
1207 1207 return os.symlink(info, pathname)
1208 1208 except OSError as why:
1209 1209 if why.errno == errno.EEXIST:
1210 1210 raise
1211 1211 except AttributeError: # no symlink in os
1212 1212 pass
1213 1213
1214 1214 ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
1215 1215 os.write(ld, info)
1216 1216 os.close(ld)
1217 1217
1218 1218 def readlock(pathname):
1219 1219 try:
1220 1220 return os.readlink(pathname)
1221 1221 except OSError as why:
1222 1222 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1223 1223 raise
1224 1224 except AttributeError: # no symlink in os
1225 1225 pass
1226 1226 fp = posixfile(pathname)
1227 1227 r = fp.read()
1228 1228 fp.close()
1229 1229 return r
1230 1230
1231 1231 def fstat(fp):
1232 1232 '''stat file object that may not have fileno method.'''
1233 1233 try:
1234 1234 return os.fstat(fp.fileno())
1235 1235 except AttributeError:
1236 1236 return os.stat(fp.name)
1237 1237
1238 1238 # File system features
1239 1239
1240 1240 def fscasesensitive(path):
1241 1241 """
1242 1242 Return true if the given path is on a case-sensitive filesystem
1243 1243
1244 1244 Requires a path (like /foo/.hg) ending with a foldable final
1245 1245 directory component.
1246 1246 """
1247 1247 s1 = os.lstat(path)
1248 1248 d, b = os.path.split(path)
1249 1249 b2 = b.upper()
1250 1250 if b == b2:
1251 1251 b2 = b.lower()
1252 1252 if b == b2:
1253 1253 return True # no evidence against case sensitivity
1254 1254 p2 = os.path.join(d, b2)
1255 1255 try:
1256 1256 s2 = os.lstat(p2)
1257 1257 if s2 == s1:
1258 1258 return False
1259 1259 return True
1260 1260 except OSError:
1261 1261 return True
1262 1262
1263 1263 try:
1264 1264 import re2
1265 1265 _re2 = None
1266 1266 except ImportError:
1267 1267 _re2 = False
1268 1268
1269 1269 class _re(object):
1270 1270 def _checkre2(self):
1271 1271 global _re2
1272 1272 try:
1273 1273 # check if match works, see issue3964
1274 1274 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1275 1275 except ImportError:
1276 1276 _re2 = False
1277 1277
1278 1278 def compile(self, pat, flags=0):
1279 1279 '''Compile a regular expression, using re2 if possible
1280 1280
1281 1281 For best performance, use only re2-compatible regexp features. The
1282 1282 only flags from the re module that are re2-compatible are
1283 1283 IGNORECASE and MULTILINE.'''
1284 1284 if _re2 is None:
1285 1285 self._checkre2()
1286 1286 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1287 1287 if flags & remod.IGNORECASE:
1288 1288 pat = '(?i)' + pat
1289 1289 if flags & remod.MULTILINE:
1290 1290 pat = '(?m)' + pat
1291 1291 try:
1292 1292 return re2.compile(pat)
1293 1293 except re2.error:
1294 1294 pass
1295 1295 return remod.compile(pat, flags)
1296 1296
1297 1297 @propertycache
1298 1298 def escape(self):
1299 1299 '''Return the version of escape corresponding to self.compile.
1300 1300
1301 1301 This is imperfect because whether re2 or re is used for a particular
1302 1302 function depends on the flags, etc, but it's the best we can do.
1303 1303 '''
1304 1304 global _re2
1305 1305 if _re2 is None:
1306 1306 self._checkre2()
1307 1307 if _re2:
1308 1308 return re2.escape
1309 1309 else:
1310 1310 return remod.escape
1311 1311
1312 1312 re = _re()
1313 1313
1314 1314 _fspathcache = {}
1315 1315 def fspath(name, root):
1316 1316 '''Get name in the case stored in the filesystem
1317 1317
1318 1318 The name should be relative to root, and be normcase-ed for efficiency.
1319 1319
1320 1320 Note that this function is unnecessary, and should not be
1321 1321 called, for case-sensitive filesystems (simply because it's expensive).
1322 1322
1323 1323 The root should be normcase-ed, too.
1324 1324 '''
1325 1325 def _makefspathcacheentry(dir):
1326 1326 return dict((normcase(n), n) for n in os.listdir(dir))
1327 1327
1328 1328 seps = pycompat.ossep
1329 1329 if pycompat.osaltsep:
1330 1330 seps = seps + pycompat.osaltsep
1331 1331 # Protect backslashes. This gets silly very quickly.
1332 1332 seps = seps.replace('\\', '\\\\')
1333 1333 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1334 1334 dir = os.path.normpath(root)
1335 1335 result = []
1336 1336 for part, sep in pattern.findall(name):
1337 1337 if sep:
1338 1338 result.append(sep)
1339 1339 continue
1340 1340
1341 1341 if dir not in _fspathcache:
1342 1342 _fspathcache[dir] = _makefspathcacheentry(dir)
1343 1343 contents = _fspathcache[dir]
1344 1344
1345 1345 found = contents.get(part)
1346 1346 if not found:
1347 1347 # retry "once per directory" per "dirstate.walk" which
1348 1348 # may take place for each patch of "hg qpush", for example
1349 1349 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1350 1350 found = contents.get(part)
1351 1351
1352 1352 result.append(found or part)
1353 1353 dir = os.path.join(dir, part)
1354 1354
1355 1355 return ''.join(result)
1356 1356
1357 1357 def checknlink(testfile):
1358 1358 '''check whether hardlink count reporting works properly'''
1359 1359
1360 1360 # testfile may be open, so we need a separate file for checking to
1361 1361 # work around issue2543 (or testfile may get lost on Samba shares)
1362 1362 f1 = testfile + ".hgtmp1"
1363 1363 if os.path.lexists(f1):
1364 1364 return False
1365 1365 try:
1366 1366 posixfile(f1, 'w').close()
1367 1367 except IOError:
1368 1368 try:
1369 1369 os.unlink(f1)
1370 1370 except OSError:
1371 1371 pass
1372 1372 return False
1373 1373
1374 1374 f2 = testfile + ".hgtmp2"
1375 1375 fd = None
1376 1376 try:
1377 1377 oslink(f1, f2)
1378 1378 # nlinks() may behave differently for files on Windows shares if
1379 1379 # the file is open.
1380 1380 fd = posixfile(f2)
1381 1381 return nlinks(f2) > 1
1382 1382 except OSError:
1383 1383 return False
1384 1384 finally:
1385 1385 if fd is not None:
1386 1386 fd.close()
1387 1387 for f in (f1, f2):
1388 1388 try:
1389 1389 os.unlink(f)
1390 1390 except OSError:
1391 1391 pass
1392 1392
1393 1393 def endswithsep(path):
1394 1394 '''Check path ends with os.sep or os.altsep.'''
1395 1395 return (path.endswith(pycompat.ossep)
1396 1396 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1397 1397
1398 1398 def splitpath(path):
1399 1399 '''Split path by os.sep.
1400 1400 Note that this function does not use os.altsep because it is
1401 1401 meant as a simple alternative to "xxx.split(os.sep)".
1402 1402 It is recommended to use os.path.normpath() before using this
1403 1403 function if needed.'''
1404 1404 return path.split(pycompat.ossep)
1405 1405
1406 1406 def gui():
1407 1407 '''Are we running in a GUI?'''
1408 1408 if pycompat.sysplatform == 'darwin':
1409 1409 if 'SSH_CONNECTION' in encoding.environ:
1410 1410 # handle SSH access to a box where the user is logged in
1411 1411 return False
1412 1412 elif getattr(osutil, 'isgui', None):
1413 1413 # check if a CoreGraphics session is available
1414 1414 return osutil.isgui()
1415 1415 else:
1416 1416 # pure build; use a safe default
1417 1417 return True
1418 1418 else:
1419 1419 return pycompat.osname == "nt" or encoding.environ.get("DISPLAY")
1420 1420
1421 1421 def mktempcopy(name, emptyok=False, createmode=None):
1422 1422 """Create a temporary file with the same contents from name
1423 1423
1424 1424 The permission bits are copied from the original file.
1425 1425
1426 1426 If the temporary file is going to be truncated immediately, you
1427 1427 can use emptyok=True as an optimization.
1428 1428
1429 1429 Returns the name of the temporary file.
1430 1430 """
1431 1431 d, fn = os.path.split(name)
1432 1432 fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, dir=d)
1433 1433 os.close(fd)
1434 1434 # Temporary files are created with mode 0600, which is usually not
1435 1435 # what we want. If the original file already exists, just copy
1436 1436 # its mode. Otherwise, manually obey umask.
1437 1437 copymode(name, temp, createmode)
1438 1438 if emptyok:
1439 1439 return temp
1440 1440 try:
1441 1441 try:
1442 1442 ifp = posixfile(name, "rb")
1443 1443 except IOError as inst:
1444 1444 if inst.errno == errno.ENOENT:
1445 1445 return temp
1446 1446 if not getattr(inst, 'filename', None):
1447 1447 inst.filename = name
1448 1448 raise
1449 1449 ofp = posixfile(temp, "wb")
1450 1450 for chunk in filechunkiter(ifp):
1451 1451 ofp.write(chunk)
1452 1452 ifp.close()
1453 1453 ofp.close()
1454 1454 except: # re-raises
1455 1455 try: os.unlink(temp)
1456 1456 except OSError: pass
1457 1457 raise
1458 1458 return temp
1459 1459
1460 1460 class filestat(object):
1461 1461 """help to exactly detect change of a file
1462 1462
1463 1463 'stat' attribute is result of 'os.stat()' if specified 'path'
1464 1464 exists. Otherwise, it is None. This can avoid preparative
1465 1465 'exists()' examination on client side of this class.
1466 1466 """
1467 1467 def __init__(self, path):
1468 1468 try:
1469 1469 self.stat = os.stat(path)
1470 1470 except OSError as err:
1471 1471 if err.errno != errno.ENOENT:
1472 1472 raise
1473 1473 self.stat = None
1474 1474
1475 1475 __hash__ = object.__hash__
1476 1476
1477 1477 def __eq__(self, old):
1478 1478 try:
1479 1479 # if ambiguity between stat of new and old file is
1480 1480 # avoided, comparison of size, ctime and mtime is enough
1481 1481 # to exactly detect change of a file regardless of platform
1482 1482 return (self.stat.st_size == old.stat.st_size and
1483 1483 self.stat.st_ctime == old.stat.st_ctime and
1484 1484 self.stat.st_mtime == old.stat.st_mtime)
1485 1485 except AttributeError:
1486 1486 return False
1487 1487
1488 1488 def isambig(self, old):
1489 1489 """Examine whether new (= self) stat is ambiguous against old one
1490 1490
1491 1491 "S[N]" below means stat of a file at N-th change:
1492 1492
1493 1493 - S[n-1].ctime < S[n].ctime: can detect change of a file
1494 1494 - S[n-1].ctime == S[n].ctime
1495 1495 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
1496 1496 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
1497 1497 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
1498 1498 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
1499 1499
1500 1500 Case (*2) above means that a file was changed twice or more within
1501 1501 the same second (= S[n-1].ctime), and comparison of timestamps
1502 1502 is ambiguous.
1503 1503
1504 1504 The basic idea for avoiding such ambiguity is to "advance mtime 1 sec,
1505 1505 if timestamp is ambiguous".
1506 1506
1507 1507 But advancing mtime only in case (*2) doesn't work as
1508 1508 expected, because naturally advanced S[n].mtime in case (*1)
1509 1509 might be equal to manually advanced S[n-1 or earlier].mtime.
1510 1510
1511 1511 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
1512 1512 treated as ambiguous regardless of mtime, to avoid overlooking
1513 1513 changes hidden by such mtime collisions.
1514 1514
1515 1515 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
1516 1516 S[n].mtime", even if size of a file isn't changed.
1517 1517 """
1518 1518 try:
1519 1519 return (self.stat.st_ctime == old.stat.st_ctime)
1520 1520 except AttributeError:
1521 1521 return False
1522 1522
1523 1523 def avoidambig(self, path, old):
1524 1524 """Change file stat of specified path to avoid ambiguity
1525 1525
1526 1526 'old' should be previous filestat of 'path'.
1527 1527
1528 1528 This skips avoiding ambiguity, if a process doesn't have
1529 1529 appropriate privileges for 'path'.
1530 1530 """
1531 1531 advanced = (old.stat.st_mtime + 1) & 0x7fffffff
1532 1532 try:
1533 1533 os.utime(path, (advanced, advanced))
1534 1534 except OSError as inst:
1535 1535 if inst.errno == errno.EPERM:
1536 1536 # utime() on the file created by another user causes EPERM,
1537 1537 # if a process doesn't have appropriate privileges
1538 1538 return
1539 1539 raise
1540 1540
1541 1541 def __ne__(self, other):
1542 1542 return not self == other
1543 1543
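# Illustrative sketch (hypothetical, not part of this changeset): two
# writes landing within the same second leave equal ctimes, which isambig()
# reports so callers can push mtime forward. The path is invented.
def _filestat_example(path):
    old = filestat(path)
    # ... 'path' is rewritten within the same second ...
    new = filestat(path)
    if new.isambig(old):
        new.avoidambig(path, old)   # advances mtime by 1 sec (mod 2**31)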
1544 1544 class atomictempfile(object):
1545 1545 '''writable file object that atomically updates a file
1546 1546
1547 1547 All writes will go to a temporary copy of the original file. Call
1548 1548 close() when you are done writing, and atomictempfile will rename
1549 1549 the temporary copy to the original name, making the changes
1550 1550 visible. If the object is destroyed without being closed, all your
1551 1551 writes are discarded.
1552 1552
1553 1553 checkambig argument of constructor is used with filestat, and is
1554 1554 useful only if target file is guarded by any lock (e.g. repo.lock
1555 1555 or repo.wlock).
1556 1556 '''
1557 1557 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
1558 1558 self.__name = name # permanent name
1559 1559 self._tempname = mktempcopy(name, emptyok=('w' in mode),
1560 1560 createmode=createmode)
1561 1561 self._fp = posixfile(self._tempname, mode)
1562 1562 self._checkambig = checkambig
1563 1563
1564 1564 # delegated methods
1565 1565 self.read = self._fp.read
1566 1566 self.write = self._fp.write
1567 1567 self.seek = self._fp.seek
1568 1568 self.tell = self._fp.tell
1569 1569 self.fileno = self._fp.fileno
1570 1570
1571 1571 def close(self):
1572 1572 if not self._fp.closed:
1573 1573 self._fp.close()
1574 1574 filename = localpath(self.__name)
1575 1575 oldstat = self._checkambig and filestat(filename)
1576 1576 if oldstat and oldstat.stat:
1577 1577 rename(self._tempname, filename)
1578 1578 newstat = filestat(filename)
1579 1579 if newstat.isambig(oldstat):
1580 1580 # stat of changed file is ambiguous to original one
1581 1581 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1582 1582 os.utime(filename, (advanced, advanced))
1583 1583 else:
1584 1584 rename(self._tempname, filename)
1585 1585
1586 1586 def discard(self):
1587 1587 if not self._fp.closed:
1588 1588 try:
1589 1589 os.unlink(self._tempname)
1590 1590 except OSError:
1591 1591 pass
1592 1592 self._fp.close()
1593 1593
1594 1594 def __del__(self):
1595 1595 if safehasattr(self, '_fp'): # constructor actually did something
1596 1596 self.discard()
1597 1597
1598 1598 def __enter__(self):
1599 1599 return self
1600 1600
1601 1601 def __exit__(self, exctype, excvalue, traceback):
1602 1602 if exctype is not None:
1603 1603 self.discard()
1604 1604 else:
1605 1605 self.close()
1606 1606
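# Illustrative sketch (hypothetical, not part of this changeset): writes go
# to a temporary copy and only become visible when close() renames it over
# the original; the context-manager form discards the copy on error.
def _atomictempfile_example(path):
    with atomictempfile(path, 'wb') as fp:
        fp.write('all or nothing')
    # 'path' now holds the new content, or is untouched if we raised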
1607 1607 def unlinkpath(f, ignoremissing=False):
1608 1608 """unlink and remove the directory if it is empty"""
1609 1609 try:
1610 1610 unlink(f)
1611 1611 except OSError as e:
1612 1612 if not (ignoremissing and e.errno == errno.ENOENT):
1613 1613 raise
1614 1614 # try removing directories that might now be empty
1615 1615 try:
1616 1616 removedirs(os.path.dirname(f))
1617 1617 except OSError:
1618 1618 pass
1619 1619
1620 def tryunlink(f):
1621 """Attempt to remove a file, ignoring ENOENT errors."""
1622 try:
1623 unlink(f)
1624 except OSError as e:
1625 if e.errno != errno.ENOENT:
1626 raise
1627
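# Illustrative sketch (hypothetical, not part of this changeset): tryunlink,
# added above, replaces the ENOENT-filtering boilerplate that call sites
# otherwise repeat by hand.
def _tryunlink_example(f):
    # before this changeset, call sites filtered ENOENT themselves:
    #   try:
    #       unlink(f)
    #   except OSError as e:
    #       if e.errno != errno.ENOENT:
    #           raise
    # after, the same semantics are a single call:
    tryunlink(f)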
1620 1628 def makedirs(name, mode=None, notindexed=False):
1621 1629 """recursive directory creation with parent mode inheritance
1622 1630
1623 1631 Newly created directories are marked as "not to be indexed by
1624 1632 the content indexing service", if ``notindexed`` is specified
1625 1633 for "write" mode access.
1626 1634 """
1627 1635 try:
1628 1636 makedir(name, notindexed)
1629 1637 except OSError as err:
1630 1638 if err.errno == errno.EEXIST:
1631 1639 return
1632 1640 if err.errno != errno.ENOENT or not name:
1633 1641 raise
1634 1642 parent = os.path.dirname(os.path.abspath(name))
1635 1643 if parent == name:
1636 1644 raise
1637 1645 makedirs(parent, mode, notindexed)
1638 1646 try:
1639 1647 makedir(name, notindexed)
1640 1648 except OSError as err:
1641 1649 # Catch EEXIST to handle races
1642 1650 if err.errno == errno.EEXIST:
1643 1651 return
1644 1652 raise
1645 1653 if mode is not None:
1646 1654 os.chmod(name, mode)
1647 1655
1648 1656 def readfile(path):
1649 1657 with open(path, 'rb') as fp:
1650 1658 return fp.read()
1651 1659
1652 1660 def writefile(path, text):
1653 1661 with open(path, 'wb') as fp:
1654 1662 fp.write(text)
1655 1663
1656 1664 def appendfile(path, text):
1657 1665 with open(path, 'ab') as fp:
1658 1666 fp.write(text)
1659 1667
1660 1668 class chunkbuffer(object):
1661 1669 """Allow arbitrary sized chunks of data to be efficiently read from an
1662 1670 iterator over chunks of arbitrary size."""
1663 1671
1664 1672 def __init__(self, in_iter):
1665 1673 """in_iter is the iterator that's iterating over the input chunks.
1666 1674 targetsize is how big a buffer to try to maintain."""
1667 1675 def splitbig(chunks):
1668 1676 for chunk in chunks:
1669 1677 if len(chunk) > 2**20:
1670 1678 pos = 0
1671 1679 while pos < len(chunk):
1672 1680 end = pos + 2 ** 18
1673 1681 yield chunk[pos:end]
1674 1682 pos = end
1675 1683 else:
1676 1684 yield chunk
1677 1685 self.iter = splitbig(in_iter)
1678 1686 self._queue = collections.deque()
1679 1687 self._chunkoffset = 0
1680 1688
1681 1689 def read(self, l=None):
1682 1690 """Read L bytes of data from the iterator of chunks of data.
1683 1691 Returns less than L bytes if the iterator runs dry.
1684 1692
1685 1693 If the size parameter is omitted, read everything"""
1686 1694 if l is None:
1687 1695 return ''.join(self.iter)
1688 1696
1689 1697 left = l
1690 1698 buf = []
1691 1699 queue = self._queue
1692 1700 while left > 0:
1693 1701 # refill the queue
1694 1702 if not queue:
1695 1703 target = 2**18
1696 1704 for chunk in self.iter:
1697 1705 queue.append(chunk)
1698 1706 target -= len(chunk)
1699 1707 if target <= 0:
1700 1708 break
1701 1709 if not queue:
1702 1710 break
1703 1711
1704 1712 # The easy way to do this would be to queue.popleft(), modify the
1705 1713 # chunk (if necessary), then queue.appendleft(). However, for cases
1706 1714 # where we read partial chunk content, this incurs 2 dequeue
1707 1715 # mutations and creates a new str for the remaining chunk in the
1708 1716 # queue. Our code below avoids this overhead.
1709 1717
1710 1718 chunk = queue[0]
1711 1719 chunkl = len(chunk)
1712 1720 offset = self._chunkoffset
1713 1721
1714 1722 # Use full chunk.
1715 1723 if offset == 0 and left >= chunkl:
1716 1724 left -= chunkl
1717 1725 queue.popleft()
1718 1726 buf.append(chunk)
1719 1727 # self._chunkoffset remains at 0.
1720 1728 continue
1721 1729
1722 1730 chunkremaining = chunkl - offset
1723 1731
1724 1732 # Use all of unconsumed part of chunk.
1725 1733 if left >= chunkremaining:
1726 1734 left -= chunkremaining
1727 1735 queue.popleft()
1728 1736 # The offset == 0 case is handled by the block above, so this
1729 1737 # won't merely copy the whole chunk via ``chunk[0:]``.
1730 1738 buf.append(chunk[offset:])
1731 1739 self._chunkoffset = 0
1732 1740
1733 1741 # Partial chunk needed.
1734 1742 else:
1735 1743 buf.append(chunk[offset:offset + left])
1736 1744 self._chunkoffset += left
1737 1745 left -= chunkremaining
1738 1746
1739 1747 return ''.join(buf)
1740 1748
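# Illustrative sketch (assumed values, not part of the module): sized
# reads spanning chunk boundaries, with a short read once the iterator
# is exhausted:
#
#     buf = chunkbuffer(iter(['abc', 'defg', 'h']))
#     buf.read(4)    # -> 'abcd'
#     buf.read(4)    # -> 'efgh'
#     buf.read(4)    # -> '' (iterator ran dry)
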
1741 1749 def filechunkiter(f, size=131072, limit=None):
1742 1750 """Create a generator that produces the data in the file size
1743 1751 (default 131072) bytes at a time, up to optional limit (default is
1744 1752 to read all data). Chunks may be less than size bytes if the
1745 1753 chunk is the last chunk in the file, or the file is a socket or
1746 1754 some other type of file that sometimes reads less data than is
1747 1755 requested."""
1748 1756 assert size >= 0
1749 1757 assert limit is None or limit >= 0
1750 1758 while True:
1751 1759 if limit is None:
1752 1760 nbytes = size
1753 1761 else:
1754 1762 nbytes = min(limit, size)
1755 1763 s = nbytes and f.read(nbytes)
1756 1764 if not s:
1757 1765 break
1758 1766 if limit:
1759 1767 limit -= len(s)
1760 1768 yield s
1761 1769
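# Illustrative sketch (hypothetical file name): stream at most 1 MB of a
# file without holding it all in memory:
#
#     fp = open('some-big-file', 'rb')
#     for chunk in filechunkiter(fp, size=65536, limit=2 ** 20):
#         pass    # feed chunk to a consumer here
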
1762 1770 def makedate(timestamp=None):
1763 1771 '''Return a unix timestamp (or the current time) as a (unixtime,
1764 1772 offset) tuple based off the local timezone.'''
1765 1773 if timestamp is None:
1766 1774 timestamp = time.time()
1767 1775 if timestamp < 0:
1768 1776 hint = _("check your clock")
1769 1777 raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
1770 1778 delta = (datetime.datetime.utcfromtimestamp(timestamp) -
1771 1779 datetime.datetime.fromtimestamp(timestamp))
1772 1780 tz = delta.days * 86400 + delta.seconds
1773 1781 return timestamp, tz
1774 1782
1775 1783 def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
1776 1784 """represent a (unixtime, offset) tuple as a localized time.
1777 1785 unixtime is seconds since the epoch, and offset is the time zone's
1778 1786 number of seconds away from UTC.
1779 1787
1780 1788 >>> datestr((0, 0))
1781 1789 'Thu Jan 01 00:00:00 1970 +0000'
1782 1790 >>> datestr((42, 0))
1783 1791 'Thu Jan 01 00:00:42 1970 +0000'
1784 1792 >>> datestr((-42, 0))
1785 1793 'Wed Dec 31 23:59:18 1969 +0000'
1786 1794 >>> datestr((0x7fffffff, 0))
1787 1795 'Tue Jan 19 03:14:07 2038 +0000'
1788 1796 >>> datestr((-0x80000000, 0))
1789 1797 'Fri Dec 13 20:45:52 1901 +0000'
1790 1798 """
1791 1799 t, tz = date or makedate()
1792 1800 if "%1" in format or "%2" in format or "%z" in format:
1793 1801 sign = (tz > 0) and "-" or "+"
1794 1802 minutes = abs(tz) // 60
1795 1803 q, r = divmod(minutes, 60)
1796 1804 format = format.replace("%z", "%1%2")
1797 1805 format = format.replace("%1", "%c%02d" % (sign, q))
1798 1806 format = format.replace("%2", "%02d" % r)
1799 1807 d = t - tz
1800 1808 if d > 0x7fffffff:
1801 1809 d = 0x7fffffff
1802 1810 elif d < -0x80000000:
1803 1811 d = -0x80000000
1804 1812 # Never use time.gmtime() and datetime.datetime.fromtimestamp()
1805 1813 # because they use the gmtime() system call which is buggy on Windows
1806 1814 # for negative values.
1807 1815 t = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=d)
1808 1816 s = encoding.strtolocal(t.strftime(encoding.strfromlocal(format)))
1809 1817 return s
1810 1818
1811 1819 def shortdate(date=None):
1812 1820 """turn (timestamp, tzoff) tuple into iso 8631 date."""
1813 1821 return datestr(date, format='%Y-%m-%d')
1814 1822
1815 1823 def parsetimezone(s):
1816 1824 """find a trailing timezone, if any, in string, and return a
1817 1825 (offset, remainder) pair"""
1818 1826
1819 1827 if s.endswith("GMT") or s.endswith("UTC"):
1820 1828 return 0, s[:-3].rstrip()
1821 1829
1822 1830 # Unix-style timezones [+-]hhmm
1823 1831 if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
1824 1832 sign = (s[-5] == "+") and 1 or -1
1825 1833 hours = int(s[-4:-2])
1826 1834 minutes = int(s[-2:])
1827 1835 return -sign * (hours * 60 + minutes) * 60, s[:-5].rstrip()
1828 1836
1829 1837 # ISO8601 trailing Z
1830 1838 if s.endswith("Z") and s[-2:-1].isdigit():
1831 1839 return 0, s[:-1]
1832 1840
1833 1841 # ISO8601-style [+-]hh:mm
1834 1842 if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
1835 1843 s[-5:-3].isdigit() and s[-2:].isdigit()):
1836 1844 sign = (s[-6] == "+") and 1 or -1
1837 1845 hours = int(s[-5:-3])
1838 1846 minutes = int(s[-2:])
1839 1847 return -sign * (hours * 60 + minutes) * 60, s[:-6]
1840 1848
1841 1849 return None, s
1842 1850
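# For instance (illustrative values; offsets are negated, matching the
# "unixtime = localunixtime + offset" convention used by strdate below):
#
#     parsetimezone('2006-02-01 13:00 +0500')
#     # -> (-18000, '2006-02-01 13:00')
#     parsetimezone('13:00 GMT')    # -> (0, '13:00')
#     parsetimezone('13:00')        # -> (None, '13:00')
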
1843 1851 def strdate(string, format, defaults=None):
1844 1852 """parse a localized time string and return a (unixtime, offset) tuple.
1845 1853 if the string cannot be parsed, ValueError is raised."""
1846 1854 if defaults is None:
1847 1855 defaults = {}
1848 1856
1849 1857 # NOTE: unixtime = localunixtime + offset
1850 1858 offset, date = parsetimezone(string)
1851 1859
1852 1860 # add missing elements from defaults
1853 1861 usenow = False # default to using biased defaults
1854 1862 for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
1855 1863 found = [True for p in part if ("%"+p) in format]
1856 1864 if not found:
1857 1865 date += "@" + defaults[part][usenow]
1858 1866 format += "@%" + part[0]
1859 1867 else:
1860 1868 # We've found a specific time element, less specific time
1861 1869 # elements are relative to today
1862 1870 usenow = True
1863 1871
1864 1872 timetuple = time.strptime(date, format)
1865 1873 localunixtime = int(calendar.timegm(timetuple))
1866 1874 if offset is None:
1867 1875 # local timezone
1868 1876 unixtime = int(time.mktime(timetuple))
1869 1877 offset = unixtime - localunixtime
1870 1878 else:
1871 1879 unixtime = localunixtime + offset
1872 1880 return unixtime, offset
1873 1881
1874 1882 def parsedate(date, formats=None, bias=None):
1875 1883 """parse a localized date/time and return a (unixtime, offset) tuple.
1876 1884
1877 1885 The date may be a "unixtime offset" string or in one of the specified
1878 1886 formats. If the date already is a (unixtime, offset) tuple, it is returned.
1879 1887
1880 1888 >>> parsedate(' today ') == parsedate(\
1881 1889 datetime.date.today().strftime('%b %d'))
1882 1890 True
1883 1891 >>> parsedate( 'yesterday ') == parsedate((datetime.date.today() -\
1884 1892 datetime.timedelta(days=1)\
1885 1893 ).strftime('%b %d'))
1886 1894 True
1887 1895 >>> now, tz = makedate()
1888 1896 >>> strnow, strtz = parsedate('now')
1889 1897 >>> (strnow - now) < 1
1890 1898 True
1891 1899 >>> tz == strtz
1892 1900 True
1893 1901 """
1894 1902 if bias is None:
1895 1903 bias = {}
1896 1904 if not date:
1897 1905 return 0, 0
1898 1906 if isinstance(date, tuple) and len(date) == 2:
1899 1907 return date
1900 1908 if not formats:
1901 1909 formats = defaultdateformats
1902 1910 date = date.strip()
1903 1911
1904 1912 if date == 'now' or date == _('now'):
1905 1913 return makedate()
1906 1914 if date == 'today' or date == _('today'):
1907 1915 date = datetime.date.today().strftime('%b %d')
1908 1916 elif date == 'yesterday' or date == _('yesterday'):
1909 1917 date = (datetime.date.today() -
1910 1918 datetime.timedelta(days=1)).strftime('%b %d')
1911 1919
1912 1920 try:
1913 1921 when, offset = map(int, date.split(' '))
1914 1922 except ValueError:
1915 1923 # fill out defaults
1916 1924 now = makedate()
1917 1925 defaults = {}
1918 1926 for part in ("d", "mb", "yY", "HI", "M", "S"):
1919 1927 # this piece is for rounding the specific end of unknowns
1920 1928 b = bias.get(part)
1921 1929 if b is None:
1922 1930 if part[0] in "HMS":
1923 1931 b = "00"
1924 1932 else:
1925 1933 b = "0"
1926 1934
1927 1935 # this piece is for matching the generic end to today's date
1928 1936 n = datestr(now, "%" + part[0])
1929 1937
1930 1938 defaults[part] = (b, n)
1931 1939
1932 1940 for format in formats:
1933 1941 try:
1934 1942 when, offset = strdate(date, format, defaults)
1935 1943 except (ValueError, OverflowError):
1936 1944 pass
1937 1945 else:
1938 1946 break
1939 1947 else:
1940 1948 raise Abort(_('invalid date: %r') % date)
1941 1949 # validate explicit (probably user-specified) date and
1942 1950 # time zone offset. values must fit in signed 32 bits for
1943 1951 # current 32-bit linux runtimes. timezones go from UTC-12
1944 1952 # to UTC+14
1945 1953 if when < -0x80000000 or when > 0x7fffffff:
1946 1954 raise Abort(_('date exceeds 32 bits: %d') % when)
1947 1955 if offset < -50400 or offset > 43200:
1948 1956 raise Abort(_('impossible time zone offset: %d') % offset)
1949 1957 return when, offset
1950 1958
1951 1959 def matchdate(date):
1952 1960 """Return a function that matches a given date match specifier
1953 1961
1954 1962 Formats include:
1955 1963
1956 1964 '{date}' match a given date to the accuracy provided
1957 1965
1958 1966 '<{date}' on or before a given date
1959 1967
1960 1968 '>{date}' on or after a given date
1961 1969
1962 1970 >>> p1 = parsedate("10:29:59")
1963 1971 >>> p2 = parsedate("10:30:00")
1964 1972 >>> p3 = parsedate("10:30:59")
1965 1973 >>> p4 = parsedate("10:31:00")
1966 1974 >>> p5 = parsedate("Sep 15 10:30:00 1999")
1967 1975 >>> f = matchdate("10:30")
1968 1976 >>> f(p1[0])
1969 1977 False
1970 1978 >>> f(p2[0])
1971 1979 True
1972 1980 >>> f(p3[0])
1973 1981 True
1974 1982 >>> f(p4[0])
1975 1983 False
1976 1984 >>> f(p5[0])
1977 1985 False
1978 1986 """
1979 1987
1980 1988 def lower(date):
1981 1989 d = {'mb': "1", 'd': "1"}
1982 1990 return parsedate(date, extendeddateformats, d)[0]
1983 1991
1984 1992 def upper(date):
1985 1993 d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
1986 1994 for days in ("31", "30", "29"):
1987 1995 try:
1988 1996 d["d"] = days
1989 1997 return parsedate(date, extendeddateformats, d)[0]
1990 1998 except Abort:
1991 1999 pass
1992 2000 d["d"] = "28"
1993 2001 return parsedate(date, extendeddateformats, d)[0]
1994 2002
1995 2003 date = date.strip()
1996 2004
1997 2005 if not date:
1998 2006 raise Abort(_("dates cannot consist entirely of whitespace"))
1999 2007 elif date[0] == "<":
2000 2008 if not date[1:]:
2001 2009 raise Abort(_("invalid day spec, use '<DATE'"))
2002 2010 when = upper(date[1:])
2003 2011 return lambda x: x <= when
2004 2012 elif date[0] == ">":
2005 2013 if not date[1:]:
2006 2014 raise Abort(_("invalid day spec, use '>DATE'"))
2007 2015 when = lower(date[1:])
2008 2016 return lambda x: x >= when
2009 2017 elif date[0] == "-":
2010 2018 try:
2011 2019 days = int(date[1:])
2012 2020 except ValueError:
2013 2021 raise Abort(_("invalid day spec: %s") % date[1:])
2014 2022 if days < 0:
2015 2023 raise Abort(_("%s must be nonnegative (see 'hg help dates')")
2016 2024 % date[1:])
2017 2025 when = makedate()[0] - days * 3600 * 24
2018 2026 return lambda x: x >= when
2019 2027 elif " to " in date:
2020 2028 a, b = date.split(" to ")
2021 2029 start, stop = lower(a), upper(b)
2022 2030 return lambda x: x >= start and x <= stop
2023 2031 else:
2024 2032 start, stop = lower(date), upper(date)
2025 2033 return lambda x: x >= start and x <= stop
2026 2034
2027 2035 def stringmatcher(pattern, casesensitive=True):
2028 2036 """
2029 2037 accepts a string, possibly starting with 're:' or 'literal:' prefix.
2030 2038 returns the matcher name, pattern, and matcher function.
2031 2039 missing or unknown prefixes are treated as literal matches.
2032 2040
2033 2041 helper for tests:
2034 2042 >>> def test(pattern, *tests):
2035 2043 ... kind, pattern, matcher = stringmatcher(pattern)
2036 2044 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2037 2045 >>> def itest(pattern, *tests):
2038 2046 ... kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
2039 2047 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2040 2048
2041 2049 exact matching (no prefix):
2042 2050 >>> test('abcdefg', 'abc', 'def', 'abcdefg')
2043 2051 ('literal', 'abcdefg', [False, False, True])
2044 2052
2045 2053 regex matching ('re:' prefix)
2046 2054 >>> test('re:a.+b', 'nomatch', 'fooadef', 'fooadefbar')
2047 2055 ('re', 'a.+b', [False, False, True])
2048 2056
2049 2057 force exact matches ('literal:' prefix)
2050 2058 >>> test('literal:re:foobar', 'foobar', 're:foobar')
2051 2059 ('literal', 're:foobar', [False, True])
2052 2060
2053 2061 unknown prefixes are ignored and treated as literals
2054 2062 >>> test('foo:bar', 'foo', 'bar', 'foo:bar')
2055 2063 ('literal', 'foo:bar', [False, False, True])
2056 2064
2057 2065 case insensitive regex matches
2058 2066 >>> itest('re:A.+b', 'nomatch', 'fooadef', 'fooadefBar')
2059 2067 ('re', 'A.+b', [False, False, True])
2060 2068
2061 2069 case insensitive literal matches
2062 2070 >>> itest('ABCDEFG', 'abc', 'def', 'abcdefg')
2063 2071 ('literal', 'ABCDEFG', [False, False, True])
2064 2072 """
2065 2073 if pattern.startswith('re:'):
2066 2074 pattern = pattern[3:]
2067 2075 try:
2068 2076 flags = 0
2069 2077 if not casesensitive:
2070 2078 flags = remod.I
2071 2079 regex = remod.compile(pattern, flags)
2072 2080 except remod.error as e:
2073 2081 raise error.ParseError(_('invalid regular expression: %s')
2074 2082 % e)
2075 2083 return 're', pattern, regex.search
2076 2084 elif pattern.startswith('literal:'):
2077 2085 pattern = pattern[8:]
2078 2086
2079 2087 match = pattern.__eq__
2080 2088
2081 2089 if not casesensitive:
2082 2090 ipat = encoding.lower(pattern)
2083 2091 match = lambda s: ipat == encoding.lower(s)
2084 2092 return 'literal', pattern, match
2085 2093
2086 2094 def shortuser(user):
2087 2095 """Return a short representation of a user name or email address."""
2088 2096 f = user.find('@')
2089 2097 if f >= 0:
2090 2098 user = user[:f]
2091 2099 f = user.find('<')
2092 2100 if f >= 0:
2093 2101 user = user[f + 1:]
2094 2102 f = user.find(' ')
2095 2103 if f >= 0:
2096 2104 user = user[:f]
2097 2105 f = user.find('.')
2098 2106 if f >= 0:
2099 2107 user = user[:f]
2100 2108 return user
2101 2109
2102 2110 def emailuser(user):
2103 2111 """Return the user portion of an email address."""
2104 2112 f = user.find('@')
2105 2113 if f >= 0:
2106 2114 user = user[:f]
2107 2115 f = user.find('<')
2108 2116 if f >= 0:
2109 2117 user = user[f + 1:]
2110 2118 return user
2111 2119
2112 2120 def email(author):
2113 2121 '''get email of author.'''
2114 2122 r = author.find('>')
2115 2123 if r == -1:
2116 2124 r = None
2117 2125 return author[author.find('<') + 1:r]
2118 2126
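# For instance (illustrative values), the three helpers above extract
# progressively smaller pieces of an author string:
#
#     author = 'Joe User <joe.user@example.com>'
#     email(author)        # -> 'joe.user@example.com'
#     emailuser(author)    # -> 'joe.user'
#     shortuser(author)    # -> 'joe'
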
2119 2127 def ellipsis(text, maxlength=400):
2120 2128 """Trim string to at most maxlength (default: 400) columns in display."""
2121 2129 return encoding.trim(text, maxlength, ellipsis='...')
2122 2130
2123 2131 def unitcountfn(*unittable):
2124 2132 '''return a function that renders a readable count of some quantity'''
2125 2133
2126 2134 def go(count):
2127 2135 for multiplier, divisor, format in unittable:
2128 2136 if count >= divisor * multiplier:
2129 2137 return format % (count / float(divisor))
2130 2138 return unittable[-1][2] % count
2131 2139
2132 2140 return go
2133 2141
2134 2142 bytecount = unitcountfn(
2135 2143 (100, 1 << 30, _('%.0f GB')),
2136 2144 (10, 1 << 30, _('%.1f GB')),
2137 2145 (1, 1 << 30, _('%.2f GB')),
2138 2146 (100, 1 << 20, _('%.0f MB')),
2139 2147 (10, 1 << 20, _('%.1f MB')),
2140 2148 (1, 1 << 20, _('%.2f MB')),
2141 2149 (100, 1 << 10, _('%.0f KB')),
2142 2150 (10, 1 << 10, _('%.1f KB')),
2143 2151 (1, 1 << 10, _('%.2f KB')),
2144 2152 (1, 1, _('%.0f bytes')),
2145 2153 )
2146 2154
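# For instance (illustrative values), the first matching row of the
# table above formats the count:
#
#     bytecount(100)        # -> '100 bytes'
#     bytecount(102400)     # -> '100 KB'
#     bytecount(2097152)    # -> '2.00 MB'
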
2147 2155 def escapestr(s):
2148 2156 # call underlying function of s.encode('string_escape') directly for
2149 2157 # Python 3 compatibility
2150 2158 return codecs.escape_encode(s)[0]
2151 2159
2152 2160 def unescapestr(s):
2153 2161 return codecs.escape_decode(s)[0]
2154 2162
2155 2163 def uirepr(s):
2156 2164 # Avoid double backslash in Windows path repr()
2157 2165 return repr(s).replace('\\\\', '\\')
2158 2166
2159 2167 # delay import of textwrap
2160 2168 def MBTextWrapper(**kwargs):
2161 2169 class tw(textwrap.TextWrapper):
2162 2170 """
2163 2171 Extend TextWrapper for width-awareness.
2164 2172
2165 2173 Neither number of 'bytes' in any encoding nor 'characters' is
2166 2174 appropriate to calculate terminal columns for specified string.
2167 2175
2168 2176 Original TextWrapper implementation uses built-in 'len()' directly,
2169 2177 so overriding is needed to use width information of each characters.
2170 2178
2171 2179 In addition, characters classified into 'ambiguous' width are
2172 2180 treated as wide in East Asian area, but as narrow in other.
2173 2181
2174 2182 This requires use decision to determine width of such characters.
2175 2183 """
2176 2184 def _cutdown(self, ucstr, space_left):
2177 2185 l = 0
2178 2186 colwidth = encoding.ucolwidth
2179 2187 for i in xrange(len(ucstr)):
2180 2188 l += colwidth(ucstr[i])
2181 2189 if space_left < l:
2182 2190 return (ucstr[:i], ucstr[i:])
2183 2191 return ucstr, ''
2184 2192
2185 2193 # overriding of base class
2186 2194 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
2187 2195 space_left = max(width - cur_len, 1)
2188 2196
2189 2197 if self.break_long_words:
2190 2198 cut, res = self._cutdown(reversed_chunks[-1], space_left)
2191 2199 cur_line.append(cut)
2192 2200 reversed_chunks[-1] = res
2193 2201 elif not cur_line:
2194 2202 cur_line.append(reversed_chunks.pop())
2195 2203
2196 2204 # this overriding code is imported from TextWrapper of Python 2.6
2197 2205 # to calculate columns of string by 'encoding.ucolwidth()'
2198 2206 def _wrap_chunks(self, chunks):
2199 2207 colwidth = encoding.ucolwidth
2200 2208
2201 2209 lines = []
2202 2210 if self.width <= 0:
2203 2211 raise ValueError("invalid width %r (must be > 0)" % self.width)
2204 2212
2205 2213 # Arrange in reverse order so items can be efficiently popped
2206 2214 # from a stack of chunks.
2207 2215 chunks.reverse()
2208 2216
2209 2217 while chunks:
2210 2218
2211 2219 # Start the list of chunks that will make up the current line.
2212 2220 # cur_len is just the length of all the chunks in cur_line.
2213 2221 cur_line = []
2214 2222 cur_len = 0
2215 2223
2216 2224 # Figure out which static string will prefix this line.
2217 2225 if lines:
2218 2226 indent = self.subsequent_indent
2219 2227 else:
2220 2228 indent = self.initial_indent
2221 2229
2222 2230 # Maximum width for this line.
2223 2231 width = self.width - len(indent)
2224 2232
2225 2233 # First chunk on line is whitespace -- drop it, unless this
2226 2234 # is the very beginning of the text (i.e. no lines started yet).
2227 2235 if self.drop_whitespace and chunks[-1].strip() == '' and lines:
2228 2236 del chunks[-1]
2229 2237
2230 2238 while chunks:
2231 2239 l = colwidth(chunks[-1])
2232 2240
2233 2241 # Can at least squeeze this chunk onto the current line.
2234 2242 if cur_len + l <= width:
2235 2243 cur_line.append(chunks.pop())
2236 2244 cur_len += l
2237 2245
2238 2246 # Nope, this line is full.
2239 2247 else:
2240 2248 break
2241 2249
2242 2250 # The current line is full, and the next chunk is too big to
2243 2251 # fit on *any* line (not just this one).
2244 2252 if chunks and colwidth(chunks[-1]) > width:
2245 2253 self._handle_long_word(chunks, cur_line, cur_len, width)
2246 2254
2247 2255 # If the last chunk on this line is all whitespace, drop it.
2248 2256 if (self.drop_whitespace and
2249 2257 cur_line and cur_line[-1].strip() == ''):
2250 2258 del cur_line[-1]
2251 2259
2252 2260 # Convert current line back to a string and store it in list
2253 2261 # of all lines (return value).
2254 2262 if cur_line:
2255 2263 lines.append(indent + ''.join(cur_line))
2256 2264
2257 2265 return lines
2258 2266
2259 2267 global MBTextWrapper
2260 2268 MBTextWrapper = tw
2261 2269 return tw(**kwargs)
2262 2270
2263 2271 def wrap(line, width, initindent='', hangindent=''):
2264 2272 maxindent = max(len(hangindent), len(initindent))
2265 2273 if width <= maxindent:
2266 2274 # adjust for weird terminal size
2267 2275 width = max(78, maxindent + 1)
2268 2276 line = line.decode(pycompat.sysstr(encoding.encoding),
2269 2277 pycompat.sysstr(encoding.encodingmode))
2270 2278 initindent = initindent.decode(pycompat.sysstr(encoding.encoding),
2271 2279 pycompat.sysstr(encoding.encodingmode))
2272 2280 hangindent = hangindent.decode(pycompat.sysstr(encoding.encoding),
2273 2281 pycompat.sysstr(encoding.encodingmode))
2274 2282 wrapper = MBTextWrapper(width=width,
2275 2283 initial_indent=initindent,
2276 2284 subsequent_indent=hangindent)
2277 2285 return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
2278 2286
2279 2287 if (pyplatform.python_implementation() == 'CPython' and
2280 2288 sys.version_info < (3, 0)):
2281 2289 # There is an issue in CPython that some IO methods do not handle EINTR
2282 2290 # correctly. The following table shows what CPython version (and functions)
2283 2291 # are affected (buggy: has the EINTR bug, okay: otherwise):
2284 2292 #
2285 2293 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2286 2294 # --------------------------------------------------
2287 2295 # fp.__iter__ | buggy | buggy | okay
2288 2296 # fp.read* | buggy | okay [1] | okay
2289 2297 #
2290 2298 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2291 2299 #
2292 2300 # Here we work around the EINTR issue for fileobj.__iter__. Other methods
2293 2301 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2294 2302 #
2295 2303 # Although we can work around the EINTR issue for fp.__iter__, it is slower:
2296 2304 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2297 2305 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2298 2306 # fp.__iter__ but not other fp.read* methods.
2299 2307 #
2300 2308 # On modern systems like Linux, the "read" syscall cannot be interrupted
2301 2309 # when reading "fast" files like on-disk files. So the EINTR issue only
2302 2310 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2303 2311 # files approximately as "fast" files and use the fast (unsafe) code path,
2304 2312 # to minimize the performance impact.
2305 2313 if sys.version_info >= (2, 7, 4):
2306 2314 # fp.readline deals with EINTR correctly, use it as a workaround.
2307 2315 def _safeiterfile(fp):
2308 2316 return iter(fp.readline, '')
2309 2317 else:
2310 2318 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2311 2319 # note: this may block longer than necessary because of bufsize.
2312 2320 def _safeiterfile(fp, bufsize=4096):
2313 2321 fd = fp.fileno()
2314 2322 line = ''
2315 2323 while True:
2316 2324 try:
2317 2325 buf = os.read(fd, bufsize)
2318 2326 except OSError as ex:
2319 2327 # os.read only raises EINTR before any data is read
2320 2328 if ex.errno == errno.EINTR:
2321 2329 continue
2322 2330 else:
2323 2331 raise
2324 2332 line += buf
2325 2333 if '\n' in buf:
2326 2334 splitted = line.splitlines(True)
2327 2335 line = ''
2328 2336 for l in splitted:
2329 2337 if l[-1] == '\n':
2330 2338 yield l
2331 2339 else:
2332 2340 line = l
2333 2341 if not buf:
2334 2342 break
2335 2343 if line:
2336 2344 yield line
2337 2345
2338 2346 def iterfile(fp):
2339 2347 fastpath = True
2340 2348 if type(fp) is file:
2341 2349 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2342 2350 if fastpath:
2343 2351 return fp
2344 2352 else:
2345 2353 return _safeiterfile(fp)
2346 2354 else:
2347 2355 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2348 2356 def iterfile(fp):
2349 2357 return fp
2350 2358
2351 2359 def iterlines(iterator):
2352 2360 for chunk in iterator:
2353 2361 for line in chunk.splitlines():
2354 2362 yield line
2355 2363
2356 2364 def expandpath(path):
2357 2365 return os.path.expanduser(os.path.expandvars(path))
2358 2366
2359 2367 def hgcmd():
2360 2368 """Return the command used to execute current hg
2361 2369
2362 2370 This is different from hgexecutable() because on Windows we want
2363 2371 to avoid things opening new shell windows like batch files, so we
2364 2372 get either the python call or current executable.
2365 2373 """
2366 2374 if mainfrozen():
2367 2375 if getattr(sys, 'frozen', None) == 'macosx_app':
2368 2376 # Env variable set by py2app
2369 2377 return [encoding.environ['EXECUTABLEPATH']]
2370 2378 else:
2371 2379 return [pycompat.sysexecutable]
2372 2380 return gethgcmd()
2373 2381
2374 2382 def rundetached(args, condfn):
2375 2383 """Execute the argument list in a detached process.
2376 2384
2377 2385 condfn is a callable which is called repeatedly and should return
2378 2386 True once the child process is known to have started successfully.
2379 2387 At this point, the child process PID is returned. If the child
2380 2388 process fails to start or finishes before condfn() evaluates to
2381 2389 True, return -1.
2382 2390 """
2383 2391 # Windows case is easier because the child process is either
2384 2392 # successfully starting and validating the condition or exiting
2385 2393 # on failure. We just poll on its PID. On Unix, if the child
2386 2394 # process fails to start, it will be left in a zombie state until
2387 2395 # the parent wait on it, which we cannot do since we expect a long
2388 2396 # running process on success. Instead we listen for SIGCHLD telling
2389 2397 # us our child process terminated.
2390 2398 terminated = set()
2391 2399 def handler(signum, frame):
2392 2400 terminated.add(os.wait())
2393 2401 prevhandler = None
2394 2402 SIGCHLD = getattr(signal, 'SIGCHLD', None)
2395 2403 if SIGCHLD is not None:
2396 2404 prevhandler = signal.signal(SIGCHLD, handler)
2397 2405 try:
2398 2406 pid = spawndetached(args)
2399 2407 while not condfn():
2400 2408 if ((pid in terminated or not testpid(pid))
2401 2409 and not condfn()):
2402 2410 return -1
2403 2411 time.sleep(0.1)
2404 2412 return pid
2405 2413 finally:
2406 2414 if prevhandler is not None:
2407 2415 signal.signal(signal.SIGCHLD, prevhandler)
2408 2416
2409 2417 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2410 2418 """Return the result of interpolating items in the mapping into string s.
2411 2419
2412 2420 prefix is a single character string, or a two character string with
2413 2421 a backslash as the first character if the prefix needs to be escaped in
2414 2422 a regular expression.
2415 2423
2416 2424 fn is an optional function that will be applied to the replacement text
2417 2425 just before replacement.
2418 2426
2419 2427 escape_prefix is an optional flag that allows using doubled prefix for
2420 2428 its escaping.
2421 2429 """
2422 2430 fn = fn or (lambda s: s)
2423 2431 patterns = '|'.join(mapping.keys())
2424 2432 if escape_prefix:
2425 2433 patterns += '|' + prefix
2426 2434 if len(prefix) > 1:
2427 2435 prefix_char = prefix[1:]
2428 2436 else:
2429 2437 prefix_char = prefix
2430 2438 mapping[prefix_char] = prefix_char
2431 2439 r = remod.compile(r'%s(%s)' % (prefix, patterns))
2432 2440 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2433 2441
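# A minimal sketch (assumed values): expand '%'-prefixed keys. Note that
# the mapping keys are joined into a regular expression, so they should
# be regex-safe:
#
#     interpolate('%', {'user': 'joe'}, 'hello %user')
#     # -> 'hello joe'
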
2434 2442 def getport(port):
2435 2443 """Return the port for a given network service.
2436 2444
2437 2445 If port is an integer, it's returned as is. If it's a string, it's
2438 2446 looked up using socket.getservbyname(). If there's no matching
2439 2447 service, error.Abort is raised.
2440 2448 """
2441 2449 try:
2442 2450 return int(port)
2443 2451 except ValueError:
2444 2452 pass
2445 2453
2446 2454 try:
2447 2455 return socket.getservbyname(port)
2448 2456 except socket.error:
2449 2457 raise Abort(_("no port number associated with service '%s'") % port)
2450 2458
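# For instance (illustrative; the service lookup depends on the
# platform's services database):
#
#     getport(8080)      # -> 8080
#     getport('8080')    # -> 8080
#     getport('http')    # -> 80 on typical systems
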
2451 2459 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
2452 2460 '0': False, 'no': False, 'false': False, 'off': False,
2453 2461 'never': False}
2454 2462
2455 2463 def parsebool(s):
2456 2464 """Parse s into a boolean.
2457 2465
2458 2466 If s is not a valid boolean, returns None.
2459 2467 """
2460 2468 return _booleans.get(s.lower(), None)
2461 2469
2462 2470 _hextochr = dict((a + b, chr(int(a + b, 16)))
2463 2471 for a in string.hexdigits for b in string.hexdigits)
2464 2472
2465 2473 class url(object):
2466 2474 r"""Reliable URL parser.
2467 2475
2468 2476 This parses URLs and provides attributes for the following
2469 2477 components:
2470 2478
2471 2479 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2472 2480
2473 2481 Missing components are set to None. The only exception is
2474 2482 fragment, which is set to '' if present but empty.
2475 2483
2476 2484 If parsefragment is False, fragment is included in query. If
2477 2485 parsequery is False, query is included in path. If both are
2478 2486 False, both fragment and query are included in path.
2479 2487
2480 2488 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2481 2489
2482 2490 Note that for backward compatibility reasons, bundle URLs do not
2483 2491 take host names. That means 'bundle://../' has a path of '../'.
2484 2492
2485 2493 Examples:
2486 2494
2487 2495 >>> url('http://www.ietf.org/rfc/rfc2396.txt')
2488 2496 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2489 2497 >>> url('ssh://[::1]:2200//home/joe/repo')
2490 2498 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2491 2499 >>> url('file:///home/joe/repo')
2492 2500 <url scheme: 'file', path: '/home/joe/repo'>
2493 2501 >>> url('file:///c:/temp/foo/')
2494 2502 <url scheme: 'file', path: 'c:/temp/foo/'>
2495 2503 >>> url('bundle:foo')
2496 2504 <url scheme: 'bundle', path: 'foo'>
2497 2505 >>> url('bundle://../foo')
2498 2506 <url scheme: 'bundle', path: '../foo'>
2499 2507 >>> url(r'c:\foo\bar')
2500 2508 <url path: 'c:\\foo\\bar'>
2501 2509 >>> url(r'\\blah\blah\blah')
2502 2510 <url path: '\\\\blah\\blah\\blah'>
2503 2511 >>> url(r'\\blah\blah\blah#baz')
2504 2512 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2505 2513 >>> url(r'file:///C:\users\me')
2506 2514 <url scheme: 'file', path: 'C:\\users\\me'>
2507 2515
2508 2516 Authentication credentials:
2509 2517
2510 2518 >>> url('ssh://joe:xyz@x/repo')
2511 2519 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2512 2520 >>> url('ssh://joe@x/repo')
2513 2521 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2514 2522
2515 2523 Query strings and fragments:
2516 2524
2517 2525 >>> url('http://host/a?b#c')
2518 2526 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2519 2527 >>> url('http://host/a?b#c', parsequery=False, parsefragment=False)
2520 2528 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2521 2529
2522 2530 Empty path:
2523 2531
2524 2532 >>> url('')
2525 2533 <url path: ''>
2526 2534 >>> url('#a')
2527 2535 <url path: '', fragment: 'a'>
2528 2536 >>> url('http://host/')
2529 2537 <url scheme: 'http', host: 'host', path: ''>
2530 2538 >>> url('http://host/#a')
2531 2539 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2532 2540
2533 2541 Only scheme:
2534 2542
2535 2543 >>> url('http:')
2536 2544 <url scheme: 'http'>
2537 2545 """
2538 2546
2539 2547 _safechars = "!~*'()+"
2540 2548 _safepchars = "/!~*'()+:\\"
2541 2549 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2542 2550
2543 2551 def __init__(self, path, parsequery=True, parsefragment=True):
2544 2552 # We slowly chomp away at path until we have only the path left
2545 2553 self.scheme = self.user = self.passwd = self.host = None
2546 2554 self.port = self.path = self.query = self.fragment = None
2547 2555 self._localpath = True
2548 2556 self._hostport = ''
2549 2557 self._origpath = path
2550 2558
2551 2559 if parsefragment and '#' in path:
2552 2560 path, self.fragment = path.split('#', 1)
2553 2561
2554 2562 # special case for Windows drive letters and UNC paths
2555 2563 if hasdriveletter(path) or path.startswith('\\\\'):
2556 2564 self.path = path
2557 2565 return
2558 2566
2559 2567 # For compatibility reasons, we can't handle bundle paths as
2560 2568 # normal URLS
2561 2569 if path.startswith('bundle:'):
2562 2570 self.scheme = 'bundle'
2563 2571 path = path[7:]
2564 2572 if path.startswith('//'):
2565 2573 path = path[2:]
2566 2574 self.path = path
2567 2575 return
2568 2576
2569 2577 if self._matchscheme(path):
2570 2578 parts = path.split(':', 1)
2571 2579 if parts[0]:
2572 2580 self.scheme, path = parts
2573 2581 self._localpath = False
2574 2582
2575 2583 if not path:
2576 2584 path = None
2577 2585 if self._localpath:
2578 2586 self.path = ''
2579 2587 return
2580 2588 else:
2581 2589 if self._localpath:
2582 2590 self.path = path
2583 2591 return
2584 2592
2585 2593 if parsequery and '?' in path:
2586 2594 path, self.query = path.split('?', 1)
2587 2595 if not path:
2588 2596 path = None
2589 2597 if not self.query:
2590 2598 self.query = None
2591 2599
2592 2600 # // is required to specify a host/authority
2593 2601 if path and path.startswith('//'):
2594 2602 parts = path[2:].split('/', 1)
2595 2603 if len(parts) > 1:
2596 2604 self.host, path = parts
2597 2605 else:
2598 2606 self.host = parts[0]
2599 2607 path = None
2600 2608 if not self.host:
2601 2609 self.host = None
2602 2610 # path of file:///d is /d
2603 2611 # path of file:///d:/ is d:/, not /d:/
2604 2612 if path and not hasdriveletter(path):
2605 2613 path = '/' + path
2606 2614
2607 2615 if self.host and '@' in self.host:
2608 2616 self.user, self.host = self.host.rsplit('@', 1)
2609 2617 if ':' in self.user:
2610 2618 self.user, self.passwd = self.user.split(':', 1)
2611 2619 if not self.host:
2612 2620 self.host = None
2613 2621
2614 2622 # Don't split on colons in IPv6 addresses without ports
2615 2623 if (self.host and ':' in self.host and
2616 2624 not (self.host.startswith('[') and self.host.endswith(']'))):
2617 2625 self._hostport = self.host
2618 2626 self.host, self.port = self.host.rsplit(':', 1)
2619 2627 if not self.host:
2620 2628 self.host = None
2621 2629
2622 2630 if (self.host and self.scheme == 'file' and
2623 2631 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2624 2632 raise Abort(_('file:// URLs can only refer to localhost'))
2625 2633
2626 2634 self.path = path
2627 2635
2628 2636 # leave the query string escaped
2629 2637 for a in ('user', 'passwd', 'host', 'port',
2630 2638 'path', 'fragment'):
2631 2639 v = getattr(self, a)
2632 2640 if v is not None:
2633 2641 setattr(self, a, pycompat.urlunquote(v))
2634 2642
2635 2643 def __repr__(self):
2636 2644 attrs = []
2637 2645 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2638 2646 'query', 'fragment'):
2639 2647 v = getattr(self, a)
2640 2648 if v is not None:
2641 2649 attrs.append('%s: %r' % (a, v))
2642 2650 return '<url %s>' % ', '.join(attrs)
2643 2651
2644 2652 def __str__(self):
2645 2653 r"""Join the URL's components back into a URL string.
2646 2654
2647 2655 Examples:
2648 2656
2649 2657 >>> str(url('http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2650 2658 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2651 2659 >>> str(url('http://user:pw@host:80/?foo=bar&baz=42'))
2652 2660 'http://user:pw@host:80/?foo=bar&baz=42'
2653 2661 >>> str(url('http://user:pw@host:80/?foo=bar%3dbaz'))
2654 2662 'http://user:pw@host:80/?foo=bar%3dbaz'
2655 2663 >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
2656 2664 'ssh://user:pw@[::1]:2200//home/joe#'
2657 2665 >>> str(url('http://localhost:80//'))
2658 2666 'http://localhost:80//'
2659 2667 >>> str(url('http://localhost:80/'))
2660 2668 'http://localhost:80/'
2661 2669 >>> str(url('http://localhost:80'))
2662 2670 'http://localhost:80/'
2663 2671 >>> str(url('bundle:foo'))
2664 2672 'bundle:foo'
2665 2673 >>> str(url('bundle://../foo'))
2666 2674 'bundle:../foo'
2667 2675 >>> str(url('path'))
2668 2676 'path'
2669 2677 >>> str(url('file:///tmp/foo/bar'))
2670 2678 'file:///tmp/foo/bar'
2671 2679 >>> str(url('file:///c:/tmp/foo/bar'))
2672 2680 'file:///c:/tmp/foo/bar'
2673 2681 >>> print url(r'bundle:foo\bar')
2674 2682 bundle:foo\bar
2675 2683 >>> print url(r'file:///D:\data\hg')
2676 2684 file:///D:\data\hg
2677 2685 """
2678 2686 return encoding.strfromlocal(self.__bytes__())
2679 2687
2680 2688 def __bytes__(self):
2681 2689 if self._localpath:
2682 2690 s = self.path
2683 2691 if self.scheme == 'bundle':
2684 2692 s = 'bundle:' + s
2685 2693 if self.fragment:
2686 2694 s += '#' + self.fragment
2687 2695 return s
2688 2696
2689 2697 s = self.scheme + ':'
2690 2698 if self.user or self.passwd or self.host:
2691 2699 s += '//'
2692 2700 elif self.scheme and (not self.path or self.path.startswith('/')
2693 2701 or hasdriveletter(self.path)):
2694 2702 s += '//'
2695 2703 if hasdriveletter(self.path):
2696 2704 s += '/'
2697 2705 if self.user:
2698 2706 s += urlreq.quote(self.user, safe=self._safechars)
2699 2707 if self.passwd:
2700 2708 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2701 2709 if self.user or self.passwd:
2702 2710 s += '@'
2703 2711 if self.host:
2704 2712 if not (self.host.startswith('[') and self.host.endswith(']')):
2705 2713 s += urlreq.quote(self.host)
2706 2714 else:
2707 2715 s += self.host
2708 2716 if self.port:
2709 2717 s += ':' + urlreq.quote(self.port)
2710 2718 if self.host:
2711 2719 s += '/'
2712 2720 if self.path:
2713 2721 # TODO: similar to the query string, we should not unescape the
2714 2722 # path when we store it, the path might contain '%2f' = '/',
2715 2723 # which we should *not* escape.
2716 2724 s += urlreq.quote(self.path, safe=self._safepchars)
2717 2725 if self.query:
2718 2726 # we store the query in escaped form.
2719 2727 s += '?' + self.query
2720 2728 if self.fragment is not None:
2721 2729 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2722 2730 return s
2723 2731
2724 2732 def authinfo(self):
2725 2733 user, passwd = self.user, self.passwd
2726 2734 try:
2727 2735 self.user, self.passwd = None, None
2728 2736 s = str(self)
2729 2737 finally:
2730 2738 self.user, self.passwd = user, passwd
2731 2739 if not self.user:
2732 2740 return (s, None)
2733 2741 # authinfo[1] is passed to urllib2 password manager, and its
2734 2742 # URIs must not contain credentials. The host is passed in the
2735 2743 # URIs list because Python < 2.4.3 uses only that to search for
2736 2744 # a password.
2737 2745 return (s, (None, (s, self.host),
2738 2746 self.user, self.passwd or ''))
2739 2747
2740 2748 def isabs(self):
2741 2749 if self.scheme and self.scheme != 'file':
2742 2750 return True # remote URL
2743 2751 if hasdriveletter(self.path):
2744 2752 return True # absolute for our purposes - can't be joined()
2745 2753 if self.path.startswith(r'\\'):
2746 2754 return True # Windows UNC path
2747 2755 if self.path.startswith('/'):
2748 2756 return True # POSIX-style
2749 2757 return False
2750 2758
2751 2759 def localpath(self):
2752 2760 if self.scheme == 'file' or self.scheme == 'bundle':
2753 2761 path = self.path or '/'
2754 2762 # For Windows, we need to promote hosts containing drive
2755 2763 # letters to paths with drive letters.
2756 2764 if hasdriveletter(self._hostport):
2757 2765 path = self._hostport + '/' + self.path
2758 2766 elif (self.host is not None and self.path
2759 2767 and not hasdriveletter(path)):
2760 2768 path = '/' + path
2761 2769 return path
2762 2770 return self._origpath
2763 2771
2764 2772 def islocal(self):
2765 2773 '''whether localpath will return something that posixfile can open'''
2766 2774 return (not self.scheme or self.scheme == 'file'
2767 2775 or self.scheme == 'bundle')
2768 2776
2769 2777 def hasscheme(path):
2770 2778 return bool(url(path).scheme)
2771 2779
2772 2780 def hasdriveletter(path):
2773 2781 return path and path[1:2] == ':' and path[0:1].isalpha()
2774 2782
2775 2783 def urllocalpath(path):
2776 2784 return url(path, parsequery=False, parsefragment=False).localpath()
2777 2785
2778 2786 def hidepassword(u):
2779 2787 '''hide user credential in a url string'''
2780 2788 u = url(u)
2781 2789 if u.passwd:
2782 2790 u.passwd = '***'
2783 2791 return str(u)
2784 2792
2785 2793 def removeauth(u):
2786 2794 '''remove all authentication information from a url string'''
2787 2795 u = url(u)
2788 2796 u.user = u.passwd = None
2789 2797 return str(u)
2790 2798
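# For instance (illustrative values):
#
#     hidepassword('http://joe:secret@example.com/repo')
#     # -> 'http://joe:***@example.com/repo'
#     removeauth('http://joe:secret@example.com/repo')
#     # -> 'http://example.com/repo'
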
2791 2799 timecount = unitcountfn(
2792 2800 (1, 1e3, _('%.0f s')),
2793 2801 (100, 1, _('%.1f s')),
2794 2802 (10, 1, _('%.2f s')),
2795 2803 (1, 1, _('%.3f s')),
2796 2804 (100, 0.001, _('%.1f ms')),
2797 2805 (10, 0.001, _('%.2f ms')),
2798 2806 (1, 0.001, _('%.3f ms')),
2799 2807 (100, 0.000001, _('%.1f us')),
2800 2808 (10, 0.000001, _('%.2f us')),
2801 2809 (1, 0.000001, _('%.3f us')),
2802 2810 (100, 0.000000001, _('%.1f ns')),
2803 2811 (10, 0.000000001, _('%.2f ns')),
2804 2812 (1, 0.000000001, _('%.3f ns')),
2805 2813 )
2806 2814
2807 2815 _timenesting = [0]
2808 2816
2809 2817 def timed(func):
2810 2818 '''Report the execution time of a function call to stderr.
2811 2819
2812 2820 During development, use as a decorator when you need to measure
2813 2821 the cost of a function, e.g. as follows:
2814 2822
2815 2823 @util.timed
2816 2824 def foo(a, b, c):
2817 2825 pass
2818 2826 '''
2819 2827
2820 2828 def wrapper(*args, **kwargs):
2821 2829 start = timer()
2822 2830 indent = 2
2823 2831 _timenesting[0] += indent
2824 2832 try:
2825 2833 return func(*args, **kwargs)
2826 2834 finally:
2827 2835 elapsed = timer() - start
2828 2836 _timenesting[0] -= indent
2829 2837 stderr.write('%s%s: %s\n' %
2830 2838 (' ' * _timenesting[0], func.__name__,
2831 2839 timecount(elapsed)))
2832 2840 return wrapper
2833 2841
2834 2842 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
2835 2843 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
2836 2844
2837 2845 def sizetoint(s):
2838 2846 '''Convert a space specifier to a byte count.
2839 2847
2840 2848 >>> sizetoint('30')
2841 2849 30
2842 2850 >>> sizetoint('2.2kb')
2843 2851 2252
2844 2852 >>> sizetoint('6M')
2845 2853 6291456
2846 2854 '''
2847 2855 t = s.strip().lower()
2848 2856 try:
2849 2857 for k, u in _sizeunits:
2850 2858 if t.endswith(k):
2851 2859 return int(float(t[:-len(k)]) * u)
2852 2860 return int(t)
2853 2861 except ValueError:
2854 2862 raise error.ParseError(_("couldn't parse size: %s") % s)
2855 2863
2856 2864 class hooks(object):
2857 2865 '''A collection of hook functions that can be used to extend a
2858 2866 function's behavior. Hooks are called in lexicographic order,
2859 2867 based on the names of their sources.'''
2860 2868
2861 2869 def __init__(self):
2862 2870 self._hooks = []
2863 2871
2864 2872 def add(self, source, hook):
2865 2873 self._hooks.append((source, hook))
2866 2874
2867 2875 def __call__(self, *args):
2868 2876 self._hooks.sort(key=lambda x: x[0])
2869 2877 results = []
2870 2878 for source, hook in self._hooks:
2871 2879 results.append(hook(*args))
2872 2880 return results
2873 2881
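# Illustrative sketch (assumed values): hooks run in source-name order
# regardless of registration order, and all results are collected:
#
#     h = hooks()
#     h.add('b-source', lambda x: x + 1)
#     h.add('a-source', lambda x: x * 2)
#     h(3)    # -> [6, 4]
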
2874 2882 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%s', depth=0):
2875 2883 '''Yields lines for a nicely formatted stacktrace.
2876 2884 Skips the 'skip' last entries, then returns the last 'depth' entries.
2877 2885 Each file+linenumber is formatted according to fileline.
2878 2886 Each line is formatted according to line.
2879 2887 If line is None, it yields:
2880 2888 length of longest filepath+line number,
2881 2889 filepath+linenumber,
2882 2890 function
2883 2891
2884 2892 Not to be used in production code, but very convenient while developing.
2885 2893 '''
2886 2894 entries = [(fileline % (fn, ln), func)
2887 2895 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
2888 2896 ][-depth:]
2889 2897 if entries:
2890 2898 fnmax = max(len(entry[0]) for entry in entries)
2891 2899 for fnln, func in entries:
2892 2900 if line is None:
2893 2901 yield (fnmax, fnln, func)
2894 2902 else:
2895 2903 yield line % (fnmax, fnln, func)
2896 2904
2897 2905 def debugstacktrace(msg='stacktrace', skip=0,
2898 2906 f=stderr, otherf=stdout, depth=0):
2899 2907 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
2900 2908 Skips the 'skip' entries closest to the call, then shows 'depth' entries.
2901 2909 By default it will flush stdout first.
2902 2910 It can be used everywhere and intentionally does not require an ui object.
2903 2911 Not to be used in production code, but very convenient while developing.
2904 2912 '''
2905 2913 if otherf:
2906 2914 otherf.flush()
2907 2915 f.write('%s at:\n' % msg.rstrip())
2908 2916 for line in getstackframes(skip + 1, depth=depth):
2909 2917 f.write(line)
2910 2918 f.flush()
2911 2919
2912 2920 class dirs(object):
2913 2921 '''a multiset of directory names from a dirstate or manifest'''
2914 2922
2915 2923 def __init__(self, map, skip=None):
2916 2924 self._dirs = {}
2917 2925 addpath = self.addpath
2918 2926 if safehasattr(map, 'iteritems') and skip is not None:
2919 2927 for f, s in map.iteritems():
2920 2928 if s[0] != skip:
2921 2929 addpath(f)
2922 2930 else:
2923 2931 for f in map:
2924 2932 addpath(f)
2925 2933
2926 2934 def addpath(self, path):
2927 2935 dirs = self._dirs
2928 2936 for base in finddirs(path):
2929 2937 if base in dirs:
2930 2938 dirs[base] += 1
2931 2939 return
2932 2940 dirs[base] = 1
2933 2941
2934 2942 def delpath(self, path):
2935 2943 dirs = self._dirs
2936 2944 for base in finddirs(path):
2937 2945 if dirs[base] > 1:
2938 2946 dirs[base] -= 1
2939 2947 return
2940 2948 del dirs[base]
2941 2949
2942 2950 def __iter__(self):
2943 2951 return iter(self._dirs)
2944 2952
2945 2953 def __contains__(self, d):
2946 2954 return d in self._dirs
2947 2955
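# Illustrative sketch (assumed values): only the parent directories of
# each path are members, not the paths themselves:
#
#     d = dirs(['a/b/c', 'a/d'])
#     'a' in d      # -> True (its count is 2)
#     'a/b' in d    # -> True
#     'a/b/c' in d  # -> False
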
2948 2956 if safehasattr(parsers, 'dirs'):
2949 2957 dirs = parsers.dirs
2950 2958
2951 2959 def finddirs(path):
2952 2960 pos = path.rfind('/')
2953 2961 while pos != -1:
2954 2962 yield path[:pos]
2955 2963 pos = path.rfind('/', 0, pos)
2956 2964
2957 2965 class ctxmanager(object):
2958 2966 '''A context manager for use in 'with' blocks to allow multiple
2959 2967 contexts to be entered at once. This is both safer and more
2960 2968 flexible than contextlib.nested.
2961 2969
2962 2970 Once Mercurial supports Python 2.7+, this will become mostly
2963 2971 unnecessary.
2964 2972 '''
2965 2973
2966 2974 def __init__(self, *args):
2967 2975 '''Accepts a list of no-argument functions that return context
2968 2976 managers. These will be invoked when enter() is called.'''
2969 2977 self._pending = args
2970 2978 self._atexit = []
2971 2979
2972 2980 def __enter__(self):
2973 2981 return self
2974 2982
2975 2983 def enter(self):
2976 2984 '''Create and enter context managers in the order in which they were
2977 2985 passed to the constructor.'''
2978 2986 values = []
2979 2987 for func in self._pending:
2980 2988 obj = func()
2981 2989 values.append(obj.__enter__())
2982 2990 self._atexit.append(obj.__exit__)
2983 2991 del self._pending
2984 2992 return values
2985 2993
2986 2994 def atexit(self, func, *args, **kwargs):
2987 2995 '''Add a function to call when this context manager exits. The
2988 2996 ordering of multiple atexit calls is unspecified, save that
2989 2997 they will happen before any __exit__ functions.'''
2990 2998 def wrapper(exc_type, exc_val, exc_tb):
2991 2999 func(*args, **kwargs)
2992 3000 self._atexit.append(wrapper)
2993 3001 return func
2994 3002
2995 3003 def __exit__(self, exc_type, exc_val, exc_tb):
2996 3004 '''Context managers are exited in the reverse order from which
2997 3005 they were created.'''
2998 3006 received = exc_type is not None
2999 3007 suppressed = False
3000 3008 pending = None
3001 3009 self._atexit.reverse()
3002 3010 for exitfunc in self._atexit:
3003 3011 try:
3004 3012 if exitfunc(exc_type, exc_val, exc_tb):
3005 3013 suppressed = True
3006 3014 exc_type = None
3007 3015 exc_val = None
3008 3016 exc_tb = None
3009 3017 except BaseException:
3010 3018 exc_type, exc_val, exc_tb = pending = sys.exc_info()
3012 3020 del self._atexit
3013 3021 if pending:
3014 3022 raise exc_val
3015 3023 return received and suppressed
3016 3024
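# A minimal sketch (hypothetical file names): enter several context
# managers without nested 'with' blocks:
#
#     with ctxmanager(lambda: open('a'), lambda: open('b')) as c:
#         fa, fb = c.enter()
#         # ... use fa and fb ...
#
# Both files are closed on exit, in reverse order of entry.
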
3017 3025 # compression code
3018 3026
3019 3027 SERVERROLE = 'server'
3020 3028 CLIENTROLE = 'client'
3021 3029
3022 3030 compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
3023 3031 (u'name', u'serverpriority',
3024 3032 u'clientpriority'))
3025 3033
3026 3034 class compressormanager(object):
3027 3035 """Holds registrations of various compression engines.
3028 3036
3029 3037 This class essentially abstracts the differences between compression
3030 3038 engines to allow new compression formats to be added easily, possibly from
3031 3039 extensions.
3032 3040
3033 3041 Compressors are registered against the global instance by calling its
3034 3042 ``register()`` method.
3035 3043 """
3036 3044 def __init__(self):
3037 3045 self._engines = {}
3038 3046 # Bundle spec human name to engine name.
3039 3047 self._bundlenames = {}
3040 3048 # Internal bundle identifier to engine name.
3041 3049 self._bundletypes = {}
3042 3050 # Revlog header to engine name.
3043 3051 self._revlogheaders = {}
3044 3052 # Wire proto identifier to engine name.
3045 3053 self._wiretypes = {}
3046 3054
3047 3055 def __getitem__(self, key):
3048 3056 return self._engines[key]
3049 3057
3050 3058 def __contains__(self, key):
3051 3059 return key in self._engines
3052 3060
3053 3061 def __iter__(self):
3054 3062 return iter(self._engines.keys())
3055 3063
3056 3064 def register(self, engine):
3057 3065 """Register a compression engine with the manager.
3058 3066
3059 3067 The argument must be a ``compressionengine`` instance.
3060 3068 """
3061 3069 if not isinstance(engine, compressionengine):
3062 3070 raise ValueError(_('argument must be a compressionengine'))
3063 3071
3064 3072 name = engine.name()
3065 3073
3066 3074 if name in self._engines:
3067 3075 raise error.Abort(_('compression engine %s already registered') %
3068 3076 name)
3069 3077
3070 3078 bundleinfo = engine.bundletype()
3071 3079 if bundleinfo:
3072 3080 bundlename, bundletype = bundleinfo
3073 3081
3074 3082 if bundlename in self._bundlenames:
3075 3083 raise error.Abort(_('bundle name %s already registered') %
3076 3084 bundlename)
3077 3085 if bundletype in self._bundletypes:
3078 3086 raise error.Abort(_('bundle type %s already registered by %s') %
3079 3087 (bundletype, self._bundletypes[bundletype]))
3080 3088
3081 3089 # An engine may declare no external facing name (bundlename is None).
3082 3090 if bundlename:
3083 3091 self._bundlenames[bundlename] = name
3084 3092
3085 3093 self._bundletypes[bundletype] = name
3086 3094
3087 3095 wiresupport = engine.wireprotosupport()
3088 3096 if wiresupport:
3089 3097 wiretype = wiresupport.name
3090 3098 if wiretype in self._wiretypes:
3091 3099 raise error.Abort(_('wire protocol compression %s already '
3092 3100 'registered by %s') %
3093 3101 (wiretype, self._wiretypes[wiretype]))
3094 3102
3095 3103 self._wiretypes[wiretype] = name
3096 3104
3097 3105 revlogheader = engine.revlogheader()
3098 3106 if revlogheader and revlogheader in self._revlogheaders:
3099 3107 raise error.Abort(_('revlog header %s already registered by %s') %
3100 3108 (revlogheader, self._revlogheaders[revlogheader]))
3101 3109
3102 3110 if revlogheader:
3103 3111 self._revlogheaders[revlogheader] = name
3104 3112
3105 3113 self._engines[name] = engine
3106 3114
3107 3115 @property
3108 3116 def supportedbundlenames(self):
3109 3117 return set(self._bundlenames.keys())
3110 3118
3111 3119 @property
3112 3120 def supportedbundletypes(self):
3113 3121 return set(self._bundletypes.keys())
3114 3122
3115 3123 def forbundlename(self, bundlename):
3116 3124 """Obtain a compression engine registered to a bundle name.
3117 3125
3118 3126 Will raise KeyError if the bundle name isn't registered.
3119 3127
3120 3128 Will abort if the engine is known but not available.
3121 3129 """
3122 3130 engine = self._engines[self._bundlenames[bundlename]]
3123 3131 if not engine.available():
3124 3132 raise error.Abort(_('compression engine %s could not be loaded') %
3125 3133 engine.name())
3126 3134 return engine
3127 3135
3128 3136 def forbundletype(self, bundletype):
3129 3137 """Obtain a compression engine registered to a bundle type.
3130 3138
3131 3139 Will raise KeyError if the bundle type isn't registered.
3132 3140
3133 3141 Will abort if the engine is known but not available.
3134 3142 """
3135 3143 engine = self._engines[self._bundletypes[bundletype]]
3136 3144 if not engine.available():
3137 3145 raise error.Abort(_('compression engine %s could not be loaded') %
3138 3146 engine.name())
3139 3147 return engine
3140 3148
3141 3149 def supportedwireengines(self, role, onlyavailable=True):
3142 3150 """Obtain compression engines that support the wire protocol.
3143 3151
3144 3152 Returns a list of engines in prioritized order, most desired first.
3145 3153
3146 3154 If ``onlyavailable`` is set, filter out engines that can't be
3147 3155 loaded.
3148 3156 """
3149 3157 assert role in (SERVERROLE, CLIENTROLE)
3150 3158
3151 3159 attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'
3152 3160
3153 3161 engines = [self._engines[e] for e in self._wiretypes.values()]
3154 3162 if onlyavailable:
3155 3163 engines = [e for e in engines if e.available()]
3156 3164
3157 3165 def getkey(e):
3158 3166 # Sort first by priority, highest first. In case of tie, sort
3159 3167 # alphabetically. This is arbitrary, but ensures output is
3160 3168 # stable.
3161 3169 w = e.wireprotosupport()
3162 3170 return -1 * getattr(w, attr), w.name
3163 3171
3164 3172 return list(sorted(engines, key=getkey))
3165 3173
3166 3174 def forwiretype(self, wiretype):
3167 3175 engine = self._engines[self._wiretypes[wiretype]]
3168 3176 if not engine.available():
3169 3177 raise error.Abort(_('compression engine %s could not be loaded') %
3170 3178 engine.name())
3171 3179 return engine
3172 3180
3173 3181 def forrevlogheader(self, header):
3174 3182 """Obtain a compression engine registered to a revlog header.
3175 3183
3176 3184 Will raise KeyError if the revlog header value isn't registered.
3177 3185 """
3178 3186 return self._engines[self._revlogheaders[header]]
3179 3187
3180 3188 compengines = compressormanager()
3181 3189
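# Concrete engines defined below register themselves against this global
# instance; consumers then look them up, e.g. (illustrative):
#
#     engine = compengines.forbundlename('gzip')    # the zlib engine
#     compressed = ''.join(engine.compressstream(iter(['data'])))
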
3182 3190 class compressionengine(object):
3183 3191 """Base class for compression engines.
3184 3192
3185 3193 Compression engines must implement the interface defined by this class.
3186 3194 """
3187 3195 def name(self):
3188 3196 """Returns the name of the compression engine.
3189 3197
3190 3198 This is the key the engine is registered under.
3191 3199
3192 3200 This method must be implemented.
3193 3201 """
3194 3202 raise NotImplementedError()
3195 3203
3196 3204 def available(self):
3197 3205 """Whether the compression engine is available.
3198 3206
3199 3207 The intent of this method is to allow optional compression engines
3200 3208 that may not be available in all installations (such as engines relying
3201 3209 on C extensions that may not be present).
3202 3210 """
3203 3211 return True
3204 3212
3205 3213 def bundletype(self):
3206 3214 """Describes bundle identifiers for this engine.
3207 3215
3208 3216 If this compression engine isn't supported for bundles, returns None.
3209 3217
3210 3218 If this engine can be used for bundles, returns a 2-tuple of strings of
3211 3219 the user-facing "bundle spec" compression name and an internal
3212 3220 identifier used to denote the compression format within bundles. To
3213 3221 exclude the name from external usage, set the first element to ``None``.
3214 3222
3215 3223 If bundle compression is supported, the class must also implement
3216 3224 ``compressstream`` and ``decompressorreader``.
3217 3225 """
3218 3226 return None
3219 3227
3220 3228 def wireprotosupport(self):
3221 3229 """Declare support for this compression format on the wire protocol.
3222 3230
3223 3231 If this compression engine isn't supported for compressing wire
3224 3232 protocol payloads, returns None.
3225 3233
3226 3234 Otherwise, returns ``compenginewireprotosupport`` with the following
3227 3235 fields:
3228 3236
3229 3237 * String format identifier
3230 3238 * Integer priority for the server
3231 3239 * Integer priority for the client
3232 3240
3233 3241 The integer priorities are used to order the advertisement of format
3234 3242 support by server and client. The highest integer is advertised
3235 3243 first. Integers with non-positive values aren't advertised.
3236 3244
3237 3245 The priority values are somewhat arbitrary and only used for default
3238 3246 ordering. The relative order can be changed via config options.
3239 3247
3240 3248 If wire protocol compression is supported, the class must also implement
3241 3249 ``compressstream`` and ``decompressorreader``.
3242 3250 """
3243 3251 return None
3244 3252
3245 3253 def revlogheader(self):
3246 3254 """Header added to revlog chunks that identifies this engine.
3247 3255
3248 3256 If this engine can be used to compress revlogs, this method should
3249 3257 return the bytes used to identify chunks compressed with this engine.
3250 3258 Else, the method should return ``None`` to indicate it does not
3251 3259 participate in revlog compression.
3252 3260 """
3253 3261 return None
3254 3262
3255 3263 def compressstream(self, it, opts=None):
3256 3264 """Compress an iterator of chunks.
3257 3265
3258 3266 The method receives an iterator (ideally a generator) of chunks of
3259 3267 bytes to be compressed. It returns an iterator (ideally a generator)
3260 3268 of bytes of chunks representing the compressed output.
3261 3269
3262 3270 Optionally accepts an argument defining how to perform compression.
3263 3271 Each engine treats this argument differently.
3264 3272 """
3265 3273 raise NotImplementedError()
3266 3274
3267 3275 def decompressorreader(self, fh):
3268 3276 """Perform decompression on a file object.
3269 3277
3270 3278 Argument is an object with a ``read(size)`` method that returns
3271 3279 compressed data. Return value is an object with a ``read(size)`` that
3272 3280 returns uncompressed data.
3273 3281 """
3274 3282 raise NotImplementedError()
3275 3283
3276 3284 def revlogcompressor(self, opts=None):
3277 3285 """Obtain an object that can be used to compress revlog entries.
3278 3286
3279 3287 The object has a ``compress(data)`` method that compresses binary
3280 3288 data. This method returns compressed binary data or ``None`` if
3281 3289 the data could not be compressed (too small, not compressible, etc).
3282 3290 The returned data should have a header uniquely identifying this
3283 3291 compression format so decompression can be routed to this engine.
3284 3292 This header should be identified by the ``revlogheader()`` return
3285 3293 value.
3286 3294
3287 3295 The object has a ``decompress(data)`` method that decompresses
3288 3296 data. The method will only be called if ``data`` begins with
3289 3297 ``revlogheader()``. The method should return the raw, uncompressed
3290 3298 data or raise a ``RevlogError``.
3291 3299
3292 3300 The object is reusable but is not thread safe.
3293 3301 """
3294 3302 raise NotImplementedError()
3295 3303
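
# Editor's sketch (not in the original module): a minimal conforming engine.
# Only ``name`` must always be implemented; the streaming methods become
# mandatory once the engine declares bundle or wire protocol support. This
# class is deliberately never registered.
class _exampleidentityengine(compressionengine):
    def name(self):
        return 'identity-example'

    def compressstream(self, it, opts=None):
        # Pass chunks through unchanged.
        return it

    def decompressorreader(self, fh):
        return fh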

class _zlibengine(compressionengine):
    def name(self):
        return 'zlib'

    def bundletype(self):
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        opts = opts or {}

        z = zlib.compressobj(opts.get('level', -1))
        for chunk in it:
            data = z.compress(chunk)
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through the generator.
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = zlib.decompressobj()
            for chunk in filechunkiter(fh):
                while chunk:
                    # Limit output size to limit memory.
                    yield d.decompress(chunk, 2 ** 18)
                    chunk = d.unconsumed_tail

        return chunkbuffer(gen())

    class zlibrevlogcompressor(object):
        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                return None

            elif insize <= 1000000:
                compressed = zlib.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            else:
                z = zlib.compressobj()
                parts = []
                pos = 0
                while pos < insize:
                    pos2 = pos + 2**20
                    parts.append(z.compress(data[pos:pos2]))
                    pos = pos2
                parts.append(z.flush())

                if sum(map(len, parts)) < insize:
                    return ''.join(parts)
                return None

        def decompress(self, data):
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())
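
# Editor's sketch (not in the original module): a round trip through the
# streaming API of the engine registered above. 'zlib' is its wire protocol
# identifier, so ``forwiretype`` resolves it; ``stringio`` is the module
# alias imported near the top of this file.
def _examplezlibroundtrip(data='x' * 1024):
    engine = compengines.forwiretype('zlib')
    compressed = ''.join(engine.compressstream(iter([data])))
    reader = engine.decompressorreader(stringio(compressed))
    return reader.read(len(data)) == data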

class _bz2engine(compressionengine):
    def name(self):
        return 'bz2'

    def bundletype(self):
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        opts = opts or {}
        z = bz2.BZ2Compressor(opts.get('level', 9))
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = bz2.BZ2Decompressor()
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_bz2engine())

class _truncatedbz2engine(compressionengine):
    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled
    # elsewhere.

    def decompressorreader(self, fh):
        def gen():
            # The input stream doesn't have the 'BZ' header. So add it back.
            d = bz2.BZ2Decompressor()
            d.decompress('BZ')
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_truncatedbz2engine())
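
# Editor's sketch (not in the original module): consuming a legacy bzip2
# stream whose producer stripped the leading 'BZ' magic. ``forbundletype``
# is assumed here to be the manager's lookup by internal bundle identifier,
# defined earlier in this file.
def _exampletruncatedbz2(fh):
    engine = compengines.forbundletype('_truncatedBZ')
    return engine.decompressorreader(fh)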

class _noopengine(compressionengine):
    def name(self):
        return 'none'

    def bundletype(self):
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't advertise
    # them by default because, unless the network is fast, uncompressed
    # payloads can easily saturate the pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        return it

    def decompressorreader(self, fh):
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())

class _zstdengine(compressionengine):
    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
            return zstd
        except ImportError:
            return None

    def available(self):
        return bool(self._module)

    def bundletype(self):
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        opts = opts or {}
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = opts.get('level', 3)

        zstd = self._module
        z = zstd.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        zstd = self._module
        dctx = zstd.ZstdDecompressor()
        return chunkbuffer(dctx.read_from(fh))

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # Writing the content size adds a few bytes to the output.
            # However, it allows decompression to be more optimal since we
            # can pre-allocate a buffer to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level,
                                             write_content_size=True)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                return None

            elif insize <= 1000000:
                compressed = self._cctx.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None
            else:
                z = self._cctx.compressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._compinsize
                    chunk = z.compress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                chunks.append(z.flush())

                if sum(map(len, chunks)) < insize:
                    return ''.join(chunks)
                return None

        def decompress(self, data):
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._decompinsize
                    chunk = dobj.decompress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                # Frame should be exhausted, so no finish() API.

                return ''.join(chunks)
            except Exception as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))

compengines.register(_zstdengine())
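
# Editor's sketch (not in the original module): compressing a revlog chunk
# with the zstd engine registered above. '\x28' is the revlog header byte
# declared by ``_zstdengine.revlogheader``; availability must be checked
# because the zstd module is optional.
def _examplezstdrevlog(data):
    engine = compengines.forrevlogheader('\x28')
    if not engine.available():
        return None
    comp = engine.revlogcompressor({'level': 3})
    # Returns compressed bytes, or None when compression isn't worthwhile
    # (callers must not pass empty input).
    return comp.compress(data)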

# convenient shortcut
dst = debugstacktrace