util: use tryunlink in unlinkpath...
Ryan McElroy
r31541:bd9daafb default
@@ -1,3588 +1,3587 @@
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import
17 17
18 18 import bz2
19 19 import calendar
20 20 import codecs
21 21 import collections
22 22 import datetime
23 23 import errno
24 24 import gc
25 25 import hashlib
26 26 import imp
27 27 import os
28 28 import platform as pyplatform
29 29 import re as remod
30 30 import shutil
31 31 import signal
32 32 import socket
33 33 import stat
34 34 import string
35 35 import subprocess
36 36 import sys
37 37 import tempfile
38 38 import textwrap
39 39 import time
40 40 import traceback
41 41 import zlib
42 42
43 43 from . import (
44 44 encoding,
45 45 error,
46 46 i18n,
47 47 osutil,
48 48 parsers,
49 49 pycompat,
50 50 )
51 51
52 52 empty = pycompat.empty
53 53 httplib = pycompat.httplib
54 54 httpserver = pycompat.httpserver
55 55 pickle = pycompat.pickle
56 56 queue = pycompat.queue
57 57 socketserver = pycompat.socketserver
58 58 stderr = pycompat.stderr
59 59 stdin = pycompat.stdin
60 60 stdout = pycompat.stdout
61 61 stringio = pycompat.stringio
62 62 urlerr = pycompat.urlerr
63 63 urlparse = pycompat.urlparse
64 64 urlreq = pycompat.urlreq
65 65 xmlrpclib = pycompat.xmlrpclib
66 66
67 67 def isatty(fp):
68 68 try:
69 69 return fp.isatty()
70 70 except AttributeError:
71 71 return False
72 72
73 73 # glibc determines buffering on first write to stdout - if we replace a TTY
74 74 # destined stdout with a pipe destined stdout (e.g. pager), we want line
75 75 # buffering
76 76 if isatty(stdout):
77 77 stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)
78 78
79 79 if pycompat.osname == 'nt':
80 80 from . import windows as platform
81 81 stdout = platform.winstdout(stdout)
82 82 else:
83 83 from . import posix as platform
84 84
85 85 _ = i18n._
86 86
87 87 bindunixsocket = platform.bindunixsocket
88 88 cachestat = platform.cachestat
89 89 checkexec = platform.checkexec
90 90 checklink = platform.checklink
91 91 copymode = platform.copymode
92 92 executablepath = platform.executablepath
93 93 expandglobs = platform.expandglobs
94 94 explainexit = platform.explainexit
95 95 findexe = platform.findexe
96 96 gethgcmd = platform.gethgcmd
97 97 getuser = platform.getuser
98 98 getpid = os.getpid
99 99 groupmembers = platform.groupmembers
100 100 groupname = platform.groupname
101 101 hidewindow = platform.hidewindow
102 102 isexec = platform.isexec
103 103 isowner = platform.isowner
104 104 localpath = platform.localpath
105 105 lookupreg = platform.lookupreg
106 106 makedir = platform.makedir
107 107 nlinks = platform.nlinks
108 108 normpath = platform.normpath
109 109 normcase = platform.normcase
110 110 normcasespec = platform.normcasespec
111 111 normcasefallback = platform.normcasefallback
112 112 openhardlinks = platform.openhardlinks
113 113 oslink = platform.oslink
114 114 parsepatchoutput = platform.parsepatchoutput
115 115 pconvert = platform.pconvert
116 116 poll = platform.poll
117 117 popen = platform.popen
118 118 posixfile = platform.posixfile
119 119 quotecommand = platform.quotecommand
120 120 readpipe = platform.readpipe
121 121 rename = platform.rename
122 122 removedirs = platform.removedirs
123 123 samedevice = platform.samedevice
124 124 samefile = platform.samefile
125 125 samestat = platform.samestat
126 126 setbinary = platform.setbinary
127 127 setflags = platform.setflags
128 128 setsignalhandler = platform.setsignalhandler
129 129 shellquote = platform.shellquote
130 130 spawndetached = platform.spawndetached
131 131 split = platform.split
132 132 sshargs = platform.sshargs
133 133 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
134 134 statisexec = platform.statisexec
135 135 statislink = platform.statislink
136 136 testpid = platform.testpid
137 137 umask = platform.umask
138 138 unlink = platform.unlink
139 139 username = platform.username
140 140
141 141 # Python compatibility
142 142
143 143 _notset = object()
144 144
145 145 # disable Python's problematic floating point timestamps (issue4836)
146 146 # (Python hypocritically says you shouldn't change this behavior in
147 147 # libraries, and sure enough Mercurial is not a library.)
148 148 os.stat_float_times(False)
149 149
150 150 def safehasattr(thing, attr):
151 151 return getattr(thing, attr, _notset) is not _notset
152 152
153 153 def bitsfrom(container):
154 154 bits = 0
155 155 for bit in container:
156 156 bits |= bit
157 157 return bits
158 158
159 159 DIGESTS = {
160 160 'md5': hashlib.md5,
161 161 'sha1': hashlib.sha1,
162 162 'sha512': hashlib.sha512,
163 163 }
164 164 # List of digest types from strongest to weakest
165 165 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
166 166
167 167 for k in DIGESTS_BY_STRENGTH:
168 168 assert k in DIGESTS
169 169
170 170 class digester(object):
171 171 """helper to compute digests.
172 172
173 173 This helper can be used to compute one or more digests given their name.
174 174
175 175 >>> d = digester(['md5', 'sha1'])
176 176 >>> d.update('foo')
177 177 >>> [k for k in sorted(d)]
178 178 ['md5', 'sha1']
179 179 >>> d['md5']
180 180 'acbd18db4cc2f85cedef654fccc4a4d8'
181 181 >>> d['sha1']
182 182 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
183 183 >>> digester.preferred(['md5', 'sha1'])
184 184 'sha1'
185 185 """
186 186
187 187 def __init__(self, digests, s=''):
188 188 self._hashes = {}
189 189 for k in digests:
190 190 if k not in DIGESTS:
191 191 raise Abort(_('unknown digest type: %s') % k)
192 192 self._hashes[k] = DIGESTS[k]()
193 193 if s:
194 194 self.update(s)
195 195
196 196 def update(self, data):
197 197 for h in self._hashes.values():
198 198 h.update(data)
199 199
200 200 def __getitem__(self, key):
201 201 if key not in DIGESTS:
202 202 raise Abort(_('unknown digest type: %s') % key)
203 203 return self._hashes[key].hexdigest()
204 204
205 205 def __iter__(self):
206 206 return iter(self._hashes)
207 207
208 208 @staticmethod
209 209 def preferred(supported):
210 210 """returns the strongest digest type in both supported and DIGESTS."""
211 211
212 212 for k in DIGESTS_BY_STRENGTH:
213 213 if k in supported:
214 214 return k
215 215 return None
216 216
217 217 class digestchecker(object):
218 218 """file handle wrapper that additionally checks content against a given
219 219 size and digests.
220 220
221 221 d = digestchecker(fh, size, {'md5': '...'})
222 222
223 223 When multiple digests are given, all of them are validated.
224 224 """
225 225
226 226 def __init__(self, fh, size, digests):
227 227 self._fh = fh
228 228 self._size = size
229 229 self._got = 0
230 230 self._digests = dict(digests)
231 231 self._digester = digester(self._digests.keys())
232 232
233 233 def read(self, length=-1):
234 234 content = self._fh.read(length)
235 235 self._digester.update(content)
236 236 self._got += len(content)
237 237 return content
238 238
239 239 def validate(self):
240 240 if self._size != self._got:
241 241 raise Abort(_('size mismatch: expected %d, got %d') %
242 242 (self._size, self._got))
243 243 for k, v in self._digests.items():
244 244 if v != self._digester[k]:
245 245 # i18n: first parameter is a digest name
246 246 raise Abort(_('%s mismatch: expected %s, got %s') %
247 247 (k, v, self._digester[k]))
248 248
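# Editor's sketch (illustrative, not part of the original module): a
# typical pairing of digester and digestchecker. The sha1 value reuses
# the digest of 'foo' shown in the digester docstring above; 'fh' and
# 'size' are assumptions supplied by the caller.
def _exampledigestcheck(fh, size):
    expected = {'sha1': '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'}
    wrapped = digestchecker(fh, size, expected)
    while wrapped.read(2 ** 16):
        pass
    wrapped.validate()  # raises Abort on a size or digest mismatch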
249 249 try:
250 250 buffer = buffer
251 251 except NameError:
252 252 if not pycompat.ispy3:
253 253 def buffer(sliceable, offset=0, length=None):
254 254 if length is not None:
255 255 return sliceable[offset:offset + length]
256 256 return sliceable[offset:]
257 257 else:
258 258 def buffer(sliceable, offset=0, length=None):
259 259 if length is not None:
260 260 return memoryview(sliceable)[offset:offset + length]
261 261 return memoryview(sliceable)[offset:]
262 262
263 263 closefds = pycompat.osname == 'posix'
264 264
265 265 _chunksize = 4096
266 266
267 267 class bufferedinputpipe(object):
268 268 """a manually buffered input pipe
269 269
270 270 Python will not let us use buffered IO and lazy reading with 'polling' at
271 271 the same time. We cannot probe the buffer state and select will not detect
272 272 that data are ready to read if they are already buffered.
273 273
274 274 This class lets us work around that by implementing its own buffering
275 275 (allowing efficient readline) while offering a way to know if the buffer is
276 276 empty from the output (allowing collaboration of the buffer with polling).
277 277
278 278 This class lives in the 'util' module because it makes use of the 'os'
279 279 module from the python stdlib.
280 280 """
281 281
282 282 def __init__(self, input):
283 283 self._input = input
284 284 self._buffer = []
285 285 self._eof = False
286 286 self._lenbuf = 0
287 287
288 288 @property
289 289 def hasbuffer(self):
290 290 """True if any data is currently buffered
291 291 
292 292 This will be used externally as a pre-step for polling IO. If there is
293 293 already buffered data then no polling should be set in place."""
294 294 return bool(self._buffer)
295 295
296 296 @property
297 297 def closed(self):
298 298 return self._input.closed
299 299
300 300 def fileno(self):
301 301 return self._input.fileno()
302 302
303 303 def close(self):
304 304 return self._input.close()
305 305
306 306 def read(self, size):
307 307 while (not self._eof) and (self._lenbuf < size):
308 308 self._fillbuffer()
309 309 return self._frombuffer(size)
310 310
311 311 def readline(self, *args, **kwargs):
312 312 if 1 < len(self._buffer):
313 313 # this should not happen because both read and readline end with a
314 314 # _frombuffer call that collapses it.
315 315 self._buffer = [''.join(self._buffer)]
316 316 self._lenbuf = len(self._buffer[0])
317 317 lfi = -1
318 318 if self._buffer:
319 319 lfi = self._buffer[-1].find('\n')
320 320 while (not self._eof) and lfi < 0:
321 321 self._fillbuffer()
322 322 if self._buffer:
323 323 lfi = self._buffer[-1].find('\n')
324 324 size = lfi + 1
325 325 if lfi < 0: # end of file
326 326 size = self._lenbuf
327 327 elif 1 < len(self._buffer):
328 328 # we need to take previous chunks into account
329 329 size += self._lenbuf - len(self._buffer[-1])
330 330 return self._frombuffer(size)
331 331
332 332 def _frombuffer(self, size):
333 333 """return at most 'size' data from the buffer
334 334
335 335 The data are removed from the buffer."""
336 336 if size == 0 or not self._buffer:
337 337 return ''
338 338 buf = self._buffer[0]
339 339 if 1 < len(self._buffer):
340 340 buf = ''.join(self._buffer)
341 341
342 342 data = buf[:size]
343 343 buf = buf[len(data):]
344 344 if buf:
345 345 self._buffer = [buf]
346 346 self._lenbuf = len(buf)
347 347 else:
348 348 self._buffer = []
349 349 self._lenbuf = 0
350 350 return data
351 351
352 352 def _fillbuffer(self):
353 353 """read data to the buffer"""
354 354 data = os.read(self._input.fileno(), _chunksize)
355 355 if not data:
356 356 self._eof = True
357 357 else:
358 358 self._lenbuf += len(data)
359 359 self._buffer.append(data)
360 360
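# Editor's sketch (illustrative, not part of the original module):
# combining bufferedinputpipe with polling. 'proc' is an assumed
# subprocess.Popen created with stdout=subprocess.PIPE; select is used
# only when the buffer is empty, which is what hasbuffer is for.
def _examplebufferedpoll(proc):
    import select  # local import; illustration only
    pipe = bufferedinputpipe(proc.stdout)
    if not pipe.hasbuffer:
        # fileno() is exposed, so the pipe object can be polled directly
        select.select([pipe], [], [])
    return pipe.readline()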
361 361 def popen2(cmd, env=None, newlines=False):
362 362 # Setting bufsize to -1 lets the system decide the buffer size.
363 363 # The default for bufsize is 0, meaning unbuffered. This leads to
364 364 # poor performance on Mac OS X: http://bugs.python.org/issue4194
365 365 p = subprocess.Popen(cmd, shell=True, bufsize=-1,
366 366 close_fds=closefds,
367 367 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
368 368 universal_newlines=newlines,
369 369 env=env)
370 370 return p.stdin, p.stdout
371 371
372 372 def popen3(cmd, env=None, newlines=False):
373 373 stdin, stdout, stderr, p = popen4(cmd, env, newlines)
374 374 return stdin, stdout, stderr
375 375
376 376 def popen4(cmd, env=None, newlines=False, bufsize=-1):
377 377 p = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
378 378 close_fds=closefds,
379 379 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
380 380 stderr=subprocess.PIPE,
381 381 universal_newlines=newlines,
382 382 env=env)
383 383 return p.stdin, p.stdout, p.stderr, p
384 384
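# Editor's sketch (illustrative; assumes a POSIX shell with 'tr'): the
# popen helpers run a shell command and hand back the pipe ends.
def _examplepopen2():
    fin, fout = popen2('tr a-z A-Z')
    fin.write('hello\n')
    fin.close()
    return fout.read()  # -> 'HELLO\n'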
385 385 def version():
386 386 """Return version information if available."""
387 387 try:
388 388 from . import __version__
389 389 return __version__.version
390 390 except ImportError:
391 391 return 'unknown'
392 392
393 393 def versiontuple(v=None, n=4):
394 394 """Parses a Mercurial version string into an N-tuple.
395 395
396 396 The version string to be parsed is specified with the ``v`` argument.
397 397 If it isn't defined, the current Mercurial version string will be parsed.
398 398
399 399 ``n`` can be 2, 3, or 4. Here is how some version strings map to
400 400 returned values:
401 401
402 402 >>> v = '3.6.1+190-df9b73d2d444'
403 403 >>> versiontuple(v, 2)
404 404 (3, 6)
405 405 >>> versiontuple(v, 3)
406 406 (3, 6, 1)
407 407 >>> versiontuple(v, 4)
408 408 (3, 6, 1, '190-df9b73d2d444')
409 409
410 410 >>> versiontuple('3.6.1+190-df9b73d2d444+20151118')
411 411 (3, 6, 1, '190-df9b73d2d444+20151118')
412 412
413 413 >>> v = '3.6'
414 414 >>> versiontuple(v, 2)
415 415 (3, 6)
416 416 >>> versiontuple(v, 3)
417 417 (3, 6, None)
418 418 >>> versiontuple(v, 4)
419 419 (3, 6, None, None)
420 420
421 421 >>> v = '3.9-rc'
422 422 >>> versiontuple(v, 2)
423 423 (3, 9)
424 424 >>> versiontuple(v, 3)
425 425 (3, 9, None)
426 426 >>> versiontuple(v, 4)
427 427 (3, 9, None, 'rc')
428 428
429 429 >>> v = '3.9-rc+2-02a8fea4289b'
430 430 >>> versiontuple(v, 2)
431 431 (3, 9)
432 432 >>> versiontuple(v, 3)
433 433 (3, 9, None)
434 434 >>> versiontuple(v, 4)
435 435 (3, 9, None, 'rc+2-02a8fea4289b')
436 436 """
437 437 if not v:
438 438 v = version()
439 439 parts = remod.split('[\+-]', v, 1)
440 440 if len(parts) == 1:
441 441 vparts, extra = parts[0], None
442 442 else:
443 443 vparts, extra = parts
444 444
445 445 vints = []
446 446 for i in vparts.split('.'):
447 447 try:
448 448 vints.append(int(i))
449 449 except ValueError:
450 450 break
451 451 # (3, 6) -> (3, 6, None)
452 452 while len(vints) < 3:
453 453 vints.append(None)
454 454
455 455 if n == 2:
456 456 return (vints[0], vints[1])
457 457 if n == 3:
458 458 return (vints[0], vints[1], vints[2])
459 459 if n == 4:
460 460 return (vints[0], vints[1], vints[2], extra)
461 461
462 462 # used by parsedate
463 463 defaultdateformats = (
464 464 '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
465 465 '%Y-%m-%dT%H:%M', # without seconds
466 466 '%Y-%m-%dT%H%M%S', # another awful but legal variant without :
467 467 '%Y-%m-%dT%H%M', # without seconds
468 468 '%Y-%m-%d %H:%M:%S', # our common legal variant
469 469 '%Y-%m-%d %H:%M', # without seconds
470 470 '%Y-%m-%d %H%M%S', # without :
471 471 '%Y-%m-%d %H%M', # without seconds
472 472 '%Y-%m-%d %I:%M:%S%p',
473 473 '%Y-%m-%d %H:%M',
474 474 '%Y-%m-%d %I:%M%p',
475 475 '%Y-%m-%d',
476 476 '%m-%d',
477 477 '%m/%d',
478 478 '%m/%d/%y',
479 479 '%m/%d/%Y',
480 480 '%a %b %d %H:%M:%S %Y',
481 481 '%a %b %d %I:%M:%S%p %Y',
482 482 '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
483 483 '%b %d %H:%M:%S %Y',
484 484 '%b %d %I:%M:%S%p %Y',
485 485 '%b %d %H:%M:%S',
486 486 '%b %d %I:%M:%S%p',
487 487 '%b %d %H:%M',
488 488 '%b %d %I:%M%p',
489 489 '%b %d %Y',
490 490 '%b %d',
491 491 '%H:%M:%S',
492 492 '%I:%M:%S%p',
493 493 '%H:%M',
494 494 '%I:%M%p',
495 495 )
496 496
497 497 extendeddateformats = defaultdateformats + (
498 498 "%Y",
499 499 "%Y-%m",
500 500 "%b",
501 501 "%b %Y",
502 502 )
503 503
504 504 def cachefunc(func):
505 505 '''cache the result of function calls'''
506 506 # XXX doesn't handle keywords args
507 507 if func.__code__.co_argcount == 0:
508 508 cache = []
509 509 def f():
510 510 if len(cache) == 0:
511 511 cache.append(func())
512 512 return cache[0]
513 513 return f
514 514 cache = {}
515 515 if func.__code__.co_argcount == 1:
516 516 # we gain a small amount of time because
517 517 # we don't need to pack/unpack the list
518 518 def f(arg):
519 519 if arg not in cache:
520 520 cache[arg] = func(arg)
521 521 return cache[arg]
522 522 else:
523 523 def f(*args):
524 524 if args not in cache:
525 525 cache[args] = func(*args)
526 526 return cache[args]
527 527
528 528 return f
529 529
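# Editor's sketch (illustrative, not part of the original module):
# cachefunc memoizes by positional arguments only; keyword arguments
# are not handled, per the XXX above.
def _examplecachefunc():
    square = cachefunc(lambda x: x * x)
    square(3)          # computes and stores the result
    return square(3)   # served from the cache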
530 530 class sortdict(dict):
531 531 '''a simple sorted dictionary'''
532 532 def __init__(self, data=None):
533 533 self._list = []
534 534 if data:
535 535 self.update(data)
536 536 def copy(self):
537 537 return sortdict(self)
538 538 def __setitem__(self, key, val):
539 539 if key in self:
540 540 self._list.remove(key)
541 541 self._list.append(key)
542 542 dict.__setitem__(self, key, val)
543 543 def __iter__(self):
544 544 return self._list.__iter__()
545 545 def update(self, src):
546 546 if isinstance(src, dict):
547 547 src = src.iteritems()
548 548 for k, v in src:
549 549 self[k] = v
550 550 def clear(self):
551 551 dict.clear(self)
552 552 self._list = []
553 553 def items(self):
554 554 return [(k, self[k]) for k in self._list]
555 555 def __delitem__(self, key):
556 556 dict.__delitem__(self, key)
557 557 self._list.remove(key)
558 558 def pop(self, key, *args, **kwargs):
559 559 try:
560 560 self._list.remove(key)
561 561 except ValueError:
562 562 pass
563 563 return dict.pop(self, key, *args, **kwargs)
564 564 def keys(self):
565 565 return self._list[:]
566 566 def iterkeys(self):
567 567 return self._list.__iter__()
568 568 def iteritems(self):
569 569 for k in self._list:
570 570 yield k, self[k]
571 571 def insert(self, index, key, val):
572 572 self._list.insert(index, key)
573 573 dict.__setitem__(self, key, val)
574 574 def __repr__(self):
575 575 if not self:
576 576 return '%s()' % self.__class__.__name__
577 577 return '%s(%r)' % (self.__class__.__name__, self.items())
578 578
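# Editor's sketch (illustrative, not part of the original module):
# sortdict preserves insertion order, and re-setting an existing key
# moves it to the end.
def _examplesortdict():
    d = sortdict([('a', 1), ('b', 2)])
    d['a'] = 3
    assert d.keys() == ['b', 'a']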
579 579 class _lrucachenode(object):
580 580 """A node in a doubly linked list.
581 581
582 582 Holds a reference to nodes on either side as well as a key-value
583 583 pair for the dictionary entry.
584 584 """
585 585 __slots__ = (u'next', u'prev', u'key', u'value')
586 586
587 587 def __init__(self):
588 588 self.next = None
589 589 self.prev = None
590 590
591 591 self.key = _notset
592 592 self.value = None
593 593
594 594 def markempty(self):
595 595 """Mark the node as emptied."""
596 596 self.key = _notset
597 597
598 598 class lrucachedict(object):
599 599 """Dict that caches most recent accesses and sets.
600 600
601 601 The dict consists of an actual backing dict - indexed by original
602 602 key - and a doubly linked circular list defining the order of entries in
603 603 the cache.
604 604
605 605 The head node is the newest entry in the cache. If the cache is full,
606 606 we recycle head.prev and make it the new head. Cache accesses result in
607 607 the node being moved to before the existing head and being marked as the
608 608 new head node.
609 609 """
610 610 def __init__(self, max):
611 611 self._cache = {}
612 612
613 613 self._head = head = _lrucachenode()
614 614 head.prev = head
615 615 head.next = head
616 616 self._size = 1
617 617 self._capacity = max
618 618
619 619 def __len__(self):
620 620 return len(self._cache)
621 621
622 622 def __contains__(self, k):
623 623 return k in self._cache
624 624
625 625 def __iter__(self):
626 626 # We don't have to iterate in cache order, but why not.
627 627 n = self._head
628 628 for i in range(len(self._cache)):
629 629 yield n.key
630 630 n = n.next
631 631
632 632 def __getitem__(self, k):
633 633 node = self._cache[k]
634 634 self._movetohead(node)
635 635 return node.value
636 636
637 637 def __setitem__(self, k, v):
638 638 node = self._cache.get(k)
639 639 # Replace existing value and mark as newest.
640 640 if node is not None:
641 641 node.value = v
642 642 self._movetohead(node)
643 643 return
644 644
645 645 if self._size < self._capacity:
646 646 node = self._addcapacity()
647 647 else:
648 648 # Grab the last/oldest item.
649 649 node = self._head.prev
650 650
651 651 # At capacity. Kill the old entry.
652 652 if node.key is not _notset:
653 653 del self._cache[node.key]
654 654
655 655 node.key = k
656 656 node.value = v
657 657 self._cache[k] = node
658 658 # And mark it as newest entry. No need to adjust order since it
659 659 # is already self._head.prev.
660 660 self._head = node
661 661
662 662 def __delitem__(self, k):
663 663 node = self._cache.pop(k)
664 664 node.markempty()
665 665
666 666 # Temporarily mark as newest item before re-adjusting head to make
667 667 # this node the oldest item.
668 668 self._movetohead(node)
669 669 self._head = node.next
670 670
671 671 # Additional dict methods.
672 672
673 673 def get(self, k, default=None):
674 674 try:
675 675 return self._cache[k].value
676 676 except KeyError:
677 677 return default
678 678
679 679 def clear(self):
680 680 n = self._head
681 681 while n.key is not _notset:
682 682 n.markempty()
683 683 n = n.next
684 684
685 685 self._cache.clear()
686 686
687 687 def copy(self):
688 688 result = lrucachedict(self._capacity)
689 689 n = self._head.prev
690 690 # Iterate in oldest-to-newest order, so the copy has the right ordering
691 691 for i in range(len(self._cache)):
692 692 result[n.key] = n.value
693 693 n = n.prev
694 694 return result
695 695
696 696 def _movetohead(self, node):
697 697 """Mark a node as the newest, making it the new head.
698 698
699 699 When a node is accessed, it becomes the freshest entry in the LRU
700 700 list, which is denoted by self._head.
701 701
702 702 Visually, let's make ``N`` the new head node (* denotes head):
703 703
704 704 previous/oldest <-> head <-> next/next newest
705 705
706 706 ----<->--- A* ---<->-----
707 707 | |
708 708 E <-> D <-> N <-> C <-> B
709 709
710 710 To:
711 711
712 712 ----<->--- N* ---<->-----
713 713 | |
714 714 E <-> D <-> C <-> B <-> A
715 715
716 716 This requires the following moves:
717 717
718 718 C.next = D (node.prev.next = node.next)
719 719 D.prev = C (node.next.prev = node.prev)
720 720 E.next = N (head.prev.next = node)
721 721 N.prev = E (node.prev = head.prev)
722 722 N.next = A (node.next = head)
723 723 A.prev = N (head.prev = node)
724 724 """
725 725 head = self._head
726 726 # C.next = D
727 727 node.prev.next = node.next
728 728 # D.prev = C
729 729 node.next.prev = node.prev
730 730 # N.prev = E
731 731 node.prev = head.prev
732 732 # N.next = A
733 733 # It is tempting to do just "head" here, however if node is
734 734 # adjacent to head, this will do bad things.
735 735 node.next = head.prev.next
736 736 # E.next = N
737 737 node.next.prev = node
738 738 # A.prev = N
739 739 node.prev.next = node
740 740
741 741 self._head = node
742 742
743 743 def _addcapacity(self):
744 744 """Add a node to the circular linked list.
745 745
746 746 The new node is inserted before the head node.
747 747 """
748 748 head = self._head
749 749 node = _lrucachenode()
750 750 head.prev.next = node
751 751 node.prev = head.prev
752 752 node.next = head
753 753 head.prev = node
754 754 self._size += 1
755 755 return node
756 756
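# Editor's sketch (illustrative, not part of the original module):
# eviction order in lrucachedict. Once the cache is at capacity,
# setting a new key recycles the oldest node.
def _examplelrucachedict():
    c = lrucachedict(2)
    c['a'] = 1
    c['b'] = 2
    c['a']           # touch 'a'; 'b' is now the oldest entry
    c['c'] = 3       # at capacity: 'b' is evicted
    assert 'b' not in c and 'a' in c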
757 757 def lrucachefunc(func):
758 758 '''cache most recent results of function calls'''
759 759 cache = {}
760 760 order = collections.deque()
761 761 if func.__code__.co_argcount == 1:
762 762 def f(arg):
763 763 if arg not in cache:
764 764 if len(cache) > 20:
765 765 del cache[order.popleft()]
766 766 cache[arg] = func(arg)
767 767 else:
768 768 order.remove(arg)
769 769 order.append(arg)
770 770 return cache[arg]
771 771 else:
772 772 def f(*args):
773 773 if args not in cache:
774 774 if len(cache) > 20:
775 775 del cache[order.popleft()]
776 776 cache[args] = func(*args)
777 777 else:
778 778 order.remove(args)
779 779 order.append(args)
780 780 return cache[args]
781 781
782 782 return f
783 783
784 784 class propertycache(object):
785 785 def __init__(self, func):
786 786 self.func = func
787 787 self.name = func.__name__
788 788 def __get__(self, obj, type=None):
789 789 result = self.func(obj)
790 790 self.cachevalue(obj, result)
791 791 return result
792 792
793 793 def cachevalue(self, obj, value):
794 794 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
795 795 obj.__dict__[self.name] = value
796 796
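# Editor's sketch (illustrative, not part of the original module):
# propertycache computes once per instance; the cached value is stored
# in __dict__ and shadows the descriptor on later lookups.
class _examplecached(object):
    @propertycache
    def answer(self):
        return 42  # arbitrary stand-in for an expensive computation
# _examplecached().answer runs the function once, then reads __dict__.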
797 797 def pipefilter(s, cmd):
798 798 '''filter string S through command CMD, returning its output'''
799 799 p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
800 800 stdin=subprocess.PIPE, stdout=subprocess.PIPE)
801 801 pout, perr = p.communicate(s)
802 802 return pout
803 803
804 804 def tempfilter(s, cmd):
805 805 '''filter string S through a pair of temporary files with CMD.
806 806 CMD is used as a template to create the real command to be run,
807 807 with the strings INFILE and OUTFILE replaced by the real names of
808 808 the temporary files generated.'''
809 809 inname, outname = None, None
810 810 try:
811 811 infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
812 812 fp = os.fdopen(infd, pycompat.sysstr('wb'))
813 813 fp.write(s)
814 814 fp.close()
815 815 outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
816 816 os.close(outfd)
817 817 cmd = cmd.replace('INFILE', inname)
818 818 cmd = cmd.replace('OUTFILE', outname)
819 819 code = os.system(cmd)
820 820 if pycompat.sysplatform == 'OpenVMS' and code & 1:
821 821 code = 0
822 822 if code:
823 823 raise Abort(_("command '%s' failed: %s") %
824 824 (cmd, explainexit(code)))
825 825 return readfile(outname)
826 826 finally:
827 827 try:
828 828 if inname:
829 829 os.unlink(inname)
830 830 except OSError:
831 831 pass
832 832 try:
833 833 if outname:
834 834 os.unlink(outname)
835 835 except OSError:
836 836 pass
837 837
838 838 filtertable = {
839 839 'tempfile:': tempfilter,
840 840 'pipe:': pipefilter,
841 841 }
842 842
843 843 def filter(s, cmd):
844 844 "filter a string through a command that transforms its input to its output"
845 845 for name, fn in filtertable.iteritems():
846 846 if cmd.startswith(name):
847 847 return fn(s, cmd[len(name):].lstrip())
848 848 return pipefilter(s, cmd)
849 849
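# Editor's sketch (illustrative; assumes a POSIX 'tr'): the 'pipe:' and
# 'tempfile:' prefixes in filtertable select the filtering strategy;
# bare commands fall through to pipefilter.
def _examplefilter(s):
    out1 = filter(s, 'pipe: tr a-z A-Z')
    out2 = filter(s, 'tempfile: tr a-z A-Z < INFILE > OUTFILE')
    out3 = filter(s, 'tr a-z A-Z')  # same as the 'pipe:' form
    return out1, out2, out3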
850 850 def binary(s):
851 851 """return true if a string is binary data"""
852 852 return bool(s and '\0' in s)
853 853
854 854 def increasingchunks(source, min=1024, max=65536):
855 855 '''return no less than min bytes per chunk while data remains,
856 856 doubling min after each chunk until it reaches max'''
857 857 def log2(x):
858 858 if not x:
859 859 return 0
860 860 i = 0
861 861 while x:
862 862 x >>= 1
863 863 i += 1
864 864 return i - 1
865 865
866 866 buf = []
867 867 blen = 0
868 868 for chunk in source:
869 869 buf.append(chunk)
870 870 blen += len(chunk)
871 871 if blen >= min:
872 872 if min < max:
873 873 min = min << 1
874 874 nmin = 1 << log2(blen)
875 875 if nmin > min:
876 876 min = nmin
877 877 if min > max:
878 878 min = max
879 879 yield ''.join(buf)
880 880 blen = 0
881 881 buf = []
882 882 if buf:
883 883 yield ''.join(buf)
884 884
885 885 Abort = error.Abort
886 886
887 887 def always(fn):
888 888 return True
889 889
890 890 def never(fn):
891 891 return False
892 892
893 893 def nogc(func):
894 894 """disable garbage collector
895 895
896 896 Python's garbage collector triggers a GC each time a certain number of
897 897 container objects (the number being defined by gc.get_threshold()) are
898 898 allocated even when marked not to be tracked by the collector. Tracking has
899 899 no effect on when GCs are triggered, only on what objects the GC looks
900 900 into. As a workaround, disable GC while building complex (huge)
901 901 containers.
902 902
903 903 This garbage collector issue has been fixed in Python 2.7.
904 904 """
905 905 if sys.version_info >= (2, 7):
906 906 return func
907 907 def wrapper(*args, **kwargs):
908 908 gcenabled = gc.isenabled()
909 909 gc.disable()
910 910 try:
911 911 return func(*args, **kwargs)
912 912 finally:
913 913 if gcenabled:
914 914 gc.enable()
915 915 return wrapper
916 916
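# Editor's sketch (illustrative, not part of the original module): nogc
# as a decorator around a builder that allocates many container objects
# at once.
@nogc
def _examplebuildmap(items):
    return dict((i, i) for i in items)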
917 917 def pathto(root, n1, n2):
918 918 '''return the relative path from one place to another.
919 919 root should use os.sep to separate directories
920 920 n1 should use os.sep to separate directories
921 921 n2 should use "/" to separate directories
922 922 returns an os.sep-separated path.
923 923
924 924 If n1 is a relative path, it's assumed it's
925 925 relative to root.
926 926 n2 should always be relative to root.
927 927 '''
928 928 if not n1:
929 929 return localpath(n2)
930 930 if os.path.isabs(n1):
931 931 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
932 932 return os.path.join(root, localpath(n2))
933 933 n2 = '/'.join((pconvert(root), n2))
934 934 a, b = splitpath(n1), n2.split('/')
935 935 a.reverse()
936 936 b.reverse()
937 937 while a and b and a[-1] == b[-1]:
938 938 a.pop()
939 939 b.pop()
940 940 b.reverse()
941 941 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
942 942
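# Editor's sketch (illustrative; POSIX separators assumed):
# pathto walks up from n1 and back down to n2.
def _examplepathto():
    return pathto('/repo', 'a/b', 'a/c')  # -> '../c' on POSIX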
943 943 def mainfrozen():
944 944 """return True if we are a frozen executable.
945 945
946 946 The code supports py2exe (most common, Windows only) and tools/freeze
947 947 (portable, not much used).
948 948 """
949 949 return (safehasattr(sys, "frozen") or # new py2exe
950 950 safehasattr(sys, "importers") or # old py2exe
951 951 imp.is_frozen(u"__main__")) # tools/freeze
952 952
953 953 # the location of data files matching the source code
954 954 if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
955 955 # executable version (py2exe) doesn't support __file__
956 956 datapath = os.path.dirname(pycompat.sysexecutable)
957 957 else:
958 958 datapath = os.path.dirname(pycompat.fsencode(__file__))
959 959
960 960 i18n.setdatapath(datapath)
961 961
962 962 _hgexecutable = None
963 963
964 964 def hgexecutable():
965 965 """return location of the 'hg' executable.
966 966
967 967 Defaults to $HG or 'hg' in the search path.
968 968 """
969 969 if _hgexecutable is None:
970 970 hg = encoding.environ.get('HG')
971 971 mainmod = sys.modules[pycompat.sysstr('__main__')]
972 972 if hg:
973 973 _sethgexecutable(hg)
974 974 elif mainfrozen():
975 975 if getattr(sys, 'frozen', None) == 'macosx_app':
976 976 # Env variable set by py2app
977 977 _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
978 978 else:
979 979 _sethgexecutable(pycompat.sysexecutable)
980 980 elif (os.path.basename(
981 981 pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
982 982 _sethgexecutable(pycompat.fsencode(mainmod.__file__))
983 983 else:
984 984 exe = findexe('hg') or os.path.basename(sys.argv[0])
985 985 _sethgexecutable(exe)
986 986 return _hgexecutable
987 987
988 988 def _sethgexecutable(path):
989 989 """set location of the 'hg' executable"""
990 990 global _hgexecutable
991 991 _hgexecutable = path
992 992
993 993 def _isstdout(f):
994 994 fileno = getattr(f, 'fileno', None)
995 995 return fileno and fileno() == sys.__stdout__.fileno()
996 996
997 997 def shellenviron(environ=None):
998 998 """return environ with optional override, useful for shelling out"""
999 999 def py2shell(val):
1000 1000 'convert python object into string that is useful to shell'
1001 1001 if val is None or val is False:
1002 1002 return '0'
1003 1003 if val is True:
1004 1004 return '1'
1005 1005 return str(val)
1006 1006 env = dict(encoding.environ)
1007 1007 if environ:
1008 1008 env.update((k, py2shell(v)) for k, v in environ.iteritems())
1009 1009 env['HG'] = hgexecutable()
1010 1010 return env
1011 1011
1012 1012 def system(cmd, environ=None, cwd=None, out=None):
1013 1013 '''enhanced shell command execution.
1014 1014 run with environment maybe modified, maybe in different dir.
1015 1015
1016 1016 if out is specified, it is assumed to be a file-like object that has a
1017 1017 write() method. stdout and stderr will be redirected to out.'''
1018 1018 try:
1019 1019 stdout.flush()
1020 1020 except Exception:
1021 1021 pass
1022 1022 cmd = quotecommand(cmd)
1023 1023 if pycompat.sysplatform == 'plan9' and (sys.version_info[0] == 2
1024 1024 and sys.version_info[1] < 7):
1025 1025 # subprocess kludge to work around issues in half-baked Python
1026 1026 # ports, notably bichued/python:
1027 1027 if cwd is not None:
1028 1028 os.chdir(cwd)
1029 1029 rc = os.system(cmd)
1030 1030 else:
1031 1031 env = shellenviron(environ)
1032 1032 if out is None or _isstdout(out):
1033 1033 rc = subprocess.call(cmd, shell=True, close_fds=closefds,
1034 1034 env=env, cwd=cwd)
1035 1035 else:
1036 1036 proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1037 1037 env=env, cwd=cwd, stdout=subprocess.PIPE,
1038 1038 stderr=subprocess.STDOUT)
1039 1039 for line in iter(proc.stdout.readline, ''):
1040 1040 out.write(line)
1041 1041 proc.wait()
1042 1042 rc = proc.returncode
1043 1043 if pycompat.sysplatform == 'OpenVMS' and rc & 1:
1044 1044 rc = 0
1045 1045 return rc
1046 1046
1047 1047 def checksignature(func):
1048 1048 '''wrap a function with code to check for calling errors'''
1049 1049 def check(*args, **kwargs):
1050 1050 try:
1051 1051 return func(*args, **kwargs)
1052 1052 except TypeError:
1053 1053 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1054 1054 raise error.SignatureError
1055 1055 raise
1056 1056
1057 1057 return check
1058 1058
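# Editor's sketch (illustrative, not part of the original module):
# checksignature converts a TypeError raised directly by a bad call
# signature into SignatureError, while a TypeError raised deeper inside
# the callee still propagates unchanged.
def _examplechecksignature():
    safe = checksignature(lambda x: x)
    try:
        safe(1, 2)  # wrong arity
    except error.SignatureError:
        return True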
1059 1059 # Hardlinks are problematic on CIFS, do not allow hardlinks
1060 1060 # until we find a way to work around it cleanly (issue4546).
1061 1061 # This is a variable so extensions can opt-in to using them.
1062 1062 allowhardlinks = False
1063 1063
1064 1064 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1065 1065 '''copy a file, preserving mode and optionally other stat info like
1066 1066 atime/mtime
1067 1067
1068 1068 checkambig argument is used with filestat, and is useful only if
1069 1069 destination file is guarded by any lock (e.g. repo.lock or
1070 1070 repo.wlock).
1071 1071
1072 1072 copystat and checkambig should be exclusive.
1073 1073 '''
1074 1074 assert not (copystat and checkambig)
1075 1075 oldstat = None
1076 1076 if os.path.lexists(dest):
1077 1077 if checkambig:
1078 1078 oldstat = checkambig and filestat(dest)
1079 1079 unlink(dest)
1080 1080 if allowhardlinks and hardlink:
1081 1081 try:
1082 1082 oslink(src, dest)
1083 1083 return
1084 1084 except (IOError, OSError):
1085 1085 pass # fall back to normal copy
1086 1086 if os.path.islink(src):
1087 1087 os.symlink(os.readlink(src), dest)
1088 1088 # copytime is ignored for symlinks, but in general copytime isn't needed
1089 1089 # for them anyway
1090 1090 else:
1091 1091 try:
1092 1092 shutil.copyfile(src, dest)
1093 1093 if copystat:
1094 1094 # copystat also copies mode
1095 1095 shutil.copystat(src, dest)
1096 1096 else:
1097 1097 shutil.copymode(src, dest)
1098 1098 if oldstat and oldstat.stat:
1099 1099 newstat = filestat(dest)
1100 1100 if newstat.isambig(oldstat):
1101 1101 # stat of copied file is ambiguous to original one
1102 1102 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1103 1103 os.utime(dest, (advanced, advanced))
1104 1104 except shutil.Error as inst:
1105 1105 raise Abort(str(inst))
1106 1106
1107 1107 def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
1108 1108 """Copy a directory tree using hardlinks if possible."""
1109 1109 num = 0
1110 1110
1111 1111 if hardlink is None:
1112 1112 hardlink = (os.stat(src).st_dev ==
1113 1113 os.stat(os.path.dirname(dst)).st_dev)
1114 1114 if hardlink:
1115 1115 topic = _('linking')
1116 1116 else:
1117 1117 topic = _('copying')
1118 1118
1119 1119 if os.path.isdir(src):
1120 1120 os.mkdir(dst)
1121 1121 for name, kind in osutil.listdir(src):
1122 1122 srcname = os.path.join(src, name)
1123 1123 dstname = os.path.join(dst, name)
1124 1124 def nprog(t, pos):
1125 1125 if pos is not None:
1126 1126 return progress(t, pos + num)
1127 1127 hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
1128 1128 num += n
1129 1129 else:
1130 1130 if hardlink:
1131 1131 try:
1132 1132 oslink(src, dst)
1133 1133 except (IOError, OSError):
1134 1134 hardlink = False
1135 1135 shutil.copy(src, dst)
1136 1136 else:
1137 1137 shutil.copy(src, dst)
1138 1138 num += 1
1139 1139 progress(topic, num)
1140 1140 progress(topic, None)
1141 1141
1142 1142 return hardlink, num
1143 1143
1144 1144 _winreservednames = '''con prn aux nul
1145 1145 com1 com2 com3 com4 com5 com6 com7 com8 com9
1146 1146 lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
1147 1147 _winreservedchars = ':*?"<>|'
1148 1148 def checkwinfilename(path):
1149 1149 r'''Check that the base-relative path is a valid filename on Windows.
1150 1150 Returns None if the path is ok, or a UI string describing the problem.
1151 1151
1152 1152 >>> checkwinfilename("just/a/normal/path")
1153 1153 >>> checkwinfilename("foo/bar/con.xml")
1154 1154 "filename contains 'con', which is reserved on Windows"
1155 1155 >>> checkwinfilename("foo/con.xml/bar")
1156 1156 "filename contains 'con', which is reserved on Windows"
1157 1157 >>> checkwinfilename("foo/bar/xml.con")
1158 1158 >>> checkwinfilename("foo/bar/AUX/bla.txt")
1159 1159 "filename contains 'AUX', which is reserved on Windows"
1160 1160 >>> checkwinfilename("foo/bar/bla:.txt")
1161 1161 "filename contains ':', which is reserved on Windows"
1162 1162 >>> checkwinfilename("foo/bar/b\07la.txt")
1163 1163 "filename contains '\\x07', which is invalid on Windows"
1164 1164 >>> checkwinfilename("foo/bar/bla ")
1165 1165 "filename ends with ' ', which is not allowed on Windows"
1166 1166 >>> checkwinfilename("../bar")
1167 1167 >>> checkwinfilename("foo\\")
1168 1168 "filename ends with '\\', which is invalid on Windows"
1169 1169 >>> checkwinfilename("foo\\/bar")
1170 1170 "directory name ends with '\\', which is invalid on Windows"
1171 1171 '''
1172 1172 if path.endswith('\\'):
1173 1173 return _("filename ends with '\\', which is invalid on Windows")
1174 1174 if '\\/' in path:
1175 1175 return _("directory name ends with '\\', which is invalid on Windows")
1176 1176 for n in path.replace('\\', '/').split('/'):
1177 1177 if not n:
1178 1178 continue
1179 1179 for c in pycompat.bytestr(n):
1180 1180 if c in _winreservedchars:
1181 1181 return _("filename contains '%s', which is reserved "
1182 1182 "on Windows") % c
1183 1183 if ord(c) <= 31:
1184 1184 return _("filename contains %r, which is invalid "
1185 1185 "on Windows") % c
1186 1186 base = n.split('.')[0]
1187 1187 if base and base.lower() in _winreservednames:
1188 1188 return _("filename contains '%s', which is reserved "
1189 1189 "on Windows") % base
1190 1190 t = n[-1]
1191 1191 if t in '. ' and n not in ('.', '..'):
1192 1192 return _("filename ends with '%s', which is not allowed "
1193 1193 "on Windows") % t
1194 1194
1195 1195 if pycompat.osname == 'nt':
1196 1196 checkosfilename = checkwinfilename
1197 1197 timer = time.clock
1198 1198 else:
1199 1199 checkosfilename = platform.checkosfilename
1200 1200 timer = time.time
1201 1201
1202 1202 if safehasattr(time, "perf_counter"):
1203 1203 timer = time.perf_counter
1204 1204
1205 1205 def makelock(info, pathname):
1206 1206 try:
1207 1207 return os.symlink(info, pathname)
1208 1208 except OSError as why:
1209 1209 if why.errno == errno.EEXIST:
1210 1210 raise
1211 1211 except AttributeError: # no symlink in os
1212 1212 pass
1213 1213
1214 1214 ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
1215 1215 os.write(ld, info)
1216 1216 os.close(ld)
1217 1217
1218 1218 def readlock(pathname):
1219 1219 try:
1220 1220 return os.readlink(pathname)
1221 1221 except OSError as why:
1222 1222 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1223 1223 raise
1224 1224 except AttributeError: # no symlink in os
1225 1225 pass
1226 1226 fp = posixfile(pathname)
1227 1227 r = fp.read()
1228 1228 fp.close()
1229 1229 return r
1230 1230
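# Editor's sketch (illustrative; the lock path is an assumption):
# makelock stores the lock info in a symlink where the platform allows
# it, falling back to an exclusively-created file; readlock mirrors
# both strategies.
def _examplelock(path):
    makelock('somehost:12345', path)
    return readlock(path)  # -> 'somehost:12345'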
1231 1231 def fstat(fp):
1232 1232 '''stat file object that may not have fileno method.'''
1233 1233 try:
1234 1234 return os.fstat(fp.fileno())
1235 1235 except AttributeError:
1236 1236 return os.stat(fp.name)
1237 1237
1238 1238 # File system features
1239 1239
1240 1240 def fscasesensitive(path):
1241 1241 """
1242 1242 Return true if the given path is on a case-sensitive filesystem
1243 1243
1244 1244 Requires a path (like /foo/.hg) ending with a foldable final
1245 1245 directory component.
1246 1246 """
1247 1247 s1 = os.lstat(path)
1248 1248 d, b = os.path.split(path)
1249 1249 b2 = b.upper()
1250 1250 if b == b2:
1251 1251 b2 = b.lower()
1252 1252 if b == b2:
1253 1253 return True # no evidence against case sensitivity
1254 1254 p2 = os.path.join(d, b2)
1255 1255 try:
1256 1256 s2 = os.lstat(p2)
1257 1257 if s2 == s1:
1258 1258 return False
1259 1259 return True
1260 1260 except OSError:
1261 1261 return True
1262 1262
1263 1263 try:
1264 1264 import re2
1265 1265 _re2 = None
1266 1266 except ImportError:
1267 1267 _re2 = False
1268 1268
1269 1269 class _re(object):
1270 1270 def _checkre2(self):
1271 1271 global _re2
1272 1272 try:
1273 1273 # check if match works, see issue3964
1274 1274 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1275 1275 except ImportError:
1276 1276 _re2 = False
1277 1277
1278 1278 def compile(self, pat, flags=0):
1279 1279 '''Compile a regular expression, using re2 if possible
1280 1280
1281 1281 For best performance, use only re2-compatible regexp features. The
1282 1282 only flags from the re module that are re2-compatible are
1283 1283 IGNORECASE and MULTILINE.'''
1284 1284 if _re2 is None:
1285 1285 self._checkre2()
1286 1286 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1287 1287 if flags & remod.IGNORECASE:
1288 1288 pat = '(?i)' + pat
1289 1289 if flags & remod.MULTILINE:
1290 1290 pat = '(?m)' + pat
1291 1291 try:
1292 1292 return re2.compile(pat)
1293 1293 except re2.error:
1294 1294 pass
1295 1295 return remod.compile(pat, flags)
1296 1296
1297 1297 @propertycache
1298 1298 def escape(self):
1299 1299 '''Return the version of escape corresponding to self.compile.
1300 1300
1301 1301 This is imperfect because whether re2 or re is used for a particular
1302 1302 function depends on the flags, etc, but it's the best we can do.
1303 1303 '''
1304 1304 global _re2
1305 1305 if _re2 is None:
1306 1306 self._checkre2()
1307 1307 if _re2:
1308 1308 return re2.escape
1309 1309 else:
1310 1310 return remod.escape
1311 1311
1312 1312 re = _re()
1313 1313
1314 1314 _fspathcache = {}
1315 1315 def fspath(name, root):
1316 1316 '''Get name in the case stored in the filesystem
1317 1317
1318 1318 The name should be relative to root, and be normcase-ed for efficiency.
1319 1319
1320 1320 Note that this function is unnecessary, and should not be
1321 1321 called, for case-sensitive filesystems (simply because it's expensive).
1322 1322
1323 1323 The root should be normcase-ed, too.
1324 1324 '''
1325 1325 def _makefspathcacheentry(dir):
1326 1326 return dict((normcase(n), n) for n in os.listdir(dir))
1327 1327
1328 1328 seps = pycompat.ossep
1329 1329 if pycompat.osaltsep:
1330 1330 seps = seps + pycompat.osaltsep
1331 1331 # Protect backslashes. This gets silly very quickly.
1332 1332 seps = seps.replace('\\', '\\\\')
1333 1333 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1334 1334 dir = os.path.normpath(root)
1335 1335 result = []
1336 1336 for part, sep in pattern.findall(name):
1337 1337 if sep:
1338 1338 result.append(sep)
1339 1339 continue
1340 1340
1341 1341 if dir not in _fspathcache:
1342 1342 _fspathcache[dir] = _makefspathcacheentry(dir)
1343 1343 contents = _fspathcache[dir]
1344 1344
1345 1345 found = contents.get(part)
1346 1346 if not found:
1347 1347 # retry "once per directory" per "dirstate.walk" which
1348 1348 # may take place for each patch of "hg qpush", for example
1349 1349 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1350 1350 found = contents.get(part)
1351 1351
1352 1352 result.append(found or part)
1353 1353 dir = os.path.join(dir, part)
1354 1354
1355 1355 return ''.join(result)
1356 1356
1357 1357 def checknlink(testfile):
1358 1358 '''check whether hardlink count reporting works properly'''
1359 1359
1360 1360 # testfile may be open, so we need a separate file for checking to
1361 1361 # work around issue2543 (or testfile may get lost on Samba shares)
1362 1362 f1 = testfile + ".hgtmp1"
1363 1363 if os.path.lexists(f1):
1364 1364 return False
1365 1365 try:
1366 1366 posixfile(f1, 'w').close()
1367 1367 except IOError:
1368 1368 try:
1369 1369 os.unlink(f1)
1370 1370 except OSError:
1371 1371 pass
1372 1372 return False
1373 1373
1374 1374 f2 = testfile + ".hgtmp2"
1375 1375 fd = None
1376 1376 try:
1377 1377 oslink(f1, f2)
1378 1378 # nlinks() may behave differently for files on Windows shares if
1379 1379 # the file is open.
1380 1380 fd = posixfile(f2)
1381 1381 return nlinks(f2) > 1
1382 1382 except OSError:
1383 1383 return False
1384 1384 finally:
1385 1385 if fd is not None:
1386 1386 fd.close()
1387 1387 for f in (f1, f2):
1388 1388 try:
1389 1389 os.unlink(f)
1390 1390 except OSError:
1391 1391 pass
1392 1392
1393 1393 def endswithsep(path):
1394 1394 '''Check path ends with os.sep or os.altsep.'''
1395 1395 return (path.endswith(pycompat.ossep)
1396 1396 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1397 1397
1398 1398 def splitpath(path):
1399 1399 '''Split path by os.sep.
1400 1400 Note that this function does not use os.altsep because it is
1401 1401 meant as a simple alternative to "xxx.split(os.sep)".
1402 1402 It is recommended to use os.path.normpath() before using this
1403 1403 function if needed.'''
1404 1404 return path.split(pycompat.ossep)
1405 1405
1406 1406 def gui():
1407 1407 '''Are we running in a GUI?'''
1408 1408 if pycompat.sysplatform == 'darwin':
1409 1409 if 'SSH_CONNECTION' in encoding.environ:
1410 1410 # handle SSH access to a box where the user is logged in
1411 1411 return False
1412 1412 elif getattr(osutil, 'isgui', None):
1413 1413 # check if a CoreGraphics session is available
1414 1414 return osutil.isgui()
1415 1415 else:
1416 1416 # pure build; use a safe default
1417 1417 return True
1418 1418 else:
1419 1419 return pycompat.osname == "nt" or encoding.environ.get("DISPLAY")
1420 1420
1421 1421 def mktempcopy(name, emptyok=False, createmode=None):
1422 1422 """Create a temporary file with the same contents from name
1423 1423
1424 1424 The permission bits are copied from the original file.
1425 1425
1426 1426 If the temporary file is going to be truncated immediately, you
1427 1427 can use emptyok=True as an optimization.
1428 1428
1429 1429 Returns the name of the temporary file.
1430 1430 """
1431 1431 d, fn = os.path.split(name)
1432 1432 fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, dir=d)
1433 1433 os.close(fd)
1434 1434 # Temporary files are created with mode 0600, which is usually not
1435 1435 # what we want. If the original file already exists, just copy
1436 1436 # its mode. Otherwise, manually obey umask.
1437 1437 copymode(name, temp, createmode)
1438 1438 if emptyok:
1439 1439 return temp
1440 1440 try:
1441 1441 try:
1442 1442 ifp = posixfile(name, "rb")
1443 1443 except IOError as inst:
1444 1444 if inst.errno == errno.ENOENT:
1445 1445 return temp
1446 1446 if not getattr(inst, 'filename', None):
1447 1447 inst.filename = name
1448 1448 raise
1449 1449 ofp = posixfile(temp, "wb")
1450 1450 for chunk in filechunkiter(ifp):
1451 1451 ofp.write(chunk)
1452 1452 ifp.close()
1453 1453 ofp.close()
1454 1454 except: # re-raises
1455 1455 try: os.unlink(temp)
1456 1456 except OSError: pass
1457 1457 raise
1458 1458 return temp
1459 1459
1460 1460 class filestat(object):
1461 1461 """helper to exactly detect change of a file
1462 1462
1463 1463 'stat' attribute is result of 'os.stat()' if specified 'path'
1464 1464 exists. Otherwise, it is None. This can avoid preparative
1465 1465 'exists()' examination on client side of this class.
1466 1466 """
1467 1467 def __init__(self, path):
1468 1468 try:
1469 1469 self.stat = os.stat(path)
1470 1470 except OSError as err:
1471 1471 if err.errno != errno.ENOENT:
1472 1472 raise
1473 1473 self.stat = None
1474 1474
1475 1475 __hash__ = object.__hash__
1476 1476
1477 1477 def __eq__(self, old):
1478 1478 try:
1479 1479 # if ambiguity between stat of new and old file is
1480 1480 # avoided, comparison of size, ctime and mtime is enough
1481 1481 # to exactly detect change of a file regardless of platform
1482 1482 return (self.stat.st_size == old.stat.st_size and
1483 1483 self.stat.st_ctime == old.stat.st_ctime and
1484 1484 self.stat.st_mtime == old.stat.st_mtime)
1485 1485 except AttributeError:
1486 1486 return False
1487 1487
1488 1488 def isambig(self, old):
1489 1489 """Examine whether new (= self) stat is ambiguous against old one
1490 1490
1491 1491 "S[N]" below means stat of a file at N-th change:
1492 1492
1493 1493 - S[n-1].ctime < S[n].ctime: can detect change of a file
1494 1494 - S[n-1].ctime == S[n].ctime
1495 1495 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
1496 1496 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
1497 1497 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
1498 1498 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
1499 1499
1500 1500 Case (*2) above means that a file was changed twice or more at
1501 1501 same time in sec (= S[n-1].ctime), and comparison of timestamp
1502 1502 is ambiguous.
1503 1503
1504 1504 Base idea to avoid such ambiguity is "advance mtime 1 sec, if
1505 1505 timestamp is ambiguous".
1506 1506
1507 1507 But advancing mtime only in case (*2) doesn't work as
1508 1508 expected, because naturally advanced S[n].mtime in case (*1)
1509 1509 might be equal to manually advanced S[n-1 or earlier].mtime.
1510 1510
1511 1511 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
1512 1512 treated as ambiguous regardless of mtime, to avoid overlooking
1513 1513 changes caused by collisions between such mtimes.
1514 1514
1515 1515 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
1516 1516 S[n].mtime", even if size of a file isn't changed.
1517 1517 """
1518 1518 try:
1519 1519 return (self.stat.st_ctime == old.stat.st_ctime)
1520 1520 except AttributeError:
1521 1521 return False
1522 1522
1523 1523 def avoidambig(self, path, old):
1524 1524 """Change file stat of specified path to avoid ambiguity
1525 1525
1526 1526 'old' should be previous filestat of 'path'.
1527 1527
1528 1528 This skips avoiding ambiguity, if a process doesn't have
1529 1529 appropriate privileges for 'path'.
1530 1530 """
1531 1531 advanced = (old.stat.st_mtime + 1) & 0x7fffffff
1532 1532 try:
1533 1533 os.utime(path, (advanced, advanced))
1534 1534 except OSError as inst:
1535 1535 if inst.errno == errno.EPERM:
1536 1536 # utime() on the file created by another user causes EPERM,
1537 1537 # if a process doesn't have appropriate privileges
1538 1538 return
1539 1539 raise
1540 1540
1541 1541 def __ne__(self, other):
1542 1542 return not self == other
1543 1543
1544 1544 class atomictempfile(object):
1545 1545 '''writable file object that atomically updates a file
1546 1546
1547 1547 All writes will go to a temporary copy of the original file. Call
1548 1548 close() when you are done writing, and atomictempfile will rename
1549 1549 the temporary copy to the original name, making the changes
1550 1550 visible. If the object is destroyed without being closed, all your
1551 1551 writes are discarded.
1552 1552
1553 1553 checkambig argument of constructor is used with filestat, and is
1554 1554 useful only if target file is guarded by any lock (e.g. repo.lock
1555 1555 or repo.wlock).
1556 1556 '''
1557 1557 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
1558 1558 self.__name = name # permanent name
1559 1559 self._tempname = mktempcopy(name, emptyok=('w' in mode),
1560 1560 createmode=createmode)
1561 1561 self._fp = posixfile(self._tempname, mode)
1562 1562 self._checkambig = checkambig
1563 1563
1564 1564 # delegated methods
1565 1565 self.read = self._fp.read
1566 1566 self.write = self._fp.write
1567 1567 self.seek = self._fp.seek
1568 1568 self.tell = self._fp.tell
1569 1569 self.fileno = self._fp.fileno
1570 1570
1571 1571 def close(self):
1572 1572 if not self._fp.closed:
1573 1573 self._fp.close()
1574 1574 filename = localpath(self.__name)
1575 1575 oldstat = self._checkambig and filestat(filename)
1576 1576 if oldstat and oldstat.stat:
1577 1577 rename(self._tempname, filename)
1578 1578 newstat = filestat(filename)
1579 1579 if newstat.isambig(oldstat):
1580 1580 # stat of changed file is ambiguous to original one
1581 1581 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1582 1582 os.utime(filename, (advanced, advanced))
1583 1583 else:
1584 1584 rename(self._tempname, filename)
1585 1585
1586 1586 def discard(self):
1587 1587 if not self._fp.closed:
1588 1588 try:
1589 1589 os.unlink(self._tempname)
1590 1590 except OSError:
1591 1591 pass
1592 1592 self._fp.close()
1593 1593
1594 1594 def __del__(self):
1595 1595 if safehasattr(self, '_fp'): # constructor actually did something
1596 1596 self.discard()
1597 1597
1598 1598 def __enter__(self):
1599 1599 return self
1600 1600
1601 1601 def __exit__(self, exctype, excvalue, traceback):
1602 1602 if exctype is not None:
1603 1603 self.discard()
1604 1604 else:
1605 1605 self.close()
1606 1606
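# Editor's sketch (illustrative filename): atomictempfile as a context
# manager. The rename to the permanent name happens only on a clean
# exit; an exception inside the block discards the temporary copy.
def _exampleatomicwrite():
    with atomictempfile('data.txt', 'wb') as fp:
        fp.write('all or nothing\n')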
1607 1607 def unlinkpath(f, ignoremissing=False):
1608 1608 """unlink and remove the directory if it is empty"""
1609 try:
1609 if ignoremissing:
1610 tryunlink(f)
1611 else:
1610 1612 unlink(f)
1611 except OSError as e:
1612 if not (ignoremissing and e.errno == errno.ENOENT):
1613 raise
1614 1613 # try removing directories that might now be empty
1615 1614 try:
1616 1615 removedirs(os.path.dirname(f))
1617 1616 except OSError:
1618 1617 pass
1619 1618
1620 1619 def tryunlink(f):
1621 1620 """Attempt to remove a file, ignoring ENOENT errors."""
1622 1621 try:
1623 1622 unlink(f)
1624 1623 except OSError as e:
1625 1624 if e.errno != errno.ENOENT:
1626 1625 raise
1627 1626
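# Editor's sketch (illustrative path): the behavior this changeset
# codifies. With ignoremissing=True, unlinkpath now delegates to
# tryunlink, so a vanished file is silently ignored, and empty parent
# directories are still pruned afterwards; tryunlink alone has the same
# ENOENT tolerance but leaves directories untouched.
def _exampleunlink():
    unlinkpath('a/b/c', ignoremissing=True)  # no-op if 'a/b/c' is gone
    tryunlink('a/b/c')                       # same tolerance, no rmdir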
1628 1627 def makedirs(name, mode=None, notindexed=False):
1629 1628 """recursive directory creation with parent mode inheritance
1630 1629
1631 1630 Newly created directories are marked as "not to be indexed by
1632 1631 the content indexing service", if ``notindexed`` is specified
1633 1632 for "write" mode access.
1634 1633 """
1635 1634 try:
1636 1635 makedir(name, notindexed)
1637 1636 except OSError as err:
1638 1637 if err.errno == errno.EEXIST:
1639 1638 return
1640 1639 if err.errno != errno.ENOENT or not name:
1641 1640 raise
1642 1641 parent = os.path.dirname(os.path.abspath(name))
1643 1642 if parent == name:
1644 1643 raise
1645 1644 makedirs(parent, mode, notindexed)
1646 1645 try:
1647 1646 makedir(name, notindexed)
1648 1647 except OSError as err:
1649 1648 # Catch EEXIST to handle races
1650 1649 if err.errno == errno.EEXIST:
1651 1650 return
1652 1651 raise
1653 1652 if mode is not None:
1654 1653 os.chmod(name, mode)
1655 1654
1656 1655 def readfile(path):
1657 1656 with open(path, 'rb') as fp:
1658 1657 return fp.read()
1659 1658
1660 1659 def writefile(path, text):
1661 1660 with open(path, 'wb') as fp:
1662 1661 fp.write(text)
1663 1662
1664 1663 def appendfile(path, text):
1665 1664 with open(path, 'ab') as fp:
1666 1665 fp.write(text)
1667 1666
1668 1667 class chunkbuffer(object):
1669 1668 """Allow arbitrary sized chunks of data to be efficiently read from an
1670 1669 iterator over chunks of arbitrary size."""
1671 1670
1672 1671 def __init__(self, in_iter):
1673 1672 """in_iter is the iterator that's iterating over the input
1674 1673 chunks."""
1675 1674 def splitbig(chunks):
1676 1675 for chunk in chunks:
1677 1676 if len(chunk) > 2**20:
1678 1677 pos = 0
1679 1678 while pos < len(chunk):
1680 1679 end = pos + 2 ** 18
1681 1680 yield chunk[pos:end]
1682 1681 pos = end
1683 1682 else:
1684 1683 yield chunk
1685 1684 self.iter = splitbig(in_iter)
1686 1685 self._queue = collections.deque()
1687 1686 self._chunkoffset = 0
1688 1687
1689 1688 def read(self, l=None):
1690 1689 """Read L bytes of data from the iterator of chunks of data.
1691 1690 Returns less than L bytes if the iterator runs dry.
1692 1691
1693 1692 If size parameter is omitted, read everything"""
1694 1693 if l is None:
1695 1694 return ''.join(self.iter)
1696 1695
1697 1696 left = l
1698 1697 buf = []
1699 1698 queue = self._queue
1700 1699 while left > 0:
1701 1700 # refill the queue
1702 1701 if not queue:
1703 1702 target = 2**18
1704 1703 for chunk in self.iter:
1705 1704 queue.append(chunk)
1706 1705 target -= len(chunk)
1707 1706 if target <= 0:
1708 1707 break
1709 1708 if not queue:
1710 1709 break
1711 1710
1712 1711 # The easy way to do this would be to queue.popleft(), modify the
1713 1712 # chunk (if necessary), then queue.appendleft(). However, for cases
1714 1713 # where we read partial chunk content, this incurs 2 dequeue
1715 1714 # mutations and creates a new str for the remaining chunk in the
1716 1715 # queue. Our code below avoids this overhead.
1717 1716
1718 1717 chunk = queue[0]
1719 1718 chunkl = len(chunk)
1720 1719 offset = self._chunkoffset
1721 1720
1722 1721 # Use full chunk.
1723 1722 if offset == 0 and left >= chunkl:
1724 1723 left -= chunkl
1725 1724 queue.popleft()
1726 1725 buf.append(chunk)
1727 1726 # self._chunkoffset remains at 0.
1728 1727 continue
1729 1728
1730 1729 chunkremaining = chunkl - offset
1731 1730
1732 1731 # Use all of unconsumed part of chunk.
1733 1732 if left >= chunkremaining:
1734 1733 left -= chunkremaining
1735 1734 queue.popleft()
1736 1735 # The offset == 0 case is handled by the block above, so offset is
1737 1736 # non-zero here and ``chunk[offset:]`` won't merely copy the chunk.
1738 1737 buf.append(chunk[offset:])
1739 1738 self._chunkoffset = 0
1740 1739
1741 1740 # Partial chunk needed.
1742 1741 else:
1743 1742 buf.append(chunk[offset:offset + left])
1744 1743 self._chunkoffset += left
1745 1744 left -= chunkremaining
1746 1745
1747 1746 return ''.join(buf)
1748 1747
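# An illustrative re-chunking with chunkbuffer (sizes are arbitrary):
#
#   cb = chunkbuffer(iter(['abc', 'defgh', 'i']))
#   cb.read(4)   # -> 'abcd'
#   cb.read(5)   # -> 'efghi'
#   cb.read(1)   # -> '' once the iterator runs dry
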
1749 1748 def filechunkiter(f, size=131072, limit=None):
1750 1749 """Create a generator that produces the data in the file size
1751 1750 (default 131072) bytes at a time, up to optional limit (default is
1752 1751 to read all data). Chunks may be less than size bytes if the
1753 1752 chunk is the last chunk in the file, or the file is a socket or
1754 1753 some other type of file that sometimes reads less data than is
1755 1754 requested."""
1756 1755 assert size >= 0
1757 1756 assert limit is None or limit >= 0
1758 1757 while True:
1759 1758 if limit is None:
1760 1759 nbytes = size
1761 1760 else:
1762 1761 nbytes = min(limit, size)
1763 1762 s = nbytes and f.read(nbytes)
1764 1763 if not s:
1765 1764 break
1766 1765 if limit:
1767 1766 limit -= len(s)
1768 1767 yield s
1769 1768
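# For example, hashing a file without reading it whole (name invented):
#
#   h = hashlib.sha1()
#   with open('big.bin', 'rb') as fp:
#       for chunk in filechunkiter(fp):
#           h.update(chunk)
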
1770 1769 def makedate(timestamp=None):
1771 1770 '''Return a unix timestamp (or the current time) as a (unixtime,
1772 1771 offset) tuple based off the local timezone.'''
1773 1772 if timestamp is None:
1774 1773 timestamp = time.time()
1775 1774 if timestamp < 0:
1776 1775 hint = _("check your clock")
1777 1776 raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
1778 1777 delta = (datetime.datetime.utcfromtimestamp(timestamp) -
1779 1778 datetime.datetime.fromtimestamp(timestamp))
1780 1779 tz = delta.days * 86400 + delta.seconds
1781 1780 return timestamp, tz
1782 1781
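# For instance, in a UTC+2 timezone makedate() returns something like
# (1490000000.0, -7200): the offset counts seconds behind UTC, so zones
# east of UTC yield negative values.
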
1783 1782 def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
1784 1783 """represent a (unixtime, offset) tuple as a localized time.
1785 1784 unixtime is seconds since the epoch, and offset is the time zone's
1786 1785 number of seconds away from UTC.
1787 1786
1788 1787 >>> datestr((0, 0))
1789 1788 'Thu Jan 01 00:00:00 1970 +0000'
1790 1789 >>> datestr((42, 0))
1791 1790 'Thu Jan 01 00:00:42 1970 +0000'
1792 1791 >>> datestr((-42, 0))
1793 1792 'Wed Dec 31 23:59:18 1969 +0000'
1794 1793 >>> datestr((0x7fffffff, 0))
1795 1794 'Tue Jan 19 03:14:07 2038 +0000'
1796 1795 >>> datestr((-0x80000000, 0))
1797 1796 'Fri Dec 13 20:45:52 1901 +0000'
1798 1797 """
1799 1798 t, tz = date or makedate()
1800 1799 if "%1" in format or "%2" in format or "%z" in format:
1801 1800 sign = (tz > 0) and "-" or "+"
1802 1801 minutes = abs(tz) // 60
1803 1802 q, r = divmod(minutes, 60)
1804 1803 format = format.replace("%z", "%1%2")
1805 1804 format = format.replace("%1", "%c%02d" % (sign, q))
1806 1805 format = format.replace("%2", "%02d" % r)
1807 1806 d = t - tz
1808 1807 if d > 0x7fffffff:
1809 1808 d = 0x7fffffff
1810 1809 elif d < -0x80000000:
1811 1810 d = -0x80000000
1812 1811 # Never use time.gmtime() and datetime.datetime.fromtimestamp()
1813 1812 # because they use the gmtime() system call which is buggy on Windows
1814 1813 # for negative values.
1815 1814 t = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=d)
1816 1815 s = encoding.strtolocal(t.strftime(encoding.strfromlocal(format)))
1817 1816 return s
1818 1817
1819 1818 def shortdate(date=None):
1820 1819 """turn (timestamp, tzoff) tuple into iso 8631 date."""
1821 1820 return datestr(date, format='%Y-%m-%d')
1822 1821
1823 1822 def parsetimezone(s):
1824 1823 """find a trailing timezone, if any, in string, and return a
1825 1824 (offset, remainder) pair"""
1826 1825
1827 1826 if s.endswith("GMT") or s.endswith("UTC"):
1828 1827 return 0, s[:-3].rstrip()
1829 1828
1830 1829 # Unix-style timezones [+-]hhmm
1831 1830 if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
1832 1831 sign = (s[-5] == "+") and 1 or -1
1833 1832 hours = int(s[-4:-2])
1834 1833 minutes = int(s[-2:])
1835 1834 return -sign * (hours * 60 + minutes) * 60, s[:-5].rstrip()
1836 1835
1837 1836 # ISO8601 trailing Z
1838 1837 if s.endswith("Z") and s[-2:-1].isdigit():
1839 1838 return 0, s[:-1]
1840 1839
1841 1840 # ISO8601-style [+-]hh:mm
1842 1841 if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
1843 1842 s[-5:-3].isdigit() and s[-2:].isdigit()):
1844 1843 sign = (s[-6] == "+") and 1 or -1
1845 1844 hours = int(s[-5:-3])
1846 1845 minutes = int(s[-2:])
1847 1846 return -sign * (hours * 60 + minutes) * 60, s[:-6]
1848 1847
1849 1848 return None, s
1850 1849
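# A few illustrative inputs (the strings are made up):
#
#   parsetimezone('2006-02-01 13:00 +0500')  # -> (-18000, '2006-02-01 13:00')
#   parsetimezone('13:00 GMT')               # -> (0, '13:00')
#   parsetimezone('no zone here')            # -> (None, 'no zone here')
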
1851 1850 def strdate(string, format, defaults=None):
1852 1851 """parse a localized time string and return a (unixtime, offset) tuple.
1853 1852 if the string cannot be parsed, ValueError is raised."""
1854 1853 if defaults is None:
1855 1854 defaults = {}
1856 1855
1857 1856 # NOTE: unixtime = localunixtime + offset
1858 1857 offset, date = parsetimezone(string)
1859 1858
1860 1859 # add missing elements from defaults
1861 1860 usenow = False # default to using biased defaults
1862 1861 for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
1863 1862 found = [True for p in part if ("%"+p) in format]
1864 1863 if not found:
1865 1864 date += "@" + defaults[part][usenow]
1866 1865 format += "@%" + part[0]
1867 1866 else:
1868 1867 # We've found a specific time element, less specific time
1869 1868 # elements are relative to today
1870 1869 usenow = True
1871 1870
1872 1871 timetuple = time.strptime(date, format)
1873 1872 localunixtime = int(calendar.timegm(timetuple))
1874 1873 if offset is None:
1875 1874 # local timezone
1876 1875 unixtime = int(time.mktime(timetuple))
1877 1876 offset = unixtime - localunixtime
1878 1877 else:
1879 1878 unixtime = localunixtime + offset
1880 1879 return unixtime, offset
1881 1880
1882 1881 def parsedate(date, formats=None, bias=None):
1883 1882 """parse a localized date/time and return a (unixtime, offset) tuple.
1884 1883
1885 1884 The date may be a "unixtime offset" string or in one of the specified
1886 1885 formats. If the date already is a (unixtime, offset) tuple, it is returned.
1887 1886
1888 1887 >>> parsedate(' today ') == parsedate(\
1889 1888 datetime.date.today().strftime('%b %d'))
1890 1889 True
1891 1890 >>> parsedate( 'yesterday ') == parsedate((datetime.date.today() -\
1892 1891 datetime.timedelta(days=1)\
1893 1892 ).strftime('%b %d'))
1894 1893 True
1895 1894 >>> now, tz = makedate()
1896 1895 >>> strnow, strtz = parsedate('now')
1897 1896 >>> (strnow - now) < 1
1898 1897 True
1899 1898 >>> tz == strtz
1900 1899 True
1901 1900 """
1902 1901 if bias is None:
1903 1902 bias = {}
1904 1903 if not date:
1905 1904 return 0, 0
1906 1905 if isinstance(date, tuple) and len(date) == 2:
1907 1906 return date
1908 1907 if not formats:
1909 1908 formats = defaultdateformats
1910 1909 date = date.strip()
1911 1910
1912 1911 if date == 'now' or date == _('now'):
1913 1912 return makedate()
1914 1913 if date == 'today' or date == _('today'):
1915 1914 date = datetime.date.today().strftime('%b %d')
1916 1915 elif date == 'yesterday' or date == _('yesterday'):
1917 1916 date = (datetime.date.today() -
1918 1917 datetime.timedelta(days=1)).strftime('%b %d')
1919 1918
1920 1919 try:
1921 1920 when, offset = map(int, date.split(' '))
1922 1921 except ValueError:
1923 1922 # fill out defaults
1924 1923 now = makedate()
1925 1924 defaults = {}
1926 1925 for part in ("d", "mb", "yY", "HI", "M", "S"):
1927 1926 # this piece is for rounding the specific end of unknowns
1928 1927 b = bias.get(part)
1929 1928 if b is None:
1930 1929 if part[0] in "HMS":
1931 1930 b = "00"
1932 1931 else:
1933 1932 b = "0"
1934 1933
1935 1934 # this piece is for matching the generic end to today's date
1936 1935 n = datestr(now, "%" + part[0])
1937 1936
1938 1937 defaults[part] = (b, n)
1939 1938
1940 1939 for format in formats:
1941 1940 try:
1942 1941 when, offset = strdate(date, format, defaults)
1943 1942 except (ValueError, OverflowError):
1944 1943 pass
1945 1944 else:
1946 1945 break
1947 1946 else:
1948 1947 raise Abort(_('invalid date: %r') % date)
1949 1948 # validate explicit (probably user-specified) date and
1950 1949 # time zone offset. values must fit in signed 32 bits for
1951 1950 # current 32-bit linux runtimes. timezones go from UTC-12
1952 1951 # to UTC+14
1953 1952 if when < -0x80000000 or when > 0x7fffffff:
1954 1953 raise Abort(_('date exceeds 32 bits: %d') % when)
1955 1954 if offset < -50400 or offset > 43200:
1956 1955 raise Abort(_('impossible time zone offset: %d') % offset)
1957 1956 return when, offset
1958 1957
1959 1958 def matchdate(date):
1960 1959 """Return a function that matches a given date match specifier
1961 1960
1962 1961 Formats include:
1963 1962
1964 1963 '{date}' match a given date to the accuracy provided
1965 1964
1966 1965 '<{date}' on or before a given date
1967 1966
1968 1967 '>{date}' on or after a given date
1969 1968
1970 1969 >>> p1 = parsedate("10:29:59")
1971 1970 >>> p2 = parsedate("10:30:00")
1972 1971 >>> p3 = parsedate("10:30:59")
1973 1972 >>> p4 = parsedate("10:31:00")
1974 1973 >>> p5 = parsedate("Sep 15 10:30:00 1999")
1975 1974 >>> f = matchdate("10:30")
1976 1975 >>> f(p1[0])
1977 1976 False
1978 1977 >>> f(p2[0])
1979 1978 True
1980 1979 >>> f(p3[0])
1981 1980 True
1982 1981 >>> f(p4[0])
1983 1982 False
1984 1983 >>> f(p5[0])
1985 1984 False
1986 1985 """
1987 1986
1988 1987 def lower(date):
1989 1988 d = {'mb': "1", 'd': "1"}
1990 1989 return parsedate(date, extendeddateformats, d)[0]
1991 1990
1992 1991 def upper(date):
1993 1992 d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
1994 1993 for days in ("31", "30", "29"):
1995 1994 try:
1996 1995 d["d"] = days
1997 1996 return parsedate(date, extendeddateformats, d)[0]
1998 1997 except Abort:
1999 1998 pass
2000 1999 d["d"] = "28"
2001 2000 return parsedate(date, extendeddateformats, d)[0]
2002 2001
2003 2002 date = date.strip()
2004 2003
2005 2004 if not date:
2006 2005 raise Abort(_("dates cannot consist entirely of whitespace"))
2007 2006 elif date[0] == "<":
2008 2007 if not date[1:]:
2009 2008 raise Abort(_("invalid day spec, use '<DATE'"))
2010 2009 when = upper(date[1:])
2011 2010 return lambda x: x <= when
2012 2011 elif date[0] == ">":
2013 2012 if not date[1:]:
2014 2013 raise Abort(_("invalid day spec, use '>DATE'"))
2015 2014 when = lower(date[1:])
2016 2015 return lambda x: x >= when
2017 2016 elif date[0] == "-":
2018 2017 try:
2019 2018 days = int(date[1:])
2020 2019 except ValueError:
2021 2020 raise Abort(_("invalid day spec: %s") % date[1:])
2022 2021 if days < 0:
2023 2022 raise Abort(_("%s must be nonnegative (see 'hg help dates')")
2024 2023 % date[1:])
2025 2024 when = makedate()[0] - days * 3600 * 24
2026 2025 return lambda x: x >= when
2027 2026 elif " to " in date:
2028 2027 a, b = date.split(" to ")
2029 2028 start, stop = lower(a), upper(b)
2030 2029 return lambda x: x >= start and x <= stop
2031 2030 else:
2032 2031 start, stop = lower(date), upper(date)
2033 2032 return lambda x: x >= start and x <= stop
2034 2033
2035 2034 def stringmatcher(pattern, casesensitive=True):
2036 2035 """
2037 2036 accepts a string, possibly starting with 're:' or 'literal:' prefix.
2038 2037 returns the matcher name, pattern, and matcher function.
2039 2038 missing or unknown prefixes are treated as literal matches.
2040 2039
2041 2040 helper for tests:
2042 2041 >>> def test(pattern, *tests):
2043 2042 ... kind, pattern, matcher = stringmatcher(pattern)
2044 2043 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2045 2044 >>> def itest(pattern, *tests):
2046 2045 ... kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
2047 2046 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2048 2047
2049 2048 exact matching (no prefix):
2050 2049 >>> test('abcdefg', 'abc', 'def', 'abcdefg')
2051 2050 ('literal', 'abcdefg', [False, False, True])
2052 2051
2053 2052 regex matching ('re:' prefix)
2054 2053 >>> test('re:a.+b', 'nomatch', 'fooadef', 'fooadefbar')
2055 2054 ('re', 'a.+b', [False, False, True])
2056 2055
2057 2056 force exact matches ('literal:' prefix)
2058 2057 >>> test('literal:re:foobar', 'foobar', 're:foobar')
2059 2058 ('literal', 're:foobar', [False, True])
2060 2059
2061 2060 unknown prefixes are ignored and treated as literals
2062 2061 >>> test('foo:bar', 'foo', 'bar', 'foo:bar')
2063 2062 ('literal', 'foo:bar', [False, False, True])
2064 2063
2065 2064 case insensitive regex matches
2066 2065 >>> itest('re:A.+b', 'nomatch', 'fooadef', 'fooadefBar')
2067 2066 ('re', 'A.+b', [False, False, True])
2068 2067
2069 2068 case insensitive literal matches
2070 2069 >>> itest('ABCDEFG', 'abc', 'def', 'abcdefg')
2071 2070 ('literal', 'ABCDEFG', [False, False, True])
2072 2071 """
2073 2072 if pattern.startswith('re:'):
2074 2073 pattern = pattern[3:]
2075 2074 try:
2076 2075 flags = 0
2077 2076 if not casesensitive:
2078 2077 flags = remod.I
2079 2078 regex = remod.compile(pattern, flags)
2080 2079 except remod.error as e:
2081 2080 raise error.ParseError(_('invalid regular expression: %s')
2082 2081 % e)
2083 2082 return 're', pattern, regex.search
2084 2083 elif pattern.startswith('literal:'):
2085 2084 pattern = pattern[8:]
2086 2085
2087 2086 match = pattern.__eq__
2088 2087
2089 2088 if not casesensitive:
2090 2089 ipat = encoding.lower(pattern)
2091 2090 match = lambda s: ipat == encoding.lower(s)
2092 2091 return 'literal', pattern, match
2093 2092
2094 2093 def shortuser(user):
2095 2094 """Return a short representation of a user name or email address."""
2096 2095 f = user.find('@')
2097 2096 if f >= 0:
2098 2097 user = user[:f]
2099 2098 f = user.find('<')
2100 2099 if f >= 0:
2101 2100 user = user[f + 1:]
2102 2101 f = user.find(' ')
2103 2102 if f >= 0:
2104 2103 user = user[:f]
2105 2104 f = user.find('.')
2106 2105 if f >= 0:
2107 2106 user = user[:f]
2108 2107 return user
2109 2108
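# For example (the address is made up):
#
#   shortuser('John Doe <john.doe@example.com>')   # -> 'john'
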
2110 2109 def emailuser(user):
2111 2110 """Return the user portion of an email address."""
2112 2111 f = user.find('@')
2113 2112 if f >= 0:
2114 2113 user = user[:f]
2115 2114 f = user.find('<')
2116 2115 if f >= 0:
2117 2116 user = user[f + 1:]
2118 2117 return user
2119 2118
2120 2119 def email(author):
2121 2120 '''get email of author.'''
2122 2121 r = author.find('>')
2123 2122 if r == -1:
2124 2123 r = None
2125 2124 return author[author.find('<') + 1:r]
2126 2125
2127 2126 def ellipsis(text, maxlength=400):
2128 2127 """Trim string to at most maxlength (default: 400) columns in display."""
2129 2128 return encoding.trim(text, maxlength, ellipsis='...')
2130 2129
2131 2130 def unitcountfn(*unittable):
2132 2131 '''return a function that renders a readable count of some quantity'''
2133 2132
2134 2133 def go(count):
2135 2134 for multiplier, divisor, format in unittable:
2136 2135 if count >= divisor * multiplier:
2137 2136 return format % (count / float(divisor))
2138 2137 return unittable[-1][2] % count
2139 2138
2140 2139 return go
2141 2140
2142 2141 bytecount = unitcountfn(
2143 2142 (100, 1 << 30, _('%.0f GB')),
2144 2143 (10, 1 << 30, _('%.1f GB')),
2145 2144 (1, 1 << 30, _('%.2f GB')),
2146 2145 (100, 1 << 20, _('%.0f MB')),
2147 2146 (10, 1 << 20, _('%.1f MB')),
2148 2147 (1, 1 << 20, _('%.2f MB')),
2149 2148 (100, 1 << 10, _('%.0f KB')),
2150 2149 (10, 1 << 10, _('%.1f KB')),
2151 2150 (1, 1 << 10, _('%.2f KB')),
2152 2151 (1, 1, _('%.0f bytes')),
2153 2152 )
2154 2153
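# For example:
#
#   bytecount(1234567)   # -> '1.18 MB'
#   bytecount(987)       # -> '987 bytes'
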
2155 2154 def escapestr(s):
2156 2155 # call underlying function of s.encode('string_escape') directly for
2157 2156 # Python 3 compatibility
2158 2157 return codecs.escape_encode(s)[0]
2159 2158
2160 2159 def unescapestr(s):
2161 2160 return codecs.escape_decode(s)[0]
2162 2161
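# For instance, escapestr('a\nb') returns 'a\\nb', and unescapestr('a\\nb')
# round-trips back to 'a\nb'.
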
2163 2162 def uirepr(s):
2164 2163 # Avoid double backslash in Windows path repr()
2165 2164 return repr(s).replace('\\\\', '\\')
2166 2165
2167 2166 # delay import of textwrap
2168 2167 def MBTextWrapper(**kwargs):
2169 2168 class tw(textwrap.TextWrapper):
2170 2169 """
2171 2170 Extend TextWrapper for width-awareness.
2172 2171
2173 2172 Neither the number of 'bytes' in any encoding nor the number of
2174 2173 'characters' is appropriate for calculating terminal columns.
2175 2174
2176 2175 The original TextWrapper implementation uses the built-in 'len()'
2177 2176 directly, so overriding is needed to use the width of each character.
2178 2177
2179 2178 In addition, characters classified as 'ambiguous' width are treated
2180 2179 as wide in East Asian locales, but as narrow elsewhere.
2181 2180
2182 2181 This requires a user decision to determine the width of such characters.
2183 2182 """
2184 2183 def _cutdown(self, ucstr, space_left):
2185 2184 l = 0
2186 2185 colwidth = encoding.ucolwidth
2187 2186 for i in xrange(len(ucstr)):
2188 2187 l += colwidth(ucstr[i])
2189 2188 if space_left < l:
2190 2189 return (ucstr[:i], ucstr[i:])
2191 2190 return ucstr, ''
2192 2191
2193 2192 # overriding of base class
2194 2193 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
2195 2194 space_left = max(width - cur_len, 1)
2196 2195
2197 2196 if self.break_long_words:
2198 2197 cut, res = self._cutdown(reversed_chunks[-1], space_left)
2199 2198 cur_line.append(cut)
2200 2199 reversed_chunks[-1] = res
2201 2200 elif not cur_line:
2202 2201 cur_line.append(reversed_chunks.pop())
2203 2202
2204 2203 # this overriding code is imported from TextWrapper of Python 2.6
2205 2204 # to calculate columns of string by 'encoding.ucolwidth()'
2206 2205 def _wrap_chunks(self, chunks):
2207 2206 colwidth = encoding.ucolwidth
2208 2207
2209 2208 lines = []
2210 2209 if self.width <= 0:
2211 2210 raise ValueError("invalid width %r (must be > 0)" % self.width)
2212 2211
2213 2212 # Arrange in reverse order so items can be efficiently popped
2214 2213 # from a stack of chunks.
2215 2214 chunks.reverse()
2216 2215
2217 2216 while chunks:
2218 2217
2219 2218 # Start the list of chunks that will make up the current line.
2220 2219 # cur_len is just the length of all the chunks in cur_line.
2221 2220 cur_line = []
2222 2221 cur_len = 0
2223 2222
2224 2223 # Figure out which static string will prefix this line.
2225 2224 if lines:
2226 2225 indent = self.subsequent_indent
2227 2226 else:
2228 2227 indent = self.initial_indent
2229 2228
2230 2229 # Maximum width for this line.
2231 2230 width = self.width - len(indent)
2232 2231
2233 2232 # First chunk on line is whitespace -- drop it, unless this
2234 2233 # is the very beginning of the text (i.e. no lines started yet).
2235 2234 if self.drop_whitespace and chunks[-1].strip() == '' and lines:
2236 2235 del chunks[-1]
2237 2236
2238 2237 while chunks:
2239 2238 l = colwidth(chunks[-1])
2240 2239
2241 2240 # Can at least squeeze this chunk onto the current line.
2242 2241 if cur_len + l <= width:
2243 2242 cur_line.append(chunks.pop())
2244 2243 cur_len += l
2245 2244
2246 2245 # Nope, this line is full.
2247 2246 else:
2248 2247 break
2249 2248
2250 2249 # The current line is full, and the next chunk is too big to
2251 2250 # fit on *any* line (not just this one).
2252 2251 if chunks and colwidth(chunks[-1]) > width:
2253 2252 self._handle_long_word(chunks, cur_line, cur_len, width)
2254 2253
2255 2254 # If the last chunk on this line is all whitespace, drop it.
2256 2255 if (self.drop_whitespace and
2257 2256 cur_line and cur_line[-1].strip() == ''):
2258 2257 del cur_line[-1]
2259 2258
2260 2259 # Convert current line back to a string and store it in list
2261 2260 # of all lines (return value).
2262 2261 if cur_line:
2263 2262 lines.append(indent + ''.join(cur_line))
2264 2263
2265 2264 return lines
2266 2265
2267 2266 global MBTextWrapper
2268 2267 MBTextWrapper = tw
2269 2268 return tw(**kwargs)
2270 2269
2271 2270 def wrap(line, width, initindent='', hangindent=''):
2272 2271 maxindent = max(len(hangindent), len(initindent))
2273 2272 if width <= maxindent:
2274 2273 # adjust for weird terminal size
2275 2274 width = max(78, maxindent + 1)
2276 2275 line = line.decode(pycompat.sysstr(encoding.encoding),
2277 2276 pycompat.sysstr(encoding.encodingmode))
2278 2277 initindent = initindent.decode(pycompat.sysstr(encoding.encoding),
2279 2278 pycompat.sysstr(encoding.encodingmode))
2280 2279 hangindent = hangindent.decode(pycompat.sysstr(encoding.encoding),
2281 2280 pycompat.sysstr(encoding.encodingmode))
2282 2281 wrapper = MBTextWrapper(width=width,
2283 2282 initial_indent=initindent,
2284 2283 subsequent_indent=hangindent)
2285 2284 return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
2286 2285
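# A sketch of the wrapper above (the text and widths are arbitrary):
#
#   wrap('a long changeset description ...', width=30,
#        initindent='  ', hangindent='    ')
#
# fills to 30 columns, indents the first line by two spaces and
# continuation lines by four, counting wide characters as two columns.
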
2287 2286 if (pyplatform.python_implementation() == 'CPython' and
2288 2287 sys.version_info < (3, 0)):
2289 2288 # There is an issue in CPython that some IO methods do not handle EINTR
2290 2289 # correctly. The following table shows what CPython version (and functions)
2291 2290 # are affected (buggy: has the EINTR bug, okay: otherwise):
2292 2291 #
2293 2292 #             | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2294 2293 # ------------|---------|-----------------|-------
2295 2294 # fp.__iter__ | buggy   | buggy           | okay
2296 2295 # fp.read*    | buggy   | okay [1]        | okay
2297 2296 #
2298 2297 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2299 2298 #
2300 2299 # Here we work around the EINTR issue for fileobj.__iter__. Other methods
2301 2300 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2302 2301 #
2303 2302 # Although we can work around the EINTR issue for fp.__iter__, it is slower:
2304 2303 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2305 2304 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2306 2305 # fp.__iter__ but not other fp.read* methods.
2307 2306 #
2308 2307 # On modern systems like Linux, the "read" syscall cannot be interrupted
2309 2308 # when reading "fast" files like on-disk files. So the EINTR issue only
2310 2309 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2311 2310 # files approximately as "fast" files and use the fast (unsafe) code path,
2312 2311 # to minimize the performance impact.
2313 2312 if sys.version_info >= (2, 7, 4):
2314 2313 # fp.readline deals with EINTR correctly, use it as a workaround.
2315 2314 def _safeiterfile(fp):
2316 2315 return iter(fp.readline, '')
2317 2316 else:
2318 2317 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2319 2318 # note: this may block longer than necessary because of bufsize.
2320 2319 def _safeiterfile(fp, bufsize=4096):
2321 2320 fd = fp.fileno()
2322 2321 line = ''
2323 2322 while True:
2324 2323 try:
2325 2324 buf = os.read(fd, bufsize)
2326 2325 except OSError as ex:
2327 2326 # os.read only raises EINTR before any data is read
2328 2327 if ex.errno == errno.EINTR:
2329 2328 continue
2330 2329 else:
2331 2330 raise
2332 2331 line += buf
2333 2332 if '\n' in buf:
2334 2333 splitted = line.splitlines(True)
2335 2334 line = ''
2336 2335 for l in splitted:
2337 2336 if l[-1] == '\n':
2338 2337 yield l
2339 2338 else:
2340 2339 line = l
2341 2340 if not buf:
2342 2341 break
2343 2342 if line:
2344 2343 yield line
2345 2344
2346 2345 def iterfile(fp):
2347 2346 fastpath = True
2348 2347 if type(fp) is file:
2349 2348 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2350 2349 if fastpath:
2351 2350 return fp
2352 2351 else:
2353 2352 return _safeiterfile(fp)
2354 2353 else:
2355 2354 # PyPy and CPython 3 do not have the EINTR issue, so no workaround is needed.
2356 2355 def iterfile(fp):
2357 2356 return fp
2358 2357
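# Either way, callers just write ('fp' is a placeholder file object):
#
#   for line in iterfile(fp):
#       ...
#
# and get EINTR-safe iteration on the interpreters that need it.
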
2359 2358 def iterlines(iterator):
2360 2359 for chunk in iterator:
2361 2360 for line in chunk.splitlines():
2362 2361 yield line
2363 2362
2364 2363 def expandpath(path):
2365 2364 return os.path.expanduser(os.path.expandvars(path))
2366 2365
2367 2366 def hgcmd():
2368 2367 """Return the command used to execute current hg
2369 2368
2370 2369 This is different from hgexecutable() because on Windows we want
2371 2370 to avoid things opening new shell windows like batch files, so we
2372 2371 get either the python call or current executable.
2373 2372 """
2374 2373 if mainfrozen():
2375 2374 if getattr(sys, 'frozen', None) == 'macosx_app':
2376 2375 # Env variable set by py2app
2377 2376 return [encoding.environ['EXECUTABLEPATH']]
2378 2377 else:
2379 2378 return [pycompat.sysexecutable]
2380 2379 return gethgcmd()
2381 2380
2382 2381 def rundetached(args, condfn):
2383 2382 """Execute the argument list in a detached process.
2384 2383
2385 2384 condfn is a callable which is called repeatedly and should return
2386 2385 True once the child process is known to have started successfully.
2387 2386 At this point, the child process PID is returned. If the child
2388 2387 process fails to start or finishes before condfn() evaluates to
2389 2388 True, return -1.
2390 2389 """
2391 2390 # Windows case is easier because the child process is either
2392 2391 # successfully starting and validating the condition or exiting
2393 2392 # on failure. We just poll on its PID. On Unix, if the child
2394 2393 # process fails to start, it will be left in a zombie state until
2395 2394 # the parent wait on it, which we cannot do since we expect a long
2396 2395 # running process on success. Instead we listen for SIGCHLD telling
2397 2396 # us our child process terminated.
2398 2397 terminated = set()
2399 2398 def handler(signum, frame):
2400 2399 terminated.add(os.wait())
2401 2400 prevhandler = None
2402 2401 SIGCHLD = getattr(signal, 'SIGCHLD', None)
2403 2402 if SIGCHLD is not None:
2404 2403 prevhandler = signal.signal(SIGCHLD, handler)
2405 2404 try:
2406 2405 pid = spawndetached(args)
2407 2406 while not condfn():
2408 2407 if ((pid in terminated or not testpid(pid))
2409 2408 and not condfn()):
2410 2409 return -1
2411 2410 time.sleep(0.1)
2412 2411 return pid
2413 2412 finally:
2414 2413 if prevhandler is not None:
2415 2414 signal.signal(signal.SIGCHLD, prevhandler)
2416 2415
2417 2416 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2418 2417 """Return the result of interpolating items in the mapping into string s.
2419 2418
2420 2419 prefix is a single character string, or a two character string with
2421 2420 a backslash as the first character if the prefix needs to be escaped in
2422 2421 a regular expression.
2423 2422
2424 2423 fn is an optional function that will be applied to the replacement text
2425 2424 just before replacement.
2426 2425
2427 2426 escape_prefix is an optional flag that allows using doubled prefix for
2428 2427 its escaping.
2429 2428 """
2430 2429 fn = fn or (lambda s: s)
2431 2430 patterns = '|'.join(mapping.keys())
2432 2431 if escape_prefix:
2433 2432 patterns += '|' + prefix
2434 2433 if len(prefix) > 1:
2435 2434 prefix_char = prefix[1:]
2436 2435 else:
2437 2436 prefix_char = prefix
2438 2437 mapping[prefix_char] = prefix_char
2439 2438 r = remod.compile(r'%s(%s)' % (prefix, patterns))
2440 2439 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2441 2440
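# A small illustration (the mapping and string are invented):
#
#   interpolate('%', {'user': 'alice'}, 'hello %user')   # -> 'hello alice'
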
2442 2441 def getport(port):
2443 2442 """Return the port for a given network service.
2444 2443
2445 2444 If port is an integer, it's returned as is. If it's a string, it's
2446 2445 looked up using socket.getservbyname(). If there's no matching
2447 2446 service, error.Abort is raised.
2448 2447 """
2449 2448 try:
2450 2449 return int(port)
2451 2450 except ValueError:
2452 2451 pass
2453 2452
2454 2453 try:
2455 2454 return socket.getservbyname(port)
2456 2455 except socket.error:
2457 2456 raise Abort(_("no port number associated with service '%s'") % port)
2458 2457
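# For example:
#
#   getport(8080)      # -> 8080
#   getport('8080')    # -> 8080
#   getport('http')    # -> 80 on a typical system, via socket.getservbyname
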
2459 2458 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
2460 2459 '0': False, 'no': False, 'false': False, 'off': False,
2461 2460 'never': False}
2462 2461
2463 2462 def parsebool(s):
2464 2463 """Parse s into a boolean.
2465 2464
2466 2465 If s is not a valid boolean, returns None.
2467 2466 """
2468 2467 return _booleans.get(s.lower(), None)
2469 2468
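# For example, parsebool('on') -> True, parsebool('Never') -> False, and
# parsebool('maybe') -> None because it is not a recognized spelling.
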
2470 2469 _hextochr = dict((a + b, chr(int(a + b, 16)))
2471 2470 for a in string.hexdigits for b in string.hexdigits)
2472 2471
2473 2472 class url(object):
2474 2473 r"""Reliable URL parser.
2475 2474
2476 2475 This parses URLs and provides attributes for the following
2477 2476 components:
2478 2477
2479 2478 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2480 2479
2481 2480 Missing components are set to None. The only exception is
2482 2481 fragment, which is set to '' if present but empty.
2483 2482
2484 2483 If parsefragment is False, fragment is included in query. If
2485 2484 parsequery is False, query is included in path. If both are
2486 2485 False, both fragment and query are included in path.
2487 2486
2488 2487 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2489 2488
2490 2489 Note that for backward compatibility reasons, bundle URLs do not
2491 2490 take host names. That means 'bundle://../' has a path of '../'.
2492 2491
2493 2492 Examples:
2494 2493
2495 2494 >>> url('http://www.ietf.org/rfc/rfc2396.txt')
2496 2495 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2497 2496 >>> url('ssh://[::1]:2200//home/joe/repo')
2498 2497 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2499 2498 >>> url('file:///home/joe/repo')
2500 2499 <url scheme: 'file', path: '/home/joe/repo'>
2501 2500 >>> url('file:///c:/temp/foo/')
2502 2501 <url scheme: 'file', path: 'c:/temp/foo/'>
2503 2502 >>> url('bundle:foo')
2504 2503 <url scheme: 'bundle', path: 'foo'>
2505 2504 >>> url('bundle://../foo')
2506 2505 <url scheme: 'bundle', path: '../foo'>
2507 2506 >>> url(r'c:\foo\bar')
2508 2507 <url path: 'c:\\foo\\bar'>
2509 2508 >>> url(r'\\blah\blah\blah')
2510 2509 <url path: '\\\\blah\\blah\\blah'>
2511 2510 >>> url(r'\\blah\blah\blah#baz')
2512 2511 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2513 2512 >>> url(r'file:///C:\users\me')
2514 2513 <url scheme: 'file', path: 'C:\\users\\me'>
2515 2514
2516 2515 Authentication credentials:
2517 2516
2518 2517 >>> url('ssh://joe:xyz@x/repo')
2519 2518 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2520 2519 >>> url('ssh://joe@x/repo')
2521 2520 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2522 2521
2523 2522 Query strings and fragments:
2524 2523
2525 2524 >>> url('http://host/a?b#c')
2526 2525 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2527 2526 >>> url('http://host/a?b#c', parsequery=False, parsefragment=False)
2528 2527 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2529 2528
2530 2529 Empty path:
2531 2530
2532 2531 >>> url('')
2533 2532 <url path: ''>
2534 2533 >>> url('#a')
2535 2534 <url path: '', fragment: 'a'>
2536 2535 >>> url('http://host/')
2537 2536 <url scheme: 'http', host: 'host', path: ''>
2538 2537 >>> url('http://host/#a')
2539 2538 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2540 2539
2541 2540 Only scheme:
2542 2541
2543 2542 >>> url('http:')
2544 2543 <url scheme: 'http'>
2545 2544 """
2546 2545
2547 2546 _safechars = "!~*'()+"
2548 2547 _safepchars = "/!~*'()+:\\"
2549 2548 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2550 2549
2551 2550 def __init__(self, path, parsequery=True, parsefragment=True):
2552 2551 # We slowly chomp away at path until we have only the path left
2553 2552 self.scheme = self.user = self.passwd = self.host = None
2554 2553 self.port = self.path = self.query = self.fragment = None
2555 2554 self._localpath = True
2556 2555 self._hostport = ''
2557 2556 self._origpath = path
2558 2557
2559 2558 if parsefragment and '#' in path:
2560 2559 path, self.fragment = path.split('#', 1)
2561 2560
2562 2561 # special case for Windows drive letters and UNC paths
2563 2562 if hasdriveletter(path) or path.startswith('\\\\'):
2564 2563 self.path = path
2565 2564 return
2566 2565
2567 2566 # For compatibility reasons, we can't handle bundle paths as
2568 2567 # normal URLs
2569 2568 if path.startswith('bundle:'):
2570 2569 self.scheme = 'bundle'
2571 2570 path = path[7:]
2572 2571 if path.startswith('//'):
2573 2572 path = path[2:]
2574 2573 self.path = path
2575 2574 return
2576 2575
2577 2576 if self._matchscheme(path):
2578 2577 parts = path.split(':', 1)
2579 2578 if parts[0]:
2580 2579 self.scheme, path = parts
2581 2580 self._localpath = False
2582 2581
2583 2582 if not path:
2584 2583 path = None
2585 2584 if self._localpath:
2586 2585 self.path = ''
2587 2586 return
2588 2587 else:
2589 2588 if self._localpath:
2590 2589 self.path = path
2591 2590 return
2592 2591
2593 2592 if parsequery and '?' in path:
2594 2593 path, self.query = path.split('?', 1)
2595 2594 if not path:
2596 2595 path = None
2597 2596 if not self.query:
2598 2597 self.query = None
2599 2598
2600 2599 # // is required to specify a host/authority
2601 2600 if path and path.startswith('//'):
2602 2601 parts = path[2:].split('/', 1)
2603 2602 if len(parts) > 1:
2604 2603 self.host, path = parts
2605 2604 else:
2606 2605 self.host = parts[0]
2607 2606 path = None
2608 2607 if not self.host:
2609 2608 self.host = None
2610 2609 # path of file:///d is /d
2611 2610 # path of file:///d:/ is d:/, not /d:/
2612 2611 if path and not hasdriveletter(path):
2613 2612 path = '/' + path
2614 2613
2615 2614 if self.host and '@' in self.host:
2616 2615 self.user, self.host = self.host.rsplit('@', 1)
2617 2616 if ':' in self.user:
2618 2617 self.user, self.passwd = self.user.split(':', 1)
2619 2618 if not self.host:
2620 2619 self.host = None
2621 2620
2622 2621 # Don't split on colons in IPv6 addresses without ports
2623 2622 if (self.host and ':' in self.host and
2624 2623 not (self.host.startswith('[') and self.host.endswith(']'))):
2625 2624 self._hostport = self.host
2626 2625 self.host, self.port = self.host.rsplit(':', 1)
2627 2626 if not self.host:
2628 2627 self.host = None
2629 2628
2630 2629 if (self.host and self.scheme == 'file' and
2631 2630 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2632 2631 raise Abort(_('file:// URLs can only refer to localhost'))
2633 2632
2634 2633 self.path = path
2635 2634
2636 2635 # leave the query string escaped
2637 2636 for a in ('user', 'passwd', 'host', 'port',
2638 2637 'path', 'fragment'):
2639 2638 v = getattr(self, a)
2640 2639 if v is not None:
2641 2640 setattr(self, a, pycompat.urlunquote(v))
2642 2641
2643 2642 def __repr__(self):
2644 2643 attrs = []
2645 2644 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2646 2645 'query', 'fragment'):
2647 2646 v = getattr(self, a)
2648 2647 if v is not None:
2649 2648 attrs.append('%s: %r' % (a, v))
2650 2649 return '<url %s>' % ', '.join(attrs)
2651 2650
2652 2651 def __str__(self):
2653 2652 r"""Join the URL's components back into a URL string.
2654 2653
2655 2654 Examples:
2656 2655
2657 2656 >>> str(url('http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2658 2657 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2659 2658 >>> str(url('http://user:pw@host:80/?foo=bar&baz=42'))
2660 2659 'http://user:pw@host:80/?foo=bar&baz=42'
2661 2660 >>> str(url('http://user:pw@host:80/?foo=bar%3dbaz'))
2662 2661 'http://user:pw@host:80/?foo=bar%3dbaz'
2663 2662 >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
2664 2663 'ssh://user:pw@[::1]:2200//home/joe#'
2665 2664 >>> str(url('http://localhost:80//'))
2666 2665 'http://localhost:80//'
2667 2666 >>> str(url('http://localhost:80/'))
2668 2667 'http://localhost:80/'
2669 2668 >>> str(url('http://localhost:80'))
2670 2669 'http://localhost:80/'
2671 2670 >>> str(url('bundle:foo'))
2672 2671 'bundle:foo'
2673 2672 >>> str(url('bundle://../foo'))
2674 2673 'bundle:../foo'
2675 2674 >>> str(url('path'))
2676 2675 'path'
2677 2676 >>> str(url('file:///tmp/foo/bar'))
2678 2677 'file:///tmp/foo/bar'
2679 2678 >>> str(url('file:///c:/tmp/foo/bar'))
2680 2679 'file:///c:/tmp/foo/bar'
2681 2680 >>> print url(r'bundle:foo\bar')
2682 2681 bundle:foo\bar
2683 2682 >>> print url(r'file:///D:\data\hg')
2684 2683 file:///D:\data\hg
2685 2684 """
2686 2685 return encoding.strfromlocal(self.__bytes__())
2687 2686
2688 2687 def __bytes__(self):
2689 2688 if self._localpath:
2690 2689 s = self.path
2691 2690 if self.scheme == 'bundle':
2692 2691 s = 'bundle:' + s
2693 2692 if self.fragment:
2694 2693 s += '#' + self.fragment
2695 2694 return s
2696 2695
2697 2696 s = self.scheme + ':'
2698 2697 if self.user or self.passwd or self.host:
2699 2698 s += '//'
2700 2699 elif self.scheme and (not self.path or self.path.startswith('/')
2701 2700 or hasdriveletter(self.path)):
2702 2701 s += '//'
2703 2702 if hasdriveletter(self.path):
2704 2703 s += '/'
2705 2704 if self.user:
2706 2705 s += urlreq.quote(self.user, safe=self._safechars)
2707 2706 if self.passwd:
2708 2707 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2709 2708 if self.user or self.passwd:
2710 2709 s += '@'
2711 2710 if self.host:
2712 2711 if not (self.host.startswith('[') and self.host.endswith(']')):
2713 2712 s += urlreq.quote(self.host)
2714 2713 else:
2715 2714 s += self.host
2716 2715 if self.port:
2717 2716 s += ':' + urlreq.quote(self.port)
2718 2717 if self.host:
2719 2718 s += '/'
2720 2719 if self.path:
2721 2720 # TODO: similar to the query string, we should not unescape the
2722 2721 # path when we store it, the path might contain '%2f' = '/',
2723 2722 # which we should *not* escape.
2724 2723 s += urlreq.quote(self.path, safe=self._safepchars)
2725 2724 if self.query:
2726 2725 # we store the query in escaped form.
2727 2726 s += '?' + self.query
2728 2727 if self.fragment is not None:
2729 2728 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2730 2729 return s
2731 2730
2732 2731 def authinfo(self):
2733 2732 user, passwd = self.user, self.passwd
2734 2733 try:
2735 2734 self.user, self.passwd = None, None
2736 2735 s = str(self)
2737 2736 finally:
2738 2737 self.user, self.passwd = user, passwd
2739 2738 if not self.user:
2740 2739 return (s, None)
2741 2740 # authinfo[1] is passed to urllib2 password manager, and its
2742 2741 # URIs must not contain credentials. The host is passed in the
2743 2742 # URIs list because Python < 2.4.3 uses only that to search for
2744 2743 # a password.
2745 2744 return (s, (None, (s, self.host),
2746 2745 self.user, self.passwd or ''))
2747 2746
2748 2747 def isabs(self):
2749 2748 if self.scheme and self.scheme != 'file':
2750 2749 return True # remote URL
2751 2750 if hasdriveletter(self.path):
2752 2751 return True # absolute for our purposes - can't be joined()
2753 2752 if self.path.startswith(r'\\'):
2754 2753 return True # Windows UNC path
2755 2754 if self.path.startswith('/'):
2756 2755 return True # POSIX-style
2757 2756 return False
2758 2757
2759 2758 def localpath(self):
2760 2759 if self.scheme == 'file' or self.scheme == 'bundle':
2761 2760 path = self.path or '/'
2762 2761 # For Windows, we need to promote hosts containing drive
2763 2762 # letters to paths with drive letters.
2764 2763 if hasdriveletter(self._hostport):
2765 2764 path = self._hostport + '/' + self.path
2766 2765 elif (self.host is not None and self.path
2767 2766 and not hasdriveletter(path)):
2768 2767 path = '/' + path
2769 2768 return path
2770 2769 return self._origpath
2771 2770
2772 2771 def islocal(self):
2773 2772 '''whether localpath will return something that posixfile can open'''
2774 2773 return (not self.scheme or self.scheme == 'file'
2775 2774 or self.scheme == 'bundle')
2776 2775
2777 2776 def hasscheme(path):
2778 2777 return bool(url(path).scheme)
2779 2778
2780 2779 def hasdriveletter(path):
2781 2780 return path and path[1:2] == ':' and path[0:1].isalpha()
2782 2781
2783 2782 def urllocalpath(path):
2784 2783 return url(path, parsequery=False, parsefragment=False).localpath()
2785 2784
2786 2785 def hidepassword(u):
2787 2786 '''hide user credential in a url string'''
2788 2787 u = url(u)
2789 2788 if u.passwd:
2790 2789 u.passwd = '***'
2791 2790 return str(u)
2792 2791
2793 2792 def removeauth(u):
2794 2793 '''remove all authentication information from a url string'''
2795 2794 u = url(u)
2796 2795 u.user = u.passwd = None
2797 2796 return str(u)
2798 2797
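# For example (the URL is made up):
#
#   hidepassword('http://alice:secret@example.com/repo')
#   # -> 'http://alice:***@example.com/repo'
#   removeauth('http://alice:secret@example.com/repo')
#   # -> 'http://example.com/repo'
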
2799 2798 timecount = unitcountfn(
2800 2799 (1, 1e3, _('%.0f s')),
2801 2800 (100, 1, _('%.1f s')),
2802 2801 (10, 1, _('%.2f s')),
2803 2802 (1, 1, _('%.3f s')),
2804 2803 (100, 0.001, _('%.1f ms')),
2805 2804 (10, 0.001, _('%.2f ms')),
2806 2805 (1, 0.001, _('%.3f ms')),
2807 2806 (100, 0.000001, _('%.1f us')),
2808 2807 (10, 0.000001, _('%.2f us')),
2809 2808 (1, 0.000001, _('%.3f us')),
2810 2809 (100, 0.000000001, _('%.1f ns')),
2811 2810 (10, 0.000000001, _('%.2f ns')),
2812 2811 (1, 0.000000001, _('%.3f ns')),
2813 2812 )
2814 2813
2815 2814 _timenesting = [0]
2816 2815
2817 2816 def timed(func):
2818 2817 '''Report the execution time of a function call to stderr.
2819 2818
2820 2819 During development, use as a decorator when you need to measure
2821 2820 the cost of a function, e.g. as follows:
2822 2821
2823 2822 @util.timed
2824 2823 def foo(a, b, c):
2825 2824 pass
2826 2825 '''
2827 2826
2828 2827 def wrapper(*args, **kwargs):
2829 2828 start = timer()
2830 2829 indent = 2
2831 2830 _timenesting[0] += indent
2832 2831 try:
2833 2832 return func(*args, **kwargs)
2834 2833 finally:
2835 2834 elapsed = timer() - start
2836 2835 _timenesting[0] -= indent
2837 2836 stderr.write('%s%s: %s\n' %
2838 2837 (' ' * _timenesting[0], func.__name__,
2839 2838 timecount(elapsed)))
2840 2839 return wrapper
2841 2840
2842 2841 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
2843 2842 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
2844 2843
2845 2844 def sizetoint(s):
2846 2845 '''Convert a size specifier to a byte count.
2847 2846
2848 2847 >>> sizetoint('30')
2849 2848 30
2850 2849 >>> sizetoint('2.2kb')
2851 2850 2252
2852 2851 >>> sizetoint('6M')
2853 2852 6291456
2854 2853 '''
2855 2854 t = s.strip().lower()
2856 2855 try:
2857 2856 for k, u in _sizeunits:
2858 2857 if t.endswith(k):
2859 2858 return int(float(t[:-len(k)]) * u)
2860 2859 return int(t)
2861 2860 except ValueError:
2862 2861 raise error.ParseError(_("couldn't parse size: %s") % s)
2863 2862
2864 2863 class hooks(object):
2865 2864 '''A collection of hook functions that can be used to extend a
2866 2865 function's behavior. Hooks are called in lexicographic order,
2867 2866 based on the names of their sources.'''
2868 2867
2869 2868 def __init__(self):
2870 2869 self._hooks = []
2871 2870
2872 2871 def add(self, source, hook):
2873 2872 self._hooks.append((source, hook))
2874 2873
2875 2874 def __call__(self, *args):
2876 2875 self._hooks.sort(key=lambda x: x[0])
2877 2876 results = []
2878 2877 for source, hook in self._hooks:
2879 2878 results.append(hook(*args))
2880 2879 return results
2881 2880
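# A sketch of the intended use (the sources and hooks are invented):
#
#   h = hooks()
#   h.add('extB', lambda x: x * 2)
#   h.add('extA', lambda x: x + 1)
#   h(3)   # -> [4, 6]; hooks run sorted by source name, 'extA' first
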
2882 2881 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%s', depth=0):
2883 2882 '''Yields lines for a nicely formatted stacktrace.
2884 2883 Skips the 'skip' last entries, then returns the last 'depth' entries.
2885 2884 Each file+linenumber is formatted according to fileline.
2886 2885 Each line is formatted according to line.
2887 2886 If line is None, it yields:
2888 2887 length of longest filepath+line number,
2889 2888 filepath+linenumber,
2890 2889 function
2891 2890
2892 2891 Not to be used in production code, but very convenient while developing.
2893 2892 '''
2894 2893 entries = [(fileline % (fn, ln), func)
2895 2894 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
2896 2895 ][-depth:]
2897 2896 if entries:
2898 2897 fnmax = max(len(entry[0]) for entry in entries)
2899 2898 for fnln, func in entries:
2900 2899 if line is None:
2901 2900 yield (fnmax, fnln, func)
2902 2901 else:
2903 2902 yield line % (fnmax, fnln, func)
2904 2903
2905 2904 def debugstacktrace(msg='stacktrace', skip=0,
2906 2905 f=stderr, otherf=stdout, depth=0):
2907 2906 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
2908 2907 Skips the 'skip' entries closest to the call, then shows 'depth' entries.
2909 2908 By default it will flush stdout first.
2910 2909 It can be used everywhere and intentionally does not require an ui object.
2911 2910 Not to be used in production code, but very convenient while developing.
2912 2911 '''
2913 2912 if otherf:
2914 2913 otherf.flush()
2915 2914 f.write('%s at:\n' % msg.rstrip())
2916 2915 for line in getstackframes(skip + 1, depth=depth):
2917 2916 f.write(line)
2918 2917 f.flush()
2919 2918
2920 2919 class dirs(object):
2921 2920 '''a multiset of directory names from a dirstate or manifest'''
2922 2921
2923 2922 def __init__(self, map, skip=None):
2924 2923 self._dirs = {}
2925 2924 addpath = self.addpath
2926 2925 if safehasattr(map, 'iteritems') and skip is not None:
2927 2926 for f, s in map.iteritems():
2928 2927 if s[0] != skip:
2929 2928 addpath(f)
2930 2929 else:
2931 2930 for f in map:
2932 2931 addpath(f)
2933 2932
2934 2933 def addpath(self, path):
2935 2934 dirs = self._dirs
2936 2935 for base in finddirs(path):
2937 2936 if base in dirs:
2938 2937 dirs[base] += 1
2939 2938 return
2940 2939 dirs[base] = 1
2941 2940
2942 2941 def delpath(self, path):
2943 2942 dirs = self._dirs
2944 2943 for base in finddirs(path):
2945 2944 if dirs[base] > 1:
2946 2945 dirs[base] -= 1
2947 2946 return
2948 2947 del dirs[base]
2949 2948
2950 2949 def __iter__(self):
2951 2950 return iter(self._dirs)
2952 2951
2953 2952 def __contains__(self, d):
2954 2953 return d in self._dirs
2955 2954
2956 2955 if safehasattr(parsers, 'dirs'):
2957 2956 dirs = parsers.dirs
2958 2957
2959 2958 def finddirs(path):
2960 2959 pos = path.rfind('/')
2961 2960 while pos != -1:
2962 2961 yield path[:pos]
2963 2962 pos = path.rfind('/', 0, pos)
2964 2963
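# finddirs yields ancestors, deepest first: finddirs('a/b/c') produces
# 'a/b' and then 'a'; neither the path itself nor '' is included.
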
2965 2964 class ctxmanager(object):
2966 2965 '''A context manager for use in 'with' blocks to allow multiple
2967 2966 contexts to be entered at once. This is both safer and more
2968 2967 flexible than contextlib.nested.
2969 2968
2970 2969 Once Mercurial supports Python 2.7+, this will become mostly
2971 2970 unnecessary.
2972 2971 '''
2973 2972
2974 2973 def __init__(self, *args):
2975 2974 '''Accepts a list of no-argument functions that return context
2976 2975 managers. These will be invoked at __call__ time.'''
2977 2976 self._pending = args
2978 2977 self._atexit = []
2979 2978
2980 2979 def __enter__(self):
2981 2980 return self
2982 2981
2983 2982 def enter(self):
2984 2983 '''Create and enter context managers in the order in which they were
2985 2984 passed to the constructor.'''
2986 2985 values = []
2987 2986 for func in self._pending:
2988 2987 obj = func()
2989 2988 values.append(obj.__enter__())
2990 2989 self._atexit.append(obj.__exit__)
2991 2990 del self._pending
2992 2991 return values
2993 2992
2994 2993 def atexit(self, func, *args, **kwargs):
2995 2994 '''Add a function to call when this context manager exits. The
2996 2995 ordering of multiple atexit calls is unspecified, save that
2997 2996 they will happen before any __exit__ functions.'''
2998 2997 def wrapper(exc_type, exc_val, exc_tb):
2999 2998 func(*args, **kwargs)
3000 2999 self._atexit.append(wrapper)
3001 3000 return func
3002 3001
3003 3002 def __exit__(self, exc_type, exc_val, exc_tb):
3004 3003 '''Context managers are exited in the reverse order from which
3005 3004 they were created.'''
3006 3005 received = exc_type is not None
3007 3006 suppressed = False
3008 3007 pending = None
3009 3008 self._atexit.reverse()
3010 3009 for exitfunc in self._atexit:
3011 3010 try:
3012 3011 if exitfunc(exc_type, exc_val, exc_tb):
3013 3012 suppressed = True
3014 3013 exc_type = None
3015 3014 exc_val = None
3016 3015 exc_tb = None
3017 3016 except BaseException:
3018 3017 # remember the most recent failure so it can be re-raised below
3019 3018 exc_type, exc_val, exc_tb = pending = sys.exc_info()
3020 3019 del self._atexit
3021 3020 if pending:
3022 3021 raise exc_val
3023 3022 return received and suppressed
3024 3023
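# A sketch of ctxmanager in a 'with' block (the file names are arbitrary):
#
#   with ctxmanager(lambda: open('a'), lambda: open('b')) as c:
#       fa, fb = c.enter()
#       c.atexit(stderr.write, 'cleaning up\n')
#       # ... use fa and fb ...
#
# On exit the atexit callback runs first, then 'b' and 'a' are closed in
# reverse order of creation.
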
3025 3024 # compression code
3026 3025
3027 3026 SERVERROLE = 'server'
3028 3027 CLIENTROLE = 'client'
3029 3028
3030 3029 compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
3031 3030 (u'name', u'serverpriority',
3032 3031 u'clientpriority'))
3033 3032
3034 3033 class compressormanager(object):
3035 3034 """Holds registrations of various compression engines.
3036 3035
3037 3036 This class essentially abstracts the differences between compression
3038 3037 engines to allow new compression formats to be added easily, possibly from
3039 3038 extensions.
3040 3039
3041 3040 Compressors are registered against the global instance by calling its
3042 3041 ``register()`` method.
3043 3042 """
3044 3043 def __init__(self):
3045 3044 self._engines = {}
3046 3045 # Bundle spec human name to engine name.
3047 3046 self._bundlenames = {}
3048 3047 # Internal bundle identifier to engine name.
3049 3048 self._bundletypes = {}
3050 3049 # Revlog header to engine name.
3051 3050 self._revlogheaders = {}
3052 3051 # Wire proto identifier to engine name.
3053 3052 self._wiretypes = {}
3054 3053
3055 3054 def __getitem__(self, key):
3056 3055 return self._engines[key]
3057 3056
3058 3057 def __contains__(self, key):
3059 3058 return key in self._engines
3060 3059
3061 3060 def __iter__(self):
3062 3061 return iter(self._engines.keys())
3063 3062
3064 3063 def register(self, engine):
3065 3064 """Register a compression engine with the manager.
3066 3065
3067 3066 The argument must be a ``compressionengine`` instance.
3068 3067 """
3069 3068 if not isinstance(engine, compressionengine):
3070 3069 raise ValueError(_('argument must be a compressionengine'))
3071 3070
3072 3071 name = engine.name()
3073 3072
3074 3073 if name in self._engines:
3075 3074 raise error.Abort(_('compression engine %s already registered') %
3076 3075 name)
3077 3076
3078 3077 bundleinfo = engine.bundletype()
3079 3078 if bundleinfo:
3080 3079 bundlename, bundletype = bundleinfo
3081 3080
3082 3081 if bundlename in self._bundlenames:
3083 3082 raise error.Abort(_('bundle name %s already registered') %
3084 3083 bundlename)
3085 3084 if bundletype in self._bundletypes:
3086 3085 raise error.Abort(_('bundle type %s already registered by %s') %
3087 3086 (bundletype, self._bundletypes[bundletype]))
3088 3087
3089 3088 # A bundlename of None means no external-facing name is declared.
3090 3089 if bundlename:
3091 3090 self._bundlenames[bundlename] = name
3092 3091
3093 3092 self._bundletypes[bundletype] = name
3094 3093
3095 3094 wiresupport = engine.wireprotosupport()
3096 3095 if wiresupport:
3097 3096 wiretype = wiresupport.name
3098 3097 if wiretype in self._wiretypes:
3099 3098 raise error.Abort(_('wire protocol compression %s already '
3100 3099 'registered by %s') %
3101 3100 (wiretype, self._wiretypes[wiretype]))
3102 3101
3103 3102 self._wiretypes[wiretype] = name
3104 3103
3105 3104 revlogheader = engine.revlogheader()
3106 3105 if revlogheader and revlogheader in self._revlogheaders:
3107 3106 raise error.Abort(_('revlog header %s already registered by %s') %
3108 3107 (revlogheader, self._revlogheaders[revlogheader]))
3109 3108
3110 3109 if revlogheader:
3111 3110 self._revlogheaders[revlogheader] = name
3112 3111
3113 3112 self._engines[name] = engine
3114 3113
3115 3114 @property
3116 3115 def supportedbundlenames(self):
3117 3116 return set(self._bundlenames.keys())
3118 3117
3119 3118 @property
3120 3119 def supportedbundletypes(self):
3121 3120 return set(self._bundletypes.keys())
3122 3121
3123 3122 def forbundlename(self, bundlename):
3124 3123 """Obtain a compression engine registered to a bundle name.
3125 3124
3126 3125 Will raise KeyError if the bundle type isn't registered.
3127 3126
3128 3127 Will abort if the engine is known but not available.
3129 3128 """
3130 3129 engine = self._engines[self._bundlenames[bundlename]]
3131 3130 if not engine.available():
3132 3131 raise error.Abort(_('compression engine %s could not be loaded') %
3133 3132 engine.name())
3134 3133 return engine
3135 3134
3136 3135 def forbundletype(self, bundletype):
3137 3136 """Obtain a compression engine registered to a bundle type.
3138 3137
3139 3138 Will raise KeyError if the bundle type isn't registered.
3140 3139
3141 3140 Will abort if the engine is known but not available.
3142 3141 """
3143 3142 engine = self._engines[self._bundletypes[bundletype]]
3144 3143 if not engine.available():
3145 3144 raise error.Abort(_('compression engine %s could not be loaded') %
3146 3145 engine.name())
3147 3146 return engine
3148 3147
3149 3148 def supportedwireengines(self, role, onlyavailable=True):
3150 3149 """Obtain compression engines that support the wire protocol.
3151 3150
3152 3151 Returns a list of engines in prioritized order, most desired first.
3153 3152
3154 3153 If ``onlyavailable`` is set, filter out engines that can't be
3155 3154 loaded.
3156 3155 """
3157 3156 assert role in (SERVERROLE, CLIENTROLE)
3158 3157
3159 3158 attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'
3160 3159
3161 3160 engines = [self._engines[e] for e in self._wiretypes.values()]
3162 3161 if onlyavailable:
3163 3162 engines = [e for e in engines if e.available()]
3164 3163
3165 3164 def getkey(e):
3166 3165 # Sort first by priority, highest first. In case of tie, sort
3167 3166 # alphabetically. This is arbitrary, but ensures output is
3168 3167 # stable.
3169 3168 w = e.wireprotosupport()
3170 3169 return -1 * getattr(w, attr), w.name
3171 3170
3172 3171 return list(sorted(engines, key=getkey))
3173 3172
3174 3173 def forwiretype(self, wiretype):
3175 3174 engine = self._engines[self._wiretypes[wiretype]]
3176 3175 if not engine.available():
3177 3176 raise error.Abort(_('compression engine %s could not be loaded') %
3178 3177 engine.name())
3179 3178 return engine
3180 3179
3181 3180 def forrevlogheader(self, header):
3182 3181 """Obtain a compression engine registered to a revlog header.
3183 3182
3184 3183 Will raise KeyError if the revlog header value isn't registered.
3185 3184 """
3186 3185 return self._engines[self._revlogheaders[header]]
3187 3186
3188 3187 compengines = compressormanager()
3189 3188
3190 3189 class compressionengine(object):
3191 3190 """Base class for compression engines.
3192 3191
3193 3192 Compression engines must implement the interface defined by this class.
3194 3193 """
3195 3194 def name(self):
3196 3195 """Returns the name of the compression engine.
3197 3196
3198 3197 This is the key the engine is registered under.
3199 3198
3200 3199 This method must be implemented.
3201 3200 """
3202 3201 raise NotImplementedError()
3203 3202
3204 3203 def available(self):
3205 3204 """Whether the compression engine is available.
3206 3205
3207 3206 The intent of this method is to allow optional compression engines
3208 3207 that may not be available in all installations (such as engines relying
3209 3208 on C extensions that may not be present).
3210 3209 """
3211 3210 return True
3212 3211
3213 3212 def bundletype(self):
3214 3213 """Describes bundle identifiers for this engine.
3215 3214
3216 3215 If this compression engine isn't supported for bundles, returns None.
3217 3216
3218 3217 If this engine can be used for bundles, returns a 2-tuple of strings of
3219 3218 the user-facing "bundle spec" compression name and an internal
3220 3219 identifier used to denote the compression format within bundles. To
3221 3220 exclude the name from external usage, set the first element to ``None``.
3222 3221
3223 3222 If bundle compression is supported, the class must also implement
3224 3223 ``compressstream`` and ``decompressorreader``.
3225 3224 """
        return None

    def wireprotosupport(self):
        """Declare support for this compression format on the wire protocol.

        If this compression engine isn't supported for compressing wire
        protocol payloads, returns None.

        Otherwise, returns ``compenginewireprotosupport`` with the following
        fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        The integer priorities are used to order the advertisement of format
        support by server and client. The highest integer is advertised
        first. Integers with non-positive values aren't advertised.

        The priority values are somewhat arbitrary and only used for default
        ordering. The relative order can be changed via config options.

        If wire protocol compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.
        """
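        # For example, the zlib engine below returns
        # compewireprotosupport('zlib', 20, 20): wire format 'zlib' with a
        # server and a client priority of 20.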
        return None

    def revlogheader(self):
        """Header added to revlog chunks that identifies this engine.

        If this engine can be used to compress revlogs, this method should
        return the bytes used to identify chunks compressed with this engine.
        Else, the method should return ``None`` to indicate it does not
        participate in revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        The method receives an iterator (ideally a generator) of chunks of
        bytes to be compressed. It returns an iterator (ideally a generator)
        of bytes of chunks representing the compressed output.

        Optionally accepts an argument defining how to perform compression.
        Each engine treats this argument differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        Argument is an object with a ``read(size)`` method that returns
        compressed data. Return value is an object with a ``read(size)`` that
        returns uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data. This method returns compressed binary data or ``None`` if
        the data could not be compressed (too small, not compressible, etc).
        The returned data should have a header uniquely identifying this
        compression format so decompression can be routed to this engine.
        This header should be identified by the ``revlogheader()`` return
        value.

        The object has a ``decompress(data)`` method that decompresses
        data. The method will only be called if ``data`` begins with
        ``revlogheader()``. The method should return the raw, uncompressed
        data or raise a ``RevlogError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()

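# Illustrative sketch, not part of the original module: ``name`` is the only
# method an engine must implement; bundle, wire protocol and revlog support
# are each opted into by overriding the corresponding methods above. This
# hypothetical engine opts into streaming only and is deliberately never
# registered.
class _identityengine(compressionengine):
    def name(self):
        return 'identity'

    def compressstream(self, it, opts=None):
        # "Compress" by passing chunks through untouched.
        return it

    def decompressorreader(self, fh):
        # The compressed and uncompressed streams are identical.
        return fh
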
class _zlibengine(compressionengine):
    def name(self):
        return 'zlib'

    def bundletype(self):
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        opts = opts or {}

        z = zlib.compressobj(opts.get('level', -1))
        for chunk in it:
            data = z.compress(chunk)
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through generator.
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = zlib.decompressobj()
            for chunk in filechunkiter(fh):
                while chunk:
                    # Limit output size to limit memory.
                    yield d.decompress(chunk, 2 ** 18)
                    chunk = d.unconsumed_tail

        return chunkbuffer(gen())

    class zlibrevlogcompressor(object):
        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                return None

            elif insize <= 1000000:
                compressed = zlib.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            else:
                z = zlib.compressobj()
                parts = []
                pos = 0
                while pos < insize:
                    pos2 = pos + 2**20
                    parts.append(z.compress(data[pos:pos2]))
                    pos = pos2
                parts.append(z.flush())

                if sum(map(len, parts)) < insize:
                    return ''.join(parts)
                return None

        def decompress(self, data):
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())
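
# Illustrative sketch, not part of the original module: round-trip a payload
# through the zlib engine's streaming interface. ``stringio`` is the
# module-level BytesIO-style alias imported near the top of this file; the
# function is defined but never called, so importing the module is
# unaffected.
def _demozlibroundtrip():
    engine = compengines.forwiretype('zlib')
    # compressstream consumes an iterator of byte chunks and yields
    # compressed chunks; join them into a single payload.
    compressed = ''.join(engine.compressstream(['hello ', 'world']))
    # decompressorreader wraps a file object and exposes read(size).
    reader = engine.decompressorreader(stringio(compressed))
    assert reader.read(11) == 'hello world'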

class _bz2engine(compressionengine):
    def name(self):
        return 'bz2'

    def bundletype(self):
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        opts = opts or {}
        z = bz2.BZ2Compressor(opts.get('level', 9))
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = bz2.BZ2Decompressor()
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_bz2engine())

class _truncatedbz2engine(compressionengine):
    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled elsewhere.

    def decompressorreader(self, fh):
        def gen():
            # The input stream doesn't have the 'BZ' header. So add it back.
            d = bz2.BZ2Decompressor()
            d.decompress('BZ')
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_truncatedbz2engine())

class _noopengine(compressionengine):
    def name(self):
        return 'none'

    def bundletype(self):
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't because
    # unless you are on a fast network, uncompressed payloads can easily
    # saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        return it

    def decompressorreader(self, fh):
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())

class _zstdengine(compressionengine):
    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
            return zstd
        except ImportError:
            return None

    def available(self):
        return bool(self._module)

    def bundletype(self):
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        opts = opts or {}
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = opts.get('level', 3)

        zstd = self._module
        z = zstd.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        zstd = self._module
        dctx = zstd.ZstdDecompressor()
        return chunkbuffer(dctx.read_from(fh))

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # Writing the content size adds a few bytes to the output. However,
            # it allows decompression to be more optimal since we can
            # pre-allocate a buffer to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level,
                                             write_content_size=True)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                return None

            elif insize <= 1000000:
                compressed = self._cctx.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None
            else:
                z = self._cctx.compressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._compinsize
                    chunk = z.compress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                chunks.append(z.flush())

                if sum(map(len, chunks)) < insize:
                    return ''.join(chunks)
                return None

        def decompress(self, data):
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._decompinsize
                    chunk = dobj.decompress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                # Frame should be exhausted, so no finish() API.

                return ''.join(chunks)
            except Exception as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))

compengines.register(_zstdengine())
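
# Illustrative sketch, not part of the original module: pick the highest
# priority available wire engine (zstd when its module is importable, zlib
# otherwise) and round-trip data through its revlog compressor. Defined but
# never called, so importing the module is unaffected.
def _demorevlogcompressor():
    engine = compengines.supportedwireengines(SERVERROLE)[0]
    compressor = engine.revlogcompressor()
    data = 'x' * 4096
    compressed = compressor.compress(data)
    # compress() returns None when the input is too small or incompressible.
    if compressed is not None:
        assert compressor.decompress(compressed) == data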

# convenient shortcut
dst = debugstacktrace