##// END OF EJS Templates
util: enable hardlink for some BSD-family filesystems...
Jun Wu -
r31598:32f09ead default
parent child Browse files
Show More
@@ -1,3606 +1,3609 b''
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import
17 17
18 18 import bz2
19 19 import calendar
20 20 import codecs
21 21 import collections
22 22 import datetime
23 23 import errno
24 24 import gc
25 25 import hashlib
26 26 import imp
27 27 import os
28 28 import platform as pyplatform
29 29 import re as remod
30 30 import shutil
31 31 import signal
32 32 import socket
33 33 import stat
34 34 import string
35 35 import subprocess
36 36 import sys
37 37 import tempfile
38 38 import textwrap
39 39 import time
40 40 import traceback
41 41 import zlib
42 42
43 43 from . import (
44 44 encoding,
45 45 error,
46 46 i18n,
47 47 osutil,
48 48 parsers,
49 49 pycompat,
50 50 )
51 51
52 52 empty = pycompat.empty
53 53 httplib = pycompat.httplib
54 54 httpserver = pycompat.httpserver
55 55 pickle = pycompat.pickle
56 56 queue = pycompat.queue
57 57 socketserver = pycompat.socketserver
58 58 stderr = pycompat.stderr
59 59 stdin = pycompat.stdin
60 60 stdout = pycompat.stdout
61 61 stringio = pycompat.stringio
62 62 urlerr = pycompat.urlerr
63 63 urlreq = pycompat.urlreq
64 64 xmlrpclib = pycompat.xmlrpclib
65 65
def isatty(fp):
    """Return ``fp.isatty()``, or False if that raises AttributeError."""
    try:
        answer = fp.isatty()
    except AttributeError:
        answer = False
    return answer
71 71
# glibc picks the buffering mode on the first write to stdout. If a stdout
# bound to a TTY is later swapped for one bound to a pipe (e.g. a pager), we
# still want line buffering, so reopen it line-buffered right away.
if isatty(stdout):
    stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)

# Select the platform-specific implementation module.
if pycompat.osname != 'nt':
    from . import posix as platform
else:
    from . import windows as platform
    stdout = platform.winstdout(stdout)
83 83
84 84 _ = i18n._
85 85
86 86 bindunixsocket = platform.bindunixsocket
87 87 cachestat = platform.cachestat
88 88 checkexec = platform.checkexec
89 89 checklink = platform.checklink
90 90 copymode = platform.copymode
91 91 executablepath = platform.executablepath
92 92 expandglobs = platform.expandglobs
93 93 explainexit = platform.explainexit
94 94 findexe = platform.findexe
95 95 gethgcmd = platform.gethgcmd
96 96 getuser = platform.getuser
97 97 getpid = os.getpid
98 98 groupmembers = platform.groupmembers
99 99 groupname = platform.groupname
100 100 hidewindow = platform.hidewindow
101 101 isexec = platform.isexec
102 102 isowner = platform.isowner
103 103 localpath = platform.localpath
104 104 lookupreg = platform.lookupreg
105 105 makedir = platform.makedir
106 106 nlinks = platform.nlinks
107 107 normpath = platform.normpath
108 108 normcase = platform.normcase
109 109 normcasespec = platform.normcasespec
110 110 normcasefallback = platform.normcasefallback
111 111 openhardlinks = platform.openhardlinks
112 112 oslink = platform.oslink
113 113 parsepatchoutput = platform.parsepatchoutput
114 114 pconvert = platform.pconvert
115 115 poll = platform.poll
116 116 popen = platform.popen
117 117 posixfile = platform.posixfile
118 118 quotecommand = platform.quotecommand
119 119 readpipe = platform.readpipe
120 120 rename = platform.rename
121 121 removedirs = platform.removedirs
122 122 samedevice = platform.samedevice
123 123 samefile = platform.samefile
124 124 samestat = platform.samestat
125 125 setbinary = platform.setbinary
126 126 setflags = platform.setflags
127 127 setsignalhandler = platform.setsignalhandler
128 128 shellquote = platform.shellquote
129 129 spawndetached = platform.spawndetached
130 130 split = platform.split
131 131 sshargs = platform.sshargs
132 132 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
133 133 statisexec = platform.statisexec
134 134 statislink = platform.statislink
135 135 testpid = platform.testpid
136 136 umask = platform.umask
137 137 unlink = platform.unlink
138 138 username = platform.username
139 139
140 140 # Python compatibility
141 141
142 142 _notset = object()
143 143
144 144 # disable Python's problematic floating point timestamps (issue4836)
145 145 # (Python hypocritically says you shouldn't change this behavior in
146 146 # libraries, and sure enough Mercurial is not a library.)
147 147 os.stat_float_times(False)
148 148
def safehasattr(thing, attr):
    """Return True if ``getattr(thing, attr)`` yields a value.

    The lookup uses the private ``_notset`` sentinel as the getattr default,
    so an attribute whose value is None or False still counts as present.
    """
    found = getattr(thing, attr, _notset)
    return found is not _notset
151 151
def bitsfrom(container):
    """OR together every item of ``container``; an empty container gives 0."""
    mask = 0
    for flag in container:
        mask |= flag
    return mask
157 157
158 158 DIGESTS = {
159 159 'md5': hashlib.md5,
160 160 'sha1': hashlib.sha1,
161 161 'sha512': hashlib.sha512,
162 162 }
163 163 # List of digest types from strongest to weakest
164 164 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
165 165
166 166 for k in DIGESTS_BY_STRENGTH:
167 167 assert k in DIGESTS
168 168
class digester(object):
    """helper to compute digests.

    This helper can be used to compute one or more digests given their name.

    >>> d = digester(['md5', 'sha1'])
    >>> d.update('foo')
    >>> [k for k in sorted(d)]
    ['md5', 'sha1']
    >>> d['md5']
    'acbd18db4cc2f85cedef654fccc4a4d8'
    >>> d['sha1']
    '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
    >>> digester.preferred(['md5', 'sha1'])
    'sha1'
    """

    def __init__(self, digests, s=''):
        """Create one hash object per name in ``digests``.

        Raises Abort for a name that is not a key of DIGESTS. If ``s`` is
        non-empty it is fed to every hash immediately.
        """
        self._hashes = {}
        for k in digests:
            if k not in DIGESTS:
                raise Abort(_('unknown digest type: %s') % k)
            self._hashes[k] = DIGESTS[k]()
        if s:
            self.update(s)

    def update(self, data):
        """Feed ``data`` to every hash being computed."""
        for h in self._hashes.values():
            h.update(data)

    def __getitem__(self, key):
        """Return the hex digest for ``key``.

        Raises Abort if ``key`` is not a known digest type, and KeyError if
        it is known but was not requested at construction time.
        """
        if key not in DIGESTS:
            # report the key that was actually asked for
            raise Abort(_('unknown digest type: %s') % key)
        return self._hashes[key].hexdigest()

    def __iter__(self):
        """Iterate over the names of the digests being computed."""
        return iter(self._hashes)

    @staticmethod
    def preferred(supported):
        """returns the strongest digest type in both supported and DIGESTS."""

        for k in DIGESTS_BY_STRENGTH:
            if k in supported:
                return k
        return None
215 215
class digestchecker(object):
    """file handle wrapper that additionally checks content against a given
    size and digests.

        d = digestchecker(fh, size, {'md5': '...'})

    When multiple digests are given, all of them are validated.
    """

    def __init__(self, fh, size, digests):
        self._fh = fh
        self._size = size
        # number of bytes handed out by read() so far
        self._got = 0
        self._digests = dict(digests)
        self._digester = digester(self._digests.keys())

    def read(self, length=-1):
        """Read from the wrapped handle, hashing and counting what is read."""
        chunk = self._fh.read(length)
        self._digester.update(chunk)
        self._got += len(chunk)
        return chunk

    def validate(self):
        """Raise Abort if the size or any digest differs from what was
        expected; return None otherwise."""
        if self._got != self._size:
            raise Abort(_('size mismatch: expected %d, got %d') %
                        (self._size, self._got))
        for name, expected in self._digests.items():
            actual = self._digester[name]
            if expected != actual:
                # i18n: first parameter is a digest name
                raise Abort(_('%s mismatch: expected %s, got %s') %
                            (name, expected, actual))
247 247
try:
    # Use the builtin when the interpreter provides one.
    buffer = buffer
except NameError:
    if pycompat.ispy3:
        def buffer(sliceable, offset=0, length=None):
            """Return a memoryview slice of ``sliceable``."""
            view = memoryview(sliceable)
            if length is None:
                return view[offset:]
            return view[offset:offset + length]
    else:
        def buffer(sliceable, offset=0, length=None):
            """Return a plain slice of ``sliceable``."""
            if length is None:
                return sliceable[offset:]
            return sliceable[offset:offset + length]
261 261
262 262 closefds = pycompat.osname == 'posix'
263 263
264 264 _chunksize = 4096
265 265
class bufferedinputpipe(object):
    """a manually buffered input pipe

    Python will not let us use buffered IO and lazy reading with 'polling' at
    the same time. We cannot probe the buffer state and select will not detect
    that data are ready to read if they are already buffered.

    This class lets us work around that by implementing its own buffering
    (allowing efficient readline) while offering a way to know if the buffer
    is empty from the outside (allowing collaboration of the buffer with
    polling).

    This class lives in the 'util' module because it makes use of the 'os'
    module from the python stdlib.
    """

    def __init__(self, input):
        self._input = input
        # list of pending chunks and their combined length
        self._buffer = []
        self._eof = False
        self._lenbuf = 0

    @property
    def hasbuffer(self):
        """True if any data is currently buffered

        This will be used externally as a pre-step for polling IO. If there
        is already data then no polling should be set in place."""
        return bool(self._buffer)

    @property
    def closed(self):
        return self._input.closed

    def fileno(self):
        return self._input.fileno()

    def close(self):
        return self._input.close()

    def read(self, size):
        """Return up to ``size`` units of data, filling the buffer until
        enough is available or end of file is reached."""
        while self._lenbuf < size and not self._eof:
            self._fillbuffer()
        return self._frombuffer(size)

    def readline(self, *args, **kwargs):
        """Return data up to and including the next newline, or whatever is
        left when end of file is reached first. Arguments are ignored."""
        if len(self._buffer) > 1:
            # this should not happen because both read and readline end with
            # a _frombuffer call that collapses it.
            merged = ''.join(self._buffer)
            self._buffer = [merged]
            self._lenbuf = len(merged)
        newline = self._buffer[-1].find('\n') if self._buffer else -1
        while newline < 0 and not self._eof:
            self._fillbuffer()
            if self._buffer:
                newline = self._buffer[-1].find('\n')
        if newline < 0:
            # end of file: hand out everything that is left
            return self._frombuffer(self._lenbuf)
        wanted = newline + 1
        if len(self._buffer) > 1:
            # the newline index is relative to the last chunk; account for
            # the chunks in front of it
            wanted += self._lenbuf - len(self._buffer[-1])
        return self._frombuffer(wanted)

    def _frombuffer(self, size):
        """return at most 'size' data from the buffer

        The data are removed from the buffer."""
        if not self._buffer or size == 0:
            return ''
        if len(self._buffer) > 1:
            pending = ''.join(self._buffer)
        else:
            pending = self._buffer[0]

        data = pending[:size]
        rest = pending[len(data):]
        self._buffer = [rest] if rest else []
        self._lenbuf = len(rest)
        return data

    def _fillbuffer(self):
        """read data to the buffer"""
        data = os.read(self._input.fileno(), _chunksize)
        if not data:
            self._eof = True
            return
        self._buffer.append(data)
        self._lenbuf += len(data)
359 359
def popen2(cmd, env=None, newlines=False):
    """Run ``cmd`` through the shell; return its (stdin, stdout) pipes."""
    # Setting bufsize to -1 lets the system decide the buffer size.
    # The default for bufsize is 0, meaning unbuffered. This leads to
    # poor performance on Mac OS X: http://bugs.python.org/issue4194
    proc = subprocess.Popen(
        cmd, shell=True, bufsize=-1, close_fds=closefds,
        stdin=subprocess.PIPE, stdout=subprocess.PIPE,
        universal_newlines=newlines, env=env)
    return proc.stdin, proc.stdout
370 370
def popen3(cmd, env=None, newlines=False):
    """Like popen4(), but return only the (stdin, stdout, stderr) pipes."""
    pipes = popen4(cmd, env, newlines)
    return pipes[0], pipes[1], pipes[2]
374 374
def popen4(cmd, env=None, newlines=False, bufsize=-1):
    """Run ``cmd`` through the shell with stdin, stdout and stderr piped.

    Returns (stdin, stdout, stderr, process).
    """
    proc = subprocess.Popen(
        cmd, shell=True, bufsize=bufsize, close_fds=closefds,
        stdin=subprocess.PIPE, stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=newlines, env=env)
    return proc.stdin, proc.stdout, proc.stderr, proc
383 383
def version():
    """Return version information if available."""
    try:
        from . import __version__ as versionmod
    except ImportError:
        return 'unknown'
    return versionmod.version
391 391
def versiontuple(v=None, n=4):
    """Parses a Mercurial version string into an N-tuple.

    The version string to be parsed is specified with the ``v`` argument.
    If it isn't defined, the current Mercurial version string will be parsed.

    ``n`` can be 2, 3, or 4 (any other value yields None). Here is how some
    version strings map to returned values:

    >>> v = '3.6.1+190-df9b73d2d444'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, 1)
    >>> versiontuple(v, 4)
    (3, 6, 1, '190-df9b73d2d444')

    >>> versiontuple('3.6.1+190-df9b73d2d444+20151118')
    (3, 6, 1, '190-df9b73d2d444+20151118')

    >>> v = '3.6'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, None)
    >>> versiontuple(v, 4)
    (3, 6, None, None)

    >>> v = '3.9-rc'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc')

    >>> v = '3.9-rc+2-02a8fea4289b'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc+2-02a8fea4289b')
    """
    if not v:
        v = version()
    # Split off everything after the first '+' or '-'. The backslash is
    # written escaped so the literal holds no invalid escape sequence; the
    # resulting pattern is unchanged.
    parts = remod.split('[\\+-]', v, 1)
    if len(parts) == 1:
        vparts, extra = parts[0], None
    else:
        vparts, extra = parts

    vints = []
    for i in vparts.split('.'):
        try:
            vints.append(int(i))
        except ValueError:
            # stop at the first non-numeric component
            break
    # (3, 6) -> (3, 6, None)
    while len(vints) < 3:
        vints.append(None)

    if n == 2:
        return (vints[0], vints[1])
    if n == 3:
        return (vints[0], vints[1], vints[2])
    if n == 4:
        return (vints[0], vints[1], vints[2], extra)
460 460
461 461 # used by parsedate
462 462 defaultdateformats = (
463 463 '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
464 464 '%Y-%m-%dT%H:%M', # without seconds
465 465 '%Y-%m-%dT%H%M%S', # another awful but legal variant without :
466 466 '%Y-%m-%dT%H%M', # without seconds
467 467 '%Y-%m-%d %H:%M:%S', # our common legal variant
468 468 '%Y-%m-%d %H:%M', # without seconds
469 469 '%Y-%m-%d %H%M%S', # without :
470 470 '%Y-%m-%d %H%M', # without seconds
471 471 '%Y-%m-%d %I:%M:%S%p',
472 472 '%Y-%m-%d %H:%M',
473 473 '%Y-%m-%d %I:%M%p',
474 474 '%Y-%m-%d',
475 475 '%m-%d',
476 476 '%m/%d',
477 477 '%m/%d/%y',
478 478 '%m/%d/%Y',
479 479 '%a %b %d %H:%M:%S %Y',
480 480 '%a %b %d %I:%M:%S%p %Y',
481 481 '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
482 482 '%b %d %H:%M:%S %Y',
483 483 '%b %d %I:%M:%S%p %Y',
484 484 '%b %d %H:%M:%S',
485 485 '%b %d %I:%M:%S%p',
486 486 '%b %d %H:%M',
487 487 '%b %d %I:%M%p',
488 488 '%b %d %Y',
489 489 '%b %d',
490 490 '%H:%M:%S',
491 491 '%I:%M:%S%p',
492 492 '%H:%M',
493 493 '%I:%M%p',
494 494 )
495 495
496 496 extendeddateformats = defaultdateformats + (
497 497 "%Y",
498 498 "%Y-%m",
499 499 "%b",
500 500 "%b %Y",
501 501 )
502 502
def cachefunc(func):
    '''cache the result of function calls'''
    # XXX doesn't handle keywords args
    argcount = func.__code__.co_argcount
    if argcount == 0:
        # a one-element list is enough to remember the single result
        result = []
        def f():
            if not result:
                result.append(func())
            return result[0]
        return f

    results = {}
    if argcount == 1:
        # we gain a small amount of time because
        # we don't need to pack/unpack the list
        def f(arg):
            if arg not in results:
                results[arg] = func(arg)
            return results[arg]
        return f

    def f(*args):
        if args not in results:
            results[args] = func(*args)
        return results[args]
    return f
528 528
class sortdict(dict):
    '''a simple sorted dictionary

    Keys are kept in insertion order in ``self._list``. Assigning to an
    existing key moves it to the end of that order.
    '''
    def __init__(self, data=None):
        self._list = []
        if data:
            self.update(data)
    def copy(self):
        return sortdict(self)
    def __setitem__(self, key, val):
        if key in self:
            self._list.remove(key)
        self._list.append(key)
        dict.__setitem__(self, key, val)
    def __iter__(self):
        return self._list.__iter__()
    def update(self, src):
        '''set every (key, value) pair of src, a dict or an iterable of
        pairs, in iteration order'''
        if isinstance(src, dict):
            src = src.iteritems()
        for k, v in src:
            self[k] = v
    def clear(self):
        dict.clear(self)
        self._list = []
    def items(self):
        return [(k, self[k]) for k in self._list]
    def __delitem__(self, key):
        dict.__delitem__(self, key)
        self._list.remove(key)
    def pop(self, key, *args, **kwargs):
        '''remove key and return its value, like dict.pop

        A missing key returns the default when one is given and raises
        KeyError otherwise.'''
        # dict.pop goes first so that a KeyError leaves the order untouched
        value = dict.pop(self, key, *args, **kwargs)
        try:
            self._list.remove(key)
        except ValueError:
            # key was absent and a default was returned
            pass
        return value
    def keys(self):
        return self._list[:]
    def iterkeys(self):
        return self._list.__iter__()
    def iteritems(self):
        for k in self._list:
            yield k, self[k]
    def insert(self, index, key, val):
        '''place key at position index in the ordering and set its value

        No existing occurrence of key is removed from the ordering first.'''
        self._list.insert(index, key)
        dict.__setitem__(self, key, val)
    def __repr__(self):
        if not self:
            return '%s()' % self.__class__.__name__
        return '%s(%r)' % (self.__class__.__name__, self.items())
577 577
class _lrucachenode(object):
    """A node in a doubly linked list.

    Holds a reference to nodes on either side as well as a key-value
    pair for the dictionary entry.
    """
    __slots__ = (u'next', u'prev', u'key', u'value')

    def __init__(self):
        self.next = None
        self.prev = None

        # _notset marks a node that currently holds no entry
        self.key = _notset
        self.value = None

    def markempty(self):
        """Mark the node as emptied."""
        self.key = _notset
        # drop the reference so the old value is not kept alive by an
        # empty node until the node happens to be recycled
        self.value = None
596 596
class lrucachedict(object):
    """Dict that caches most recent accesses and sets.

    The dict consists of an actual backing dict - indexed by original
    key - and a doubly linked circular list defining the order of entries in
    the cache.

    The head node is the newest entry in the cache. If the cache is full,
    we recycle head.prev and make it the new head. Cache accesses result in
    the node being moved to before the existing head and being marked as the
    new head node.

    Emptied nodes (after a deletion or clear()) are parked at the tail of the
    list, i.e. just before the head, where they are recycled first.
    """
    def __init__(self, max):
        self._cache = {}

        self._head = head = _lrucachenode()
        head.prev = head
        head.next = head
        # number of allocated nodes, empty ones included
        self._size = 1
        self._capacity = max

    def __len__(self):
        return len(self._cache)

    def __contains__(self, k):
        return k in self._cache

    def __iter__(self):
        # We don't have to iterate in cache order, but why not.
        n = self._head
        for i in range(len(self._cache)):
            yield n.key
            n = n.next

    def __getitem__(self, k):
        node = self._cache[k]
        self._movetohead(node)
        return node.value

    def __setitem__(self, k, v):
        node = self._cache.get(k)
        # Replace existing value and mark as newest.
        if node is not None:
            node.value = v
            self._movetohead(node)
            return

        if self._size < self._capacity:
            node = self._addcapacity()
        else:
            # Grab the last/oldest item.
            node = self._head.prev

        # At capacity. Kill the old entry.
        if node.key is not _notset:
            del self._cache[node.key]

        node.key = k
        node.value = v
        self._cache[k] = node
        # And mark it as newest entry. No need to adjust order since it
        # is already self._head.prev.
        self._head = node

    def __delitem__(self, k):
        node = self._cache.pop(k)
        node.markempty()

        # Temporarily mark as newest item before re-adjusting head to make
        # this node the oldest item.
        self._movetohead(node)
        self._head = node.next

    # Additional dict methods.

    def get(self, k, default=None):
        """Return the value for ``k`` without touching the access order."""
        try:
            return self._cache[k].value
        except KeyError:
            return default

    def clear(self):
        """Empty every node and the backing dict; nodes stay allocated."""
        n = self._head
        while n.key is not _notset:
            n.markempty()
            n = n.next

        self._cache.clear()

    def copy(self):
        """Return a new lrucachedict with the same capacity, entries and
        access order."""
        result = lrucachedict(self._capacity)
        n = self._head.prev
        # Emptied nodes sit at the tail; skip them so that only live
        # entries are copied.
        while n.key is _notset and n is not self._head:
            n = n.prev
        # Iterate in oldest-to-newest order, so the copy has the right ordering
        for i in range(len(self._cache)):
            result[n.key] = n.value
            n = n.prev
        return result

    def _movetohead(self, node):
        """Mark a node as the newest, making it the new head.

        When a node is accessed, it becomes the freshest entry in the LRU
        list, which is denoted by self._head.

        Visually, let's make ``N`` the new head node (* denotes head):

            previous/oldest <-> head <-> next/next newest

            ----<->--- A* ---<->-----
            |                       |
            E <-> D <-> N <-> C <-> B

        To:

            ----<->--- N* ---<->-----
            |                       |
            E <-> D <-> C <-> B <-> A

        This requires the following moves:

           C.next = D  (node.prev.next = node.next)
           D.prev = C  (node.next.prev = node.prev)
           E.next = N  (head.prev.next = node)
           N.prev = E  (node.prev = head.prev)
           N.next = A  (node.next = head)
           A.prev = N  (head.prev = node)
        """
        head = self._head
        # C.next = D
        node.prev.next = node.next
        # D.prev = C
        node.next.prev = node.prev
        # N.prev = E
        node.prev = head.prev
        # N.next = A
        # It is tempting to do just "head" here, however if node is
        # adjacent to head, this will do bad things.
        node.next = head.prev.next
        # A.prev = N
        node.next.prev = node
        # E.next = N
        node.prev.next = node

        self._head = node

    def _addcapacity(self):
        """Add a node to the circular linked list.

        The new node is inserted before the head node.
        """
        head = self._head
        node = _lrucachenode()
        head.prev.next = node
        node.prev = head.prev
        node.next = head
        head.prev = node
        self._size += 1
        return node
755 755
def lrucachefunc(func):
    '''cache most recent results of function calls'''
    results = {}
    # keys ordered from least to most recently used
    recency = collections.deque()
    if func.__code__.co_argcount == 1:
        def f(arg):
            if arg in results:
                recency.remove(arg)
            else:
                if len(results) > 20:
                    del results[recency.popleft()]
                results[arg] = func(arg)
            recency.append(arg)
            return results[arg]
        return f

    def f(*args):
        if args in results:
            recency.remove(args)
        else:
            if len(results) > 20:
                del results[recency.popleft()]
            results[args] = func(*args)
        recency.append(args)
        return results[args]
    return f
782 782
class propertycache(object):
    """Descriptor that computes a value with ``func`` and stores it in the
    instance ``__dict__`` under the function's name."""
    def __init__(self, func):
        self.func = func
        self.name = func.__name__

    def __get__(self, obj, type=None):
        value = self.func(obj)
        self.cachevalue(obj, value)
        return value

    def cachevalue(self, obj, value):
        # __dict__ assignment required to bypass __setattr__ (eg: repoview)
        obj.__dict__[self.name] = value
795 795
def pipefilter(s, cmd):
    '''filter string S through command CMD, returning its output'''
    proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    # communicate() returns (stdout, stderr); only stdout is wanted
    return proc.communicate(s)[0]
802 802
def tempfilter(s, cmd):
    '''filter string S through a pair of temporary files with CMD.
    CMD is used as a template to create the real command to be run,
    with the strings INFILE and OUTFILE replaced by the real names of
    the temporary files generated.

    Raises Abort if the command exits with a failure status. Both
    temporary files are removed before returning.'''
    inname, outname = None, None
    try:
        infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
        fp = os.fdopen(infd, pycompat.sysstr('wb'))
        try:
            fp.write(s)
        finally:
            # close the handle even when the write fails
            fp.close()
        outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
        os.close(outfd)
        cmd = cmd.replace('INFILE', inname)
        cmd = cmd.replace('OUTFILE', outname)
        code = os.system(cmd)
        if pycompat.sysplatform == 'OpenVMS' and code & 1:
            code = 0
        if code:
            raise Abort(_("command '%s' failed: %s") %
                        (cmd, explainexit(code)))
        return readfile(outname)
    finally:
        # best-effort cleanup: a file that cannot be removed is ignored
        try:
            if inname:
                os.unlink(inname)
        except OSError:
            pass
        try:
            if outname:
                os.unlink(outname)
        except OSError:
            pass
836 836
837 837 filtertable = {
838 838 'tempfile:': tempfilter,
839 839 'pipe:': pipefilter,
840 840 }
841 841
def filter(s, cmd):
    "filter a string through a command that transforms its input to its output"
    for prefix, handler in filtertable.iteritems():
        if not cmd.startswith(prefix):
            continue
        # strip the prefix and the whitespace after it
        return handler(s, cmd[len(prefix):].lstrip())
    # no known prefix: default to piping
    return pipefilter(s, cmd)
848 848
def binary(s):
    """return true if a string is binary data"""
    if not s:
        return False
    return '\0' in s
852 852
def increasingchunks(source, min=1024, max=65536):
    '''return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max'''
    def log2(x):
        # floor of the base-2 logarithm; 0 for a falsy x
        if not x:
            return 0
        power = -1
        while x:
            x >>= 1
            power += 1
        return power

    pending = []
    pendinglen = 0
    for chunk in source:
        pending.append(chunk)
        pendinglen += len(chunk)
        if pendinglen < min:
            continue
        if min < max:
            min <<= 1
            # never aim below the power of two just reached
            floor = 1 << log2(pendinglen)
            if floor > min:
                min = floor
            if min > max:
                min = max
        yield ''.join(pending)
        pending = []
        pendinglen = 0
    if pending:
        yield ''.join(pending)
883 883
884 884 Abort = error.Abort
885 885
def always(fn):
    """Return True whatever ``fn`` is."""
    return True
888 888
def never(fn):
    """Return False whatever ``fn`` is."""
    return False
891 891
def nogc(func):
    """disable garbage collector

    Python's garbage collector triggers a GC each time a certain number of
    container objects (the number being defined by gc.get_threshold()) are
    allocated even when marked not to be tracked by the collector. Tracking has
    no effect on when GCs are triggered, only on what objects the GC looks
    into. As a workaround, disable GC while building complex (huge)
    containers.

    This garbage collector issue has been fixed in 2.7, so ``func`` is
    returned unwrapped there.
    """
    if sys.version_info < (2, 7):
        def wrapper(*args, **kwargs):
            wasenabled = gc.isenabled()
            gc.disable()
            try:
                return func(*args, **kwargs)
            finally:
                # only turn the collector back on if it was on before
                if wasenabled:
                    gc.enable()
        return wrapper
    return func
915 915
def pathto(root, n1, n2):
    '''return the relative path from one place to another.
    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    '''
    if not n1:
        return localpath(n2)
    if os.path.isabs(n1):
        if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
            # different drives: no relative path exists
            return os.path.join(root, localpath(n2))
        n2 = '/'.join((pconvert(root), n2))
    here, there = splitpath(n1), n2.split('/')
    # count the leading components both paths share
    common = 0
    limit = min(len(here), len(there))
    while common < limit and here[common] == there[common]:
        common += 1
    ups = ['..'] * (len(here) - common)
    return pycompat.ossep.join(ups + there[common:]) or '.'
941 941
def mainfrozen():
    """return True if we are a frozen executable.

    The code supports py2exe (most common, Windows only) and tools/freeze
    (portable, not much used).
    """
    if safehasattr(sys, "frozen"): # new py2exe
        return True
    if safehasattr(sys, "importers"): # old py2exe
        return True
    return imp.is_frozen(u"__main__") # tools/freeze
951 951
# the location of data files matching the source code
if not mainfrozen() or getattr(sys, 'frozen', None) == 'macosx_app':
    datapath = os.path.dirname(pycompat.fsencode(__file__))
else:
    # executable version (py2exe) doesn't support __file__
    datapath = os.path.dirname(pycompat.sysexecutable)

i18n.setdatapath(datapath)
960 960
961 961 _hgexecutable = None
962 962
def hgexecutable():
    """return location of the 'hg' executable.

    Defaults to $HG or 'hg' in the search path.
    """
    if _hgexecutable is not None:
        return _hgexecutable

    hg = encoding.environ.get('HG')
    mainmod = sys.modules[pycompat.sysstr('__main__')]
    if hg:
        path = hg
    elif mainfrozen():
        if getattr(sys, 'frozen', None) == 'macosx_app':
            # Env variable set by py2app
            path = encoding.environ['EXECUTABLEPATH']
        else:
            path = pycompat.sysexecutable
    elif (os.path.basename(
        pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
        path = pycompat.fsencode(mainmod.__file__)
    else:
        path = findexe('hg') or os.path.basename(sys.argv[0])
    _sethgexecutable(path)
    return _hgexecutable
986 986
def _sethgexecutable(path):
    """Store ``path`` in the module-level ``_hgexecutable``."""
    global _hgexecutable
    _hgexecutable = path
991 991
992 992 def _isstdout(f):
993 993 fileno = getattr(f, 'fileno', None)
994 994 return fileno and fileno() == sys.__stdout__.fileno()
995 995
def shellenviron(environ=None):
    """return environ with optional override, useful for shelling out"""
    def py2shell(val):
        'convert python object into string that is useful to shell'
        if val is True:
            return '1'
        if val is None or val is False:
            return '0'
        return str(val)

    env = dict(encoding.environ)
    if environ:
        for name, value in environ.iteritems():
            env[name] = py2shell(value)
    # HG is always set last, so it wins over any override
    env['HG'] = hgexecutable()
    return env
1010 1010
def system(cmd, environ=None, cwd=None, out=None):
    '''enhanced shell command execution.
    run with environment maybe modified, maybe in different dir.

    if out is specified, it is assumed to be a file-like object that has a
    write() method. stdout and stderr will be redirected to out.

    returns the exit status of the command.'''
    try:
        stdout.flush()
    except Exception:
        pass
    cmd = quotecommand(cmd)
    oldplan9 = (pycompat.sysplatform == 'plan9'
                and sys.version_info[0] == 2
                and sys.version_info[1] < 7)
    if oldplan9:
        # subprocess kludge to work around issues in half-baked Python
        # ports, notably bichued/python:
        if cwd is not None:
            os.chdir(cwd)
        rc = os.system(cmd)
    else:
        env = shellenviron(environ)
        if out is not None and not _isstdout(out):
            # capture stdout and stderr together and forward line by line
            proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                                    env=env, cwd=cwd, stdout=subprocess.PIPE,
                                    stderr=subprocess.STDOUT)
            for line in iter(proc.stdout.readline, ''):
                out.write(line)
            proc.wait()
            rc = proc.returncode
        else:
            rc = subprocess.call(cmd, shell=True, close_fds=closefds,
                                 env=env, cwd=cwd)
    if pycompat.sysplatform == 'OpenVMS' and rc & 1:
        rc = 0
    return rc
1045 1045
def checksignature(func):
    '''return a wrapper around func that reports bad call signatures

    A TypeError whose traceback is a single frame deep (i.e. raised by
    the call itself rather than from inside func) is converted into
    error.SignatureError; any other TypeError propagates unchanged.'''
    def check(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError:
            depth = len(traceback.extract_tb(sys.exc_info()[2]))
            if depth != 1:
                raise
            raise error.SignatureError

    return check
1057 1057
# a whitelist of known filesystems where hardlink works reliably;
# copyfile() only attempts a hardlink when getfstype() reports one of these
_hardlinkfswhitelist = set([
    'btrfs',
    'ext2',
    'ext3',
    'ext4',
    'hfs',
    'jfs',
    'reiserfs',
    'tmpfs',
    'ufs',
    'xfs',
    'zfs',
])
1069 1072
def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
    '''copy src to dest, keeping the mode and optionally atime/mtime

    An existing dest is removed first. With hardlink=True a hardlink is
    attempted, but only when dest lives on a whitelisted filesystem; any
    failure falls back to a regular copy. Symlinks are recreated rather
    than followed.

    checkambig argument is used with filestat, and is useful only if
    destination file is guarded by any lock (e.g. repo.lock or
    repo.wlock).

    copystat and checkambig should be exclusive.
    '''
    assert not (copystat and checkambig)
    oldstat = None
    if os.path.lexists(dest):
        if checkambig:
            # remember the stat of the file being replaced
            oldstat = filestat(dest)
        unlink(dest)
    # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
    # unless we are confident that dest is on a whitelisted filesystem.
    if hardlink and getfstype(os.path.dirname(dest)) in _hardlinkfswhitelist:
        try:
            oslink(src, dest)
            return
        except (IOError, OSError):
            pass # fall back to normal copy
    if os.path.islink(src):
        # copytime is ignored for symlinks, but in general copytime isn't
        # needed for them anyway
        os.symlink(os.readlink(src), dest)
        return
    try:
        shutil.copyfile(src, dest)
        if copystat:
            # copystat also copies mode
            shutil.copystat(src, dest)
            return
        shutil.copymode(src, dest)
        if oldstat and oldstat.stat:
            newstat = filestat(dest)
            if newstat.isambig(oldstat):
                # stat of copied file is ambiguous to original one
                advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
                os.utime(dest, (advanced, advanced))
    except shutil.Error as inst:
        raise Abort(str(inst))
1118 1121
def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
    """Copy a file or directory tree, hardlinking where possible.

    When ``hardlink`` is None, linking is tried only if src and the
    parent of dst are on the same device. ``progress`` is called with a
    topic and the running file count, and with None once finished.

    Returns a (hardlink, count) pair: whether hardlinking is still in
    effect, and the number of files processed.
    """
    if hardlink is None:
        srcdev = os.stat(src).st_dev
        dstdev = os.stat(os.path.dirname(dst)).st_dev
        hardlink = (srcdev == dstdev)
    topic = _('linking') if hardlink else _('copying')

    num = 0

    def nprog(t, pos):
        # shift progress reported by a recursive call by the files
        # already handled at this level
        if pos is not None:
            return progress(t, pos + num)

    if not os.path.isdir(src):
        if hardlink:
            try:
                oslink(src, dst)
            except (IOError, OSError):
                # stop trying to link from now on and copy instead
                hardlink = False
                shutil.copy(src, dst)
        else:
            shutil.copy(src, dst)
        num += 1
        progress(topic, num)
    else:
        os.mkdir(dst)
        for name, kind in osutil.listdir(src):
            srcname = os.path.join(src, name)
            dstname = os.path.join(dst, name)
            hardlink, copied = copyfiles(srcname, dstname, hardlink,
                                         progress=nprog)
            num += copied
    progress(topic, None)

    return hardlink, num
1155 1158
# device names and characters that Windows does not accept in file names
_winreservednames = '''con prn aux nul
    com1 com2 com3 com4 com5 com6 com7 com8 com9
    lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
_winreservedchars = ':*?"<>|'
def checkwinfilename(path):
    r'''Check that the base-relative path is a valid filename on Windows.
    Returns None if the path is ok, or a UI string describing the problem.

    Each path component is checked for reserved characters, control
    characters, reserved device names and a trailing dot or space.

    >>> checkwinfilename("just/a/normal/path")
    >>> checkwinfilename("foo/bar/con.xml")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename("foo/con.xml/bar")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename("foo/bar/xml.con")
    >>> checkwinfilename("foo/bar/AUX/bla.txt")
    "filename contains 'AUX', which is reserved on Windows"
    >>> checkwinfilename("foo/bar/bla:.txt")
    "filename contains ':', which is reserved on Windows"
    >>> checkwinfilename("foo/bar/b\07la.txt")
    "filename contains '\\x07', which is invalid on Windows"
    >>> checkwinfilename("foo/bar/bla ")
    "filename ends with ' ', which is not allowed on Windows"
    >>> checkwinfilename("../bar")
    >>> checkwinfilename("foo\\")
    "filename ends with '\\', which is invalid on Windows"
    >>> checkwinfilename("foo\\/bar")
    "directory name ends with '\\', which is invalid on Windows"
    '''
    if path.endswith('\\'):
        return _("filename ends with '\\', which is invalid on Windows")
    if '\\/' in path:
        return _("directory name ends with '\\', which is invalid on Windows")
    for component in path.replace('\\', '/').split('/'):
        if not component:
            continue
        for ch in pycompat.bytestr(component):
            if ch in _winreservedchars:
                return _("filename contains '%s', which is reserved "
                         "on Windows") % ch
            if ord(ch) <= 31:
                return _("filename contains %r, which is invalid "
                         "on Windows") % ch
        # only the part before the first dot matters for device names
        stem = component.split('.')[0]
        if stem and stem.lower() in _winreservednames:
            return _("filename contains '%s', which is reserved "
                     "on Windows") % stem
        last = component[-1]
        # '.' and '..' themselves are fine
        if last in '. ' and component not in '..':
            return _("filename ends with '%s', which is not allowed "
                     "on Windows") % last
1206 1209
# Select the filename validator and the default timer for this platform:
# Windows uses checkwinfilename and time.clock, everything else uses the
# platform module's checker and time.time.
if pycompat.osname == 'nt':
    checkosfilename = checkwinfilename
    timer = time.clock
else:
    checkosfilename = platform.checkosfilename
    timer = time.time

# time.perf_counter overrides either choice above when the running
# Python provides it.
if safehasattr(time, "perf_counter"):
    timer = time.perf_counter
1216 1219
def makelock(info, pathname):
    """create a lock at pathname whose content is info

    A symlink pointing at ``info`` is preferred. If symlinks are
    unavailable, or creating one fails with anything other than EEXIST,
    a regular file is created exclusively and ``info`` is written into it.

    Raises OSError (EEXIST in particular) when the lock already exists.
    """
    try:
        return os.symlink(info, pathname)
    except OSError as why:
        if why.errno == errno.EEXIST:
            raise
    except AttributeError: # no symlink in os
        pass

    ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
    try:
        os.write(ld, info)
    finally:
        # never leak the descriptor, even if the write fails
        os.close(ld)
1229 1232
def readlock(pathname):
    """return the info stored in the lock at pathname

    The symlink target is returned when pathname is a symlink. If
    readlink reports EINVAL or ENOSYS, or symlinks are unavailable,
    the content of the file is returned instead.
    """
    try:
        return os.readlink(pathname)
    except AttributeError: # no symlink in os
        pass
    except OSError as why:
        if why.errno not in (errno.EINVAL, errno.ENOSYS):
            raise
    lockfile = posixfile(pathname)
    info = lockfile.read()
    lockfile.close()
    return info
1242 1245
def fstat(fp):
    '''stat a file object, falling back to stat-ing fp.name when the
    object raises AttributeError (e.g. it has no fileno method).'''
    try:
        st = os.fstat(fp.fileno())
    except AttributeError:
        st = os.stat(fp.name)
    return st
1249 1252
1250 1253 # File system features
1251 1254
def fscasesensitive(path):
    """
    Return true if the given path is on a case-sensitive filesystem

    Requires a path (like /foo/.hg) ending with a foldable final
    directory component. The final component is case-swapped and
    lstat-ed; only when that yields the same stat as the original is
    the filesystem reported as case-insensitive.
    """
    origstat = os.lstat(path)
    parent, leaf = os.path.split(path)
    folded = leaf.upper()
    if folded == leaf:
        folded = leaf.lower()
    if folded == leaf:
        return True # no evidence against case sensitivity
    try:
        foldedstat = os.lstat(os.path.join(parent, folded))
    except OSError:
        return True
    return foldedstat != origstat
1274 1277
# _re2 is tri-state: None means the re2 module was imported but has not
# been verified yet (see _re._checkre2), False means it cannot be used.
try:
    import re2
    _re2 = None
except ImportError:
    _re2 = False
1280 1283
class _re(object):
    '''regular expression front-end that uses re2 when it is usable and
    falls back to the standard re module otherwise'''

    def _checkre2(self):
        '''probe re2 once and store the outcome in the _re2 global'''
        global _re2
        try:
            # check if match works, see issue3964
            works = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
        except ImportError:
            works = False
        _re2 = works

    def compile(self, pat, flags=0):
        '''Compile a regular expression, using re2 if possible

        For best performance, use only re2-compatible regexp features. The
        only flags from the re module that are re2-compatible are
        IGNORECASE and MULTILINE.'''
        if _re2 is None:
            self._checkre2()
        re2flags = remod.IGNORECASE | remod.MULTILINE
        if _re2 and not (flags & ~re2flags):
            # flags are passed to re2 as inline prefixes on the pattern
            if flags & remod.IGNORECASE:
                pat = '(?i)' + pat
            if flags & remod.MULTILINE:
                pat = '(?m)' + pat
            try:
                return re2.compile(pat)
            except re2.error:
                pass
        return remod.compile(pat, flags)

    @propertycache
    def escape(self):
        '''Return the version of escape corresponding to self.compile.

        This is imperfect because whether re2 or re is used for a particular
        function depends on the flags, etc, but it's the best we can do.
        '''
        if _re2 is None:
            self._checkre2()
        return re2.escape if _re2 else remod.escape

re = _re()
1325 1328
# maps a directory to a {normcase-d entry name: on-disk entry name} dict
_fspathcache = {}
def fspath(name, root):
    '''Get name in the case stored in the filesystem

    The name should be relative to root, and be normcase-ed for efficiency.

    Note that this function is unnecessary, and should not be
    called, for case-sensitive filesystems (simply because it's expensive).

    The root should be normcase-ed, too.

    Returns name with every component that exists on disk replaced by
    its stored spelling; components that cannot be found and all
    separators are kept as given.
    '''
    def _makefspathcacheentry(dir):
        return dict((normcase(n), n) for n in os.listdir(dir))

    seps = pycompat.ossep
    if pycompat.osaltsep:
        seps = seps + pycompat.osaltsep
    # Protect backslashes. This gets silly very quickly.
    # str.replace returns a new string, so the result must be kept;
    # otherwise a backslash separator merely escapes the next character
    # inside the character classes below.
    seps = seps.replace('\\', '\\\\')
    pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
    curdir = os.path.normpath(root)
    result = []
    for part, sep in pattern.findall(name):
        if sep:
            result.append(sep)
            continue

        if curdir not in _fspathcache:
            _fspathcache[curdir] = _makefspathcacheentry(curdir)
        contents = _fspathcache[curdir]

        found = contents.get(part)
        if not found:
            # retry "once per directory" per "dirstate.walk" which
            # may take place for each patches of "hg qpush", for example
            _fspathcache[curdir] = contents = _makefspathcacheentry(curdir)
            found = contents.get(part)

        result.append(found or part)
        curdir = os.path.join(curdir, part)

    return ''.join(result)
1368 1371
def getfstype(dirpath):
    '''Get the filesystem type name from a directory (best-effort)

    Delegates to osutil.getfstype when osutil provides it.

    Returns None if we are unsure, or errors like ENOENT, EPERM happen.
    '''
    def unknown(path):
        return None

    probe = getattr(osutil, 'getfstype', unknown)
    return probe(dirpath)
1375 1378
def checknlink(testfile):
    '''check whether hardlink count reporting works properly

    Two scratch files named after testfile are created, hardlinked and
    removed again. Returns True only if the link count of the linked
    file is reported as greater than one.'''

    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    original = testfile + ".hgtmp1"
    if os.path.lexists(original):
        return False
    try:
        posixfile(original, 'w').close()
    except IOError:
        try:
            os.unlink(original)
        except OSError:
            pass
        return False

    linked = testfile + ".hgtmp2"
    handle = None
    try:
        oslink(original, linked)
        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        handle = posixfile(linked)
        return nlinks(linked) > 1
    except OSError:
        return False
    finally:
        if handle is not None:
            handle.close()
        for scratch in (original, linked):
            try:
                os.unlink(scratch)
            except OSError:
                pass
1411 1414
def endswithsep(path):
    '''Tell whether path ends with the OS separator or, when one is
    defined, the alternative separator.'''
    if path.endswith(pycompat.ossep):
        return True
    altsep = pycompat.osaltsep
    return altsep and path.endswith(altsep)
1416 1419
def splitpath(path):
    '''Return the components of path, split on os.sep only.

    os.altsep is deliberately ignored: this is meant as a drop-in for
    a plain "xxx.split(os.sep)". Run os.path.normpath() on the path
    first if that matters to the caller.'''
    components = path.split(pycompat.ossep)
    return components
1424 1427
def gui():
    '''Are we running in a GUI?

    Outside macOS this is true on Windows or when DISPLAY is set (the
    DISPLAY value itself is returned in that case).'''
    if pycompat.sysplatform != 'darwin':
        return pycompat.osname == "nt" or encoding.environ.get("DISPLAY")
    if 'SSH_CONNECTION' in encoding.environ:
        # handle SSH access to a box where the user is logged in
        return False
    if getattr(osutil, 'isgui', None):
        # check if a CoreGraphics session is available
        return osutil.isgui()
    # pure build; use a safe default
    return True
1439 1442
def mktempcopy(name, emptyok=False, createmode=None):
    """Create a temporary file with the same contents from name

    The temporary file is created next to ``name`` with a
    '.<basename>-' prefix.

    The permission bits are copied from the original file.

    If the temporary file is going to be truncated immediately, you
    can use emptyok=True as an optimization.

    If ``name`` does not exist, the (empty) temporary file is returned.
    On any other failure the temporary file is removed and the error is
    re-raised.

    Returns the name of the temporary file.
    """
    d, fn = os.path.split(name)
    fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, dir=d)
    os.close(fd)
    try:
        # Temporary files are created with mode 0600, which is usually not
        # what we want.  If the original file already exists, just copy
        # its mode.  Otherwise, manually obey umask.
        copymode(name, temp, createmode)
        if emptyok:
            return temp
        try:
            ifp = posixfile(name, "rb")
        except IOError as inst:
            if inst.errno == errno.ENOENT:
                return temp
            if not getattr(inst, 'filename', None):
                inst.filename = name
            raise
        # close both handles before the cleanup below runs, so the
        # temporary file is never unlinked while still open
        try:
            ofp = posixfile(temp, "wb")
            try:
                for chunk in filechunkiter(ifp):
                    ofp.write(chunk)
            finally:
                ofp.close()
        finally:
            ifp.close()
    except: # re-raises
        try: os.unlink(temp)
        except OSError: pass
        raise
    return temp
1478 1481
class filestat(object):
    """help to exactly detect change of a file

    'stat' attribute is result of 'os.stat()' if specified 'path'
    exists. Otherwise, it is None. This can avoid preparative
    'exists()' examination on client side of this class.
    """
    def __init__(self, path):
        try:
            self.stat = os.stat(path)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            # a missing file is recorded as "no stat"
            self.stat = None

    __hash__ = object.__hash__

    def __eq__(self, old):
        try:
            # if ambiguity between stat of new and old file is
            # avoided, comparison of size, ctime and mtime is enough
            # to exactly detect change of a file regardless of platform
            for field in ('st_size', 'st_ctime', 'st_mtime'):
                if getattr(self.stat, field) != getattr(old.stat, field):
                    return False
            return True
        except AttributeError:
            # either side has no stat (or is not a filestat at all)
            return False

    def isambig(self, old):
        """Examine whether new (= self) stat is ambiguous against old one

        With "S[N]" being the stat of a file at its N-th change:

        - S[n-1].ctime  < S[n].ctime: can detect change of a file
        - S[n-1].ctime == S[n].ctime
          - S[n-1].ctime  < S[n].mtime: means natural advancing (*1)
          - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
          - S[n-1].ctime  > S[n].mtime: never occurs naturally (don't care)
        - S[n-1].ctime  > S[n].ctime: never occurs naturally (don't care)

        In case (*2) the file was changed twice or more within the same
        second (= S[n-1].ctime), so comparing timestamps cannot tell the
        versions apart.

        The remedy is "advance mtime 1 sec, if timestamp is ambiguous".
        Doing so only for case (*2) is not sufficient, though: a
        naturally advanced S[n].mtime in case (*1) might be equal to a
        manually advanced S[n-1 or earlier].mtime.

        Therefore every "S[n-1].ctime == S[n].ctime" case is treated as
        ambiguous regardless of mtime, so that such colliding mtimes are
        not overlooked.

        Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
        S[n].mtime", even if size of a file isn't changed.
        """
        try:
            newctime = self.stat.st_ctime
            oldctime = old.stat.st_ctime
        except AttributeError:
            return False
        return newctime == oldctime

    def avoidambig(self, path, old):
        """Change file stat of specified path to avoid ambiguity

        'old' should be previous filestat of 'path'.

        The atime and mtime of 'path' are set to one second past the
        old mtime. This is silently skipped if the process doesn't have
        appropriate privileges for 'path'.
        """
        advanced = (old.stat.st_mtime + 1) & 0x7fffffff
        try:
            os.utime(path, (advanced, advanced))
        except OSError as inst:
            # utime() on the file created by another user causes EPERM,
            # if a process doesn't have appropriate privileges
            if inst.errno != errno.EPERM:
                raise

    def __ne__(self, other):
        return not self == other
1562 1565
class atomictempfile(object):
    '''writable file object that atomically updates a file

    Writes go to a temporary copy of the original file. close() renames
    that copy over the original name, which is what makes the changes
    visible. discard(), or destroying the object without closing it,
    throws the writes away.

    checkambig argument of constructor is used with filestat, and is
    useful only if target file is guarded by any lock (e.g. repo.lock
    or repo.wlock).
    '''
    def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
        self.__name = name      # permanent name
        # a mode containing 'w' truncates, so the copy may start empty
        self._tempname = mktempcopy(name, emptyok=('w' in mode),
                                    createmode=createmode)
        self._fp = posixfile(self._tempname, mode)
        self._checkambig = checkambig

        # delegated methods
        fp = self._fp
        self.read = fp.read
        self.write = fp.write
        self.seek = fp.seek
        self.tell = fp.tell
        self.fileno = fp.fileno

    def close(self):
        '''flush the temporary copy into place; no-op if already closed'''
        if self._fp.closed:
            return
        self._fp.close()
        target = localpath(self.__name)
        # stat of the file being replaced, only when checkambig is on
        previous = self._checkambig and filestat(target)
        rename(self._tempname, target)
        if previous and previous.stat:
            current = filestat(target)
            if current.isambig(previous):
                # stat of changed file is ambiguous to original one
                advanced = (previous.stat.st_mtime + 1) & 0x7fffffff
                os.utime(target, (advanced, advanced))

    def discard(self):
        '''drop the temporary copy without touching the original'''
        if self._fp.closed:
            return
        try:
            os.unlink(self._tempname)
        except OSError:
            pass
        self._fp.close()

    def __del__(self):
        if safehasattr(self, '_fp'): # constructor actually did something
            self.discard()

    def __enter__(self):
        return self

    def __exit__(self, exctype, excvalue, traceback):
        # commit on a clean exit, throw the writes away on error
        if exctype is None:
            self.close()
        else:
            self.discard()
1625 1628
def unlinkpath(f, ignoremissing=False):
    """remove file f, then prune parent directories left empty

    With ignoremissing=True a missing f is not an error. Failures
    while removing the parent directories are ignored.
    """
    remove = tryunlink if ignoremissing else unlink
    remove(f)
    # try removing directories that might now be empty
    try:
        removedirs(os.path.dirname(f))
    except OSError:
        pass
1637 1640
def tryunlink(f):
    """Remove file f; a missing file (ENOENT) is silently accepted,
    any other OSError propagates."""
    try:
        unlink(f)
    except OSError as inst:
        if inst.errno == errno.ENOENT:
            return
        raise
1645 1648
def makedirs(name, mode=None, notindexed=False):
    """recursive directory creation with parent mode inheritance

    Missing parents are created first, through a recursive call with
    the same ``mode``. An already existing ``name`` is accepted silently
    and is left untouched (no chmod).

    Newly created directories are marked as "not to be indexed by
    the content indexing service", if ``notindexed`` is specified
    for "write" mode access.
    """
    try:
        makedir(name, notindexed)
    except OSError as inst:
        if inst.errno == errno.EEXIST:
            return
        if inst.errno != errno.ENOENT or not name:
            raise
        updir = os.path.dirname(os.path.abspath(name))
        if updir == name:
            # no parent left to create
            raise
        makedirs(updir, mode, notindexed)
        try:
            makedir(name, notindexed)
        except OSError as raceerr:
            # Catch EEXIST to handle races
            if raceerr.errno == errno.EEXIST:
                return
            raise
    if mode is not None:
        os.chmod(name, mode)
1673 1676
def readfile(path):
    """return the whole content of the file at path, read in binary mode"""
    with open(path, 'rb') as src:
        data = src.read()
        return data
1677 1680
def writefile(path, text):
    """replace the content of the file at path with text (binary mode)"""
    with open(path, 'wb') as dest:
        dest.write(text)
1681 1684
def appendfile(path, text):
    """add text to the end of the file at path (binary mode)"""
    with open(path, 'ab') as dest:
        dest.write(text)
1685 1688
class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
        """in_iter is the iterator that's iterating over the input chunks.

        Chunks larger than 1MB are handed out in 256kB slices."""
        def splitbig(chunks):
            for chunk in chunks:
                if len(chunk) > 2**20:
                    pos = 0
                    while pos < len(chunk):
                        end = pos + 2 ** 18
                        yield chunk[pos:end]
                        pos = end
                else:
                    yield chunk
        self.iter = splitbig(in_iter)
        # chunks fetched from the iterator but not fully consumed yet
        self._queue = collections.deque()
        # how much of the first queued chunk has already been returned
        self._chunkoffset = 0

    def read(self, l=None):
        """Read L bytes of data from the iterator of chunks of data.
        Returns less than L bytes if the iterator runs dry.

        If size parameter is omitted, read everything that is left,
        including data already buffered by earlier sized reads."""
        queue = self._queue
        if l is None:
            buf = []
            if queue:
                # hand out what earlier reads left behind first, skipping
                # the already consumed start of the first chunk
                buf.append(queue.popleft()[self._chunkoffset:])
                self._chunkoffset = 0
                buf.extend(queue)
                queue.clear()
            buf.extend(self.iter)
            return ''.join(buf)

        left = l
        buf = []
        while left > 0:
            # refill the queue
            if not queue:
                target = 2**18
                for chunk in self.iter:
                    queue.append(chunk)
                    target -= len(chunk)
                    if target <= 0:
                        break
                if not queue:
                    break

            # The easy way to do this would be to queue.popleft(), modify the
            # chunk (if necessary), then queue.appendleft(). However, for cases
            # where we read partial chunk content, this incurs 2 dequeue
            # mutations and creates a new str for the remaining chunk in the
            # queue. Our code below avoids this overhead.

            chunk = queue[0]
            chunkl = len(chunk)
            offset = self._chunkoffset

            # Use full chunk.
            if offset == 0 and left >= chunkl:
                left -= chunkl
                queue.popleft()
                buf.append(chunk)
                # self._chunkoffset remains at 0.
                continue

            chunkremaining = chunkl - offset

            # Use all of unconsumed part of chunk.
            if left >= chunkremaining:
                left -= chunkremaining
                queue.popleft()
                # offset == 0 is enabled by block above, so this won't merely
                # copy via ``chunk[0:]``.
                buf.append(chunk[offset:])
                self._chunkoffset = 0

            # Partial chunk needed.
            else:
                buf.append(chunk[offset:offset + left])
                self._chunkoffset += left
                # left goes negative here, which ends the loop
                left -= chunkremaining

        return ''.join(buf)
1766 1769
def filechunkiter(f, size=131072, limit=None):
    """Generate the data of file f in pieces of at most size bytes
    (default 131072), stopping after limit bytes in total when limit is
    given (default is to read all data).

    Pieces may be shorter than size: the last one usually is, and
    sockets or similar files sometimes return less data than
    requested."""
    assert size >= 0
    assert limit is None or limit >= 0
    while True:
        want = size if limit is None else min(limit, size)
        # a zero-sized request ends the iteration without reading
        data = want and f.read(want)
        if not data:
            break
        if limit:
            limit -= len(data)
        yield data
1787 1790
def makedate(timestamp=None):
    '''Return a (unixtime, offset) tuple for the given unix timestamp, or
    for the current time when none is given. The offset is the number of
    seconds the local timezone is away from UTC at that moment.

    Aborts on negative timestamps.'''
    if timestamp is None:
        timestamp = time.time()
    if timestamp < 0:
        hint = _("check your clock")
        raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
    utc = datetime.datetime.utcfromtimestamp(timestamp)
    local = datetime.datetime.fromtimestamp(timestamp)
    skew = utc - local
    return timestamp, skew.days * 86400 + skew.seconds
1800 1803
def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
    """represent a (unixtime, offset) tuple as a localized time.
    unixtime is seconds since the epoch, and offset is the time zone's
    number of seconds away from UTC.

    In addition to strftime directives, "%1" expands to the signed hour
    part of the offset, "%2" to its minute part, and "%z" to both.
    Values outside the signed 32-bit range are clamped to that range.

    >>> datestr((0, 0))
    'Thu Jan 01 00:00:00 1970 +0000'
    >>> datestr((42, 0))
    'Thu Jan 01 00:00:42 1970 +0000'
    >>> datestr((-42, 0))
    'Wed Dec 31 23:59:18 1969 +0000'
    >>> datestr((0x7fffffff, 0))
    'Tue Jan 19 03:14:07 2038 +0000'
    >>> datestr((-0x80000000, 0))
    'Fri Dec 13 20:45:52 1901 +0000'
    """
    t, tz = date or makedate()
    if "%1" in format or "%2" in format or "%z" in format:
        # a positive offset means west of UTC, hence the inverted sign
        sign = "-" if tz > 0 else "+"
        hours, minutes = divmod(abs(tz) // 60, 60)
        format = format.replace("%z", "%1%2")
        format = format.replace("%1", "%c%02d" % (sign, hours))
        format = format.replace("%2", "%02d" % minutes)
    localsecs = t - tz
    if localsecs > 0x7fffffff:
        localsecs = 0x7fffffff
    elif localsecs < -0x80000000:
        localsecs = -0x80000000
    # Never use time.gmtime() and datetime.datetime.fromtimestamp()
    # because they use the gmtime() system call which is buggy on Windows
    # for negative values.
    epoch = datetime.datetime(1970, 1, 1)
    moment = epoch + datetime.timedelta(seconds=localsecs)
    return encoding.strtolocal(moment.strftime(encoding.strfromlocal(format)))
1836 1839
def shortdate(date=None):
    """turn (timestamp, tzoff) tuple into an ISO 8601 date (YYYY-MM-DD)."""
    isodate = datestr(date, format='%Y-%m-%d')
    return isodate
1840 1843
def parsetimezone(s):
    """split a trailing timezone, if any, off string s and return an
    (offset, remainder) pair

    offset is in seconds, positive west of UTC, and None when no
    timezone was recognized (remainder is then s itself)."""

    def seconds(signchar, hours, minutes):
        # "+hhmm" is east of UTC, which is a negative offset here
        sign = 1 if signchar == "+" else -1
        return -sign * (int(hours) * 60 + int(minutes)) * 60

    if s.endswith(("GMT", "UTC")):
        return 0, s[:-3].rstrip()

    # Unix-style timezones [+-]hhmm
    if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
        return seconds(s[-5], s[-4:-2], s[-2:]), s[:-5].rstrip()

    # ISO8601 trailing Z
    if s.endswith("Z") and s[-2:-1].isdigit():
        return 0, s[:-1]

    # ISO8601-style [+-]hh:mm
    if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
        s[-5:-3].isdigit() and s[-2:].isdigit()):
        return seconds(s[-6], s[-5:-3], s[-2:]), s[:-6]

    return None, s
1868 1871
def strdate(string, format, defaults=None):
    """parse a localized time string and return a (unixtime, offset) tuple.

    Date elements that format does not cover are taken from defaults,
    which maps each element group ("S", "M", "HI", "d", "mb", "yY") to
    a pair of strings indexed by whether a more specific element was
    present in format.

    if the string cannot be parsed, ValueError is raised."""
    if defaults is None:
        defaults = {}

    # NOTE: unixtime = localunixtime + offset
    offset, date = parsetimezone(string)

    # add missing elements from defaults
    usenow = False # default to using biased defaults
    for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
        if any(("%" + p) in format for p in part):
            # We've found a specific time element, less specific time
            # elements are relative to today
            usenow = True
            continue
        date += "@" + defaults[part][usenow]
        format += "@%" + part[0]

    timetuple = time.strptime(date, format)
    localunixtime = int(calendar.timegm(timetuple))
    if offset is not None:
        return localunixtime + offset, offset
    # local timezone
    unixtime = int(time.mktime(timetuple))
    return unixtime, unixtime - localunixtime
1899 1902
def parsedate(date, formats=None, bias=None):
    """Turn a localized date/time into a (unixtime, offset) tuple.

    ``date`` may be a "unixtime offset" string, one of the words 'now',
    'today' or 'yesterday' (or their translations), or text in one of
    ``formats`` (defaultdateformats when not given). A false value yields
    (0, 0) and a 2-tuple is returned unchanged. ``bias`` optionally maps
    element groups to the default used for elements the text leaves out.

    Abort is raised when no format matches or the result is out of range.

    >>> parsedate(' today ') == parsedate(\
                                  datetime.date.today().strftime('%b %d'))
    True
    >>> parsedate( 'yesterday ') == parsedate((datetime.date.today() -\
                                               datetime.timedelta(days=1)\
                                              ).strftime('%b %d'))
    True
    >>> now, tz = makedate()
    >>> strnow, strtz = parsedate('now')
    >>> (strnow - now) < 1
    True
    >>> tz == strtz
    True
    """
    if bias is None:
        bias = {}
    if not date:
        return 0, 0
    if isinstance(date, tuple) and len(date) == 2:
        return date
    if not formats:
        formats = defaultdateformats
    date = date.strip()

    if date == 'now' or date == _('now'):
        return makedate()
    if date == 'today' or date == _('today'):
        date = datetime.date.today().strftime('%b %d')
    elif date == 'yesterday' or date == _('yesterday'):
        yesterday = datetime.date.today() - datetime.timedelta(days=1)
        date = yesterday.strftime('%b %d')

    try:
        when, offset = map(int, date.split(' '))
    except ValueError:
        # not a plain "unixtime offset" pair: build the per-group defaults
        # and try each format in turn
        now = makedate()
        defaults = {}
        for part in ("d", "mb", "yY", "HI", "M", "S"):
            # first slot: value used to round the specific end of unknowns
            biased = bias.get(part)
            if biased is None:
                if part[0] in "HMS":
                    biased = "00"
                else:
                    biased = "0"

            # second slot: today's value, for matching the generic end
            current = datestr(now, "%" + part[0])

            defaults[part] = (biased, current)

        for fmt in formats:
            try:
                when, offset = strdate(date, fmt, defaults)
            except (ValueError, OverflowError):
                continue
            break
        else:
            raise Abort(_('invalid date: %r') % date)

    # validate explicit (probably user-specified) date and
    # time zone offset. values must fit in signed 32 bits for
    # current 32-bit linux runtimes. timezones go from UTC-12
    # to UTC+14
    if not -0x80000000 <= when <= 0x7fffffff:
        raise Abort(_('date exceeds 32 bits: %d') % when)
    if not -50400 <= offset <= 43200:
        raise Abort(_('impossible time zone offset: %d') % offset)
    return when, offset
1976 1979
def matchdate(date):
    """Return a function that matches a given date match specifier

    Formats include:

    '{date}' match a given date to the accuracy provided

    '<{date}' on or before a given date

    '>{date}' on or after a given date

    '-{days}' within the given number of days of now

    '{date} to {date}' a date range, inclusive

    The returned function takes a unixtime and returns a boolean. Abort is
    raised for an empty or malformed specifier.

    >>> p1 = parsedate("10:29:59")
    >>> p2 = parsedate("10:30:00")
    >>> p3 = parsedate("10:30:59")
    >>> p4 = parsedate("10:31:00")
    >>> p5 = parsedate("Sep 15 10:30:00 1999")
    >>> f = matchdate("10:30")
    >>> f(p1[0])
    False
    >>> f(p2[0])
    True
    >>> f(p3[0])
    True
    >>> f(p4[0])
    False
    >>> f(p5[0])
    False
    """

    def lower(date):
        # earliest instant the spec can denote: missing month/day become 1
        d = {'mb': "1", 'd': "1"}
        return parsedate(date, extendeddateformats, d)[0]

    def upper(date):
        # latest instant the spec can denote: missing elements are filled
        # with their maximum, trying day 31, 30, 29 and finally 28 until
        # parsedate() accepts the resulting date
        d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
        for days in ("31", "30", "29"):
            try:
                d["d"] = days
                return parsedate(date, extendeddateformats, d)[0]
            except Abort:
                pass
        d["d"] = "28"
        return parsedate(date, extendeddateformats, d)[0]

    date = date.strip()

    if not date:
        raise Abort(_("dates cannot consist entirely of whitespace"))
    elif date[0] == "<":
        if not date[1:]:
            raise Abort(_("invalid day spec, use '<DATE'"))
        when = upper(date[1:])
        return lambda x: x <= when
    elif date[0] == ">":
        if not date[1:]:
            raise Abort(_("invalid day spec, use '>DATE'"))
        when = lower(date[1:])
        return lambda x: x >= when
    elif date[0] == "-":
        try:
            days = int(date[1:])
        except ValueError:
            raise Abort(_("invalid day spec: %s") % date[1:])
        if days < 0:
            raise Abort(_("%s must be nonnegative (see 'hg help dates')")
                % date[1:])
        when = makedate()[0] - days * 3600 * 24
        return lambda x: x >= when
    elif " to " in date:
        # Split only once: a spec with several " to " separators then fails
        # in parsedate() with the usual "invalid date" Abort instead of an
        # uncaught ValueError from tuple unpacking.
        a, b = date.split(" to ", 1)
        start, stop = lower(a), upper(b)
        return lambda x: x >= start and x <= stop
    else:
        start, stop = lower(date), upper(date)
        return lambda x: x >= start and x <= stop
2052 2055
def stringmatcher(pattern, casesensitive=True):
    """
    build a matcher from a string with an optional 're:' or 'literal:' prefix.
    returns a (kind, pattern, matcher) tuple, where kind is 're' or
    'literal' and pattern has the recognized prefix removed.
    a pattern with no prefix, or with an unknown one, matches literally.
    an invalid regular expression raises error.ParseError.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test('abcdefg', 'abc', 'def', 'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test('re:a.+b', 'nomatch', 'fooadef', 'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test('literal:re:foobar', 'foobar', 're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test('foo:bar', 'foo', 'bar', 'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest('re:A.+b', 'nomatch', 'fooadef', 'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest('ABCDEFG', 'abc', 'def', 'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    if pattern.startswith('re:'):
        expr = pattern[3:]
        if casesensitive:
            flags = 0
        else:
            flags = remod.I
        try:
            search = remod.compile(expr, flags).search
        except remod.error as e:
            raise error.ParseError(_('invalid regular expression: %s')
                                   % e)
        return 're', expr, search

    if pattern.startswith('literal:'):
        pattern = pattern[8:]

    if casesensitive:
        return 'literal', pattern, pattern.__eq__

    # compare lower-cased forms; the pattern is folded only once
    ipat = encoding.lower(pattern)

    def imatch(s):
        return ipat == encoding.lower(s)
    return 'literal', pattern, imatch
2111 2114
def shortuser(user):
    """Return a short representation of a user name or email address.

    The text is cut at the first '@', anything up to and including the
    first '<' is dropped, and the result is cut at the first space and
    then at the first '.'."""
    user = user.partition('@')[0]
    before, bracket, after = user.partition('<')
    if bracket:
        user = after
    user = user.partition(' ')[0]
    return user.partition('.')[0]
2127 2130
def emailuser(user):
    """Return the user portion of an email address.

    That is the text before the first '@', with anything up to and
    including the first '<' removed."""
    local = user.partition('@')[0]
    before, bracket, after = local.partition('<')
    if bracket:
        return after
    return local
2137 2140
def email(author):
    '''get email of author.

    returns the text after the first '<' (or from the start when there is
    none) up to the first '>' (or to the end when there is none).'''
    start = author.find('<') + 1
    end = author.find('>')
    if end == -1:
        return author[start:]
    return author[start:end]
2144 2147
def ellipsis(text, maxlength=400):
    """Trim string to at most maxlength (default: 400) columns in display.

    The work is delegated to encoding.trim(), with '...' passed as the
    ellipsis marker."""
    trimmed = encoding.trim(text, maxlength, ellipsis='...')
    return trimmed
2148 2151
def unitcountfn(*unittable):
    '''return a function that renders a readable count of some quantity

    each row of unittable is a (multiplier, divisor, format) tuple. the
    returned function uses the first row whose multiplier * divisor the
    count reaches and formats count / divisor with it; when no row
    qualifies, the last row's format is applied to the raw count.'''

    def render(count):
        for threshold, unit, template in unittable:
            if count < unit * threshold:
                continue
            return template % (count / float(unit))
        return unittable[-1][2] % count

    return render
2159 2162
# Render a byte count as GB/MB/KB/bytes. Rows are (multiplier, divisor,
# format) and unitcountfn() uses the first row whose multiplier * divisor the
# count reaches, so larger values within a unit are shown with fewer decimals
# (%.2f from 1 unit, %.1f from 10 units, %.0f from 100 units).
bytecount = unitcountfn(
    (100, 1 << 30, _('%.0f GB')),
    (10, 1 << 30, _('%.1f GB')),
    (1, 1 << 30, _('%.2f GB')),
    (100, 1 << 20, _('%.0f MB')),
    (10, 1 << 20, _('%.1f MB')),
    (1, 1 << 20, _('%.2f MB')),
    (100, 1 << 10, _('%.0f KB')),
    (10, 1 << 10, _('%.1f KB')),
    (1, 1 << 10, _('%.2f KB')),
    (1, 1, _('%.0f bytes')),
    )
2172 2175
def escapestr(s):
    """Return s with special characters backslash-escaped."""
    # codecs.escape_encode is the function underlying
    # s.encode('string_escape'); calling it directly keeps this working on
    # Python 3. It returns a (result, length) pair.
    escaped, _length = codecs.escape_encode(s)
    return escaped
2177 2180
def unescapestr(s):
    """Return s with backslash escapes decoded (inverse of escapestr)."""
    # codecs.escape_decode returns a (result, length) pair
    decoded, _length = codecs.escape_decode(s)
    return decoded
2180 2183
def uirepr(s):
    """Return repr(s) with each doubled backslash collapsed to a single one."""
    # Avoid double backslash in Windows path repr()
    quoted = repr(s)
    return quoted.replace('\\\\', '\\')
2184 2187
2185 2188 # delay import of textwrap
def MBTextWrapper(**kwargs):
    """Return a display-width-aware TextWrapper built from kwargs.

    The wrapper class is defined on the first call. That call also rebinds
    the module-level name MBTextWrapper to the class itself, so later calls
    through that name instantiate the class directly.
    """
    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each characters.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in East Asian area, but as narrow in other.

        This requires user decision to determine width of such characters.
        """
        def _cutdown(self, ucstr, space_left):
            """Split ucstr into (head, tail) so head fits in space_left columns.

            Returns (ucstr, '') when the whole string fits.
            """
            l = 0
            colwidth = encoding.ucolwidth
            for i in xrange(len(ucstr)):
                l += colwidth(ucstr[i])
                if space_left < l:
                    return (ucstr[:i], ucstr[i:])
            return ucstr, ''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            # always leave room for at least one column so progress is made
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                # put as much of the long chunk as fits on this line and
                # leave the rest on the stack for the next line
                cut, res = self._cutdown(reversed_chunks[-1], space_left)
                cur_line.append(cut)
                reversed_chunks[-1] = res
            elif not cur_line:
                # not allowed to break it: give it a line of its own
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            colwidth = encoding.ucolwidth

            lines = []
            if self.width <= 0:
                raise ValueError("invalid width %r (must be > 0)" % self.width)

            # Arrange in reverse order so items can be efficiently popped
            # from a stack of chunks.
            chunks.reverse()

            while chunks:

                # Start the list of chunks that will make up the current line.
                # cur_len is just the length of all the chunks in cur_line.
                cur_line = []
                cur_len = 0

                # Figure out which static string will prefix this line.
                if lines:
                    indent = self.subsequent_indent
                else:
                    indent = self.initial_indent

                # Maximum width for this line.
                # NOTE(review): the indent is measured with len(), not
                # colwidth() -- confirm indents are always narrow characters.
                width = self.width - len(indent)

                # First chunk on line is whitespace -- drop it, unless this
                # is the very beginning of the text (i.e. no lines started yet).
                if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                    del chunks[-1]

                while chunks:
                    l = colwidth(chunks[-1])

                    # Can at least squeeze this chunk onto the current line.
                    if cur_len + l <= width:
                        cur_line.append(chunks.pop())
                        cur_len += l

                    # Nope, this line is full.
                    else:
                        break

                # The current line is full, and the next chunk is too big to
                # fit on *any* line (not just this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # If the last chunk on this line is all whitespace, drop it.
                if (self.drop_whitespace and
                    cur_line and cur_line[-1].strip() == ''):
                    del cur_line[-1]

                # Convert current line back to a string and store it in list
                # of all lines (return value).
                if cur_line:
                    lines.append(indent + ''.join(cur_line))

            return lines

    # replace this factory with the class so it is only defined once
    global MBTextWrapper
    MBTextWrapper = tw
    return tw(**kwargs)
2288 2291
def wrap(line, width, initindent='', hangindent=''):
    """Wrap line to width columns and return the re-encoded result.

    initindent prefixes the first output line and hangindent the following
    ones. All three strings are decoded with the local encoding before
    wrapping with MBTextWrapper."""
    widest = max(len(hangindent), len(initindent))
    if width <= widest:
        # adjust for weird terminal size
        width = max(78, widest + 1)

    codec = pycompat.sysstr(encoding.encoding)
    errors = pycompat.sysstr(encoding.encodingmode)
    uline = line.decode(codec, errors)
    ufirst = initindent.decode(codec, errors)
    uhang = hangindent.decode(codec, errors)

    wrapper = MBTextWrapper(width=width,
                            initial_indent=ufirst,
                            subsequent_indent=uhang)
    return wrapper.fill(uline).encode(codec)
2304 2307
if (pyplatform.python_implementation() == 'CPython' and
    sys.version_info < (3, 0)):
    # There is an issue in CPython that some IO methods do not handle EINTR
    # correctly. The following table shows what CPython version (and functions)
    # are affected (buggy: has the EINTR bug, okay: otherwise):
    #
    #                | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
    #   --------------------------------------------------
    #    fp.__iter__ | buggy   | buggy           | okay
    #    fp.read*    | buggy   | okay [1]        | okay
    #
    # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
    #
    # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
    # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
    #
    # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
    # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
    # CPython 2, because CPython 2 maintains an internal readahead buffer for
    # fp.__iter__ but not other fp.read* methods.
    #
    # On modern systems like Linux, the "read" syscall cannot be interrupted
    # when reading "fast" files like on-disk files. So the EINTR issue only
    # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
    # files approximately as "fast" files and use the fast (unsafe) code path,
    # to minimize the performance impact.
    if sys.version_info >= (2, 7, 4):
        # fp.readline deals with EINTR correctly, use it as a workaround.
        def _safeiterfile(fp):
            """Iterate over the lines of fp by calling fp.readline()."""
            return iter(fp.readline, '')
    else:
        # fp.read* are broken too, manually deal with EINTR in a stupid way.
        # note: this may block longer than necessary because of bufsize.
        def _safeiterfile(fp, bufsize=4096):
            """Yield the lines of fp, reading its descriptor with os.read().

            Reads interrupted by EINTR are retried. Lines are delimited by
            '\\n' only and keep their terminator; a final unterminated line
            is yielded as-is.
            """
            fd = fp.fileno()
            line = ''
            while True:
                try:
                    buf = os.read(fd, bufsize)
                except OSError as ex:
                    # os.read only raises EINTR before any data is read
                    if ex.errno == errno.EINTR:
                        continue
                    else:
                        raise
                line += buf
                if '\n' in buf:
                    # Split on '\n' only. str.splitlines() would also break
                    # on a bare '\r', and such pieces were then reordered or
                    # dropped because only '\n'-terminated pieces were
                    # yielded.
                    pieces = line.split('\n')
                    # the last piece is the (possibly empty) unfinished line
                    line = pieces.pop()
                    for l in pieces:
                        yield l + '\n'
                if not buf:
                    # empty read: end of file
                    break
            if line:
                yield line

    def iterfile(fp):
        """Return an iterable over the lines of fp that tolerates EINTR.

        Regular on-disk files, and objects that are not exactly of type
        ``file``, are returned unchanged; other ``file`` objects (pipes,
        ttys, ...) are wrapped with _safeiterfile().
        """
        fastpath = True
        if type(fp) is file:
            fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
        if fastpath:
            return fp
        else:
            return _safeiterfile(fp)
else:
    # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
    def iterfile(fp):
        """Return fp unchanged; it can be iterated over directly."""
        return fp
2376 2379
def iterlines(iterator):
    """Yield every line of every chunk in iterator, without line endings."""
    for block in iterator:
        pieces = block.splitlines()
        for piece in pieces:
            yield piece
2381 2384
def expandpath(path):
    """Expand environment variables, then a leading '~', in path."""
    withvars = os.path.expandvars(path)
    return os.path.expanduser(withvars)
2384 2387
def hgcmd():
    """Return the command used to execute current hg

    This is different from hgexecutable() because on Windows we want
    to avoid things opening new shell windows like batch files, so we
    get either the python call or current executable.

    The result is a list: gethgcmd() when not running frozen, otherwise a
    single-element list holding the executable path.
    """
    if not mainfrozen():
        return gethgcmd()
    if getattr(sys, 'frozen', None) == 'macosx_app':
        # Env variable set by py2app
        return [encoding.environ['EXECUTABLEPATH']]
    return [pycompat.sysexecutable]
2399 2402
def rundetached(args, condfn):
    """Execute the argument list in a detached process.

    condfn is a callable which is called repeatedly and should return
    True once the child process is known to have started successfully.
    At this point, the child process PID is returned. If the child
    process fails to start or finishes before condfn() evaluates to
    True, return -1.
    """
    # Windows case is easier because the child process is either
    # successfully starting and validating the condition or exiting
    # on failure. We just poll on its PID. On Unix, if the child
    # process fails to start, it will be left in a zombie state until
    # the parent wait on it, which we cannot do since we expect a long
    # running process on success. Instead we listen for SIGCHLD telling
    # us our child process terminated.
    terminated = set()
    def handler(signum, frame):
        # os.wait() returns a (pid, status) pair. Record only the pid so
        # that the "pid in terminated" test below can actually succeed;
        # storing the whole pair made that test always false.
        terminated.add(os.wait()[0])
    prevhandler = None
    SIGCHLD = getattr(signal, 'SIGCHLD', None)
    if SIGCHLD is not None:
        # SIGCHLD is looked up with getattr because not every platform
        # defines it
        prevhandler = signal.signal(SIGCHLD, handler)
    try:
        pid = spawndetached(args)
        while not condfn():
            # condfn() is re-checked after noticing the child is gone, in
            # case the condition became true in between
            if ((pid in terminated or not testpid(pid))
                and not condfn()):
                return -1
            time.sleep(0.1)
        return pid
    finally:
        if prevhandler is not None:
            signal.signal(signal.SIGCHLD, prevhandler)
2434 2437
def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
    """Return the result of interpolating items in the mapping into string s.

    prefix is a single character string, or a two character string with
    a backslash as the first character if the prefix needs to be escaped in
    a regular expression.

    mapping maps each key that may follow the prefix to its replacement
    text. Keys are joined into a regular expression as-is. The mapping is
    not modified.

    fn is an optional function that will be applied to the replacement text
    just before replacement.

    escape_prefix is an optional flag that allows using doubled prefix for
    its escaping.
    """
    fn = fn or (lambda s: s)
    patterns = '|'.join(mapping.keys())
    prefix_char = None
    if escape_prefix:
        patterns += '|' + prefix
        if len(prefix) > 1:
            # drop the regex-escaping backslash to get the literal character
            prefix_char = prefix[1:]
        else:
            prefix_char = prefix
    r = remod.compile(r'%s(%s)' % (prefix, patterns))

    def replace(m):
        # strip the one-character prefix from the matched text
        key = m.group()[1:]
        # A doubled prefix stands for the prefix character itself. It is
        # resolved here rather than by inserting an entry into the caller's
        # mapping, which used to leak a key into the caller's dict.
        if escape_prefix and key == prefix_char:
            return fn(prefix_char)
        return fn(mapping[key])

    return r.sub(replace, s)
2459 2462
def getport(port):
    """Return the port for a given network service.

    A value that int() accepts is returned as that integer. Anything else
    is treated as a service name and resolved with
    socket.getservbyname(); error.Abort is raised when no such service
    exists.
    """
    try:
        number = int(port)
    except ValueError:
        number = None
    if number is not None:
        return number

    try:
        return socket.getservbyname(port)
    except socket.error:
        raise Abort(_("no port number associated with service '%s'") % port)
2476 2479
2477 2480 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
2478 2481 '0': False, 'no': False, 'false': False, 'off': False,
2479 2482 'never': False}
2480 2483
def parsebool(s):
    """Parse s into a boolean.

    s is lower-cased and looked up in _booleans. Returns True or False for
    a known spelling and None otherwise.
    """
    key = s.lower()
    return _booleans.get(key)
2487 2490
2488 2491 _hextochr = dict((a + b, chr(int(a + b, 16)))
2489 2492 for a in string.hexdigits for b in string.hexdigits)
2490 2493
2491 2494 class url(object):
2492 2495 r"""Reliable URL parser.
2493 2496
2494 2497 This parses URLs and provides attributes for the following
2495 2498 components:
2496 2499
2497 2500 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2498 2501
2499 2502 Missing components are set to None. The only exception is
2500 2503 fragment, which is set to '' if present but empty.
2501 2504
2502 2505 If parsefragment is False, fragment is included in query. If
2503 2506 parsequery is False, query is included in path. If both are
2504 2507 False, both fragment and query are included in path.
2505 2508
2506 2509 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2507 2510
2508 2511 Note that for backward compatibility reasons, bundle URLs do not
2509 2512 take host names. That means 'bundle://../' has a path of '../'.
2510 2513
2511 2514 Examples:
2512 2515
2513 2516 >>> url('http://www.ietf.org/rfc/rfc2396.txt')
2514 2517 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2515 2518 >>> url('ssh://[::1]:2200//home/joe/repo')
2516 2519 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2517 2520 >>> url('file:///home/joe/repo')
2518 2521 <url scheme: 'file', path: '/home/joe/repo'>
2519 2522 >>> url('file:///c:/temp/foo/')
2520 2523 <url scheme: 'file', path: 'c:/temp/foo/'>
2521 2524 >>> url('bundle:foo')
2522 2525 <url scheme: 'bundle', path: 'foo'>
2523 2526 >>> url('bundle://../foo')
2524 2527 <url scheme: 'bundle', path: '../foo'>
2525 2528 >>> url(r'c:\foo\bar')
2526 2529 <url path: 'c:\\foo\\bar'>
2527 2530 >>> url(r'\\blah\blah\blah')
2528 2531 <url path: '\\\\blah\\blah\\blah'>
2529 2532 >>> url(r'\\blah\blah\blah#baz')
2530 2533 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2531 2534 >>> url(r'file:///C:\users\me')
2532 2535 <url scheme: 'file', path: 'C:\\users\\me'>
2533 2536
2534 2537 Authentication credentials:
2535 2538
2536 2539 >>> url('ssh://joe:xyz@x/repo')
2537 2540 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2538 2541 >>> url('ssh://joe@x/repo')
2539 2542 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2540 2543
2541 2544 Query strings and fragments:
2542 2545
2543 2546 >>> url('http://host/a?b#c')
2544 2547 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2545 2548 >>> url('http://host/a?b#c', parsequery=False, parsefragment=False)
2546 2549 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2547 2550
2548 2551 Empty path:
2549 2552
2550 2553 >>> url('')
2551 2554 <url path: ''>
2552 2555 >>> url('#a')
2553 2556 <url path: '', fragment: 'a'>
2554 2557 >>> url('http://host/')
2555 2558 <url scheme: 'http', host: 'host', path: ''>
2556 2559 >>> url('http://host/#a')
2557 2560 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2558 2561
2559 2562 Only scheme:
2560 2563
2561 2564 >>> url('http:')
2562 2565 <url scheme: 'http'>
2563 2566 """
2564 2567
2565 2568 _safechars = "!~*'()+"
2566 2569 _safepchars = "/!~*'()+:\\"
2567 2570 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2568 2571
2569 2572 def __init__(self, path, parsequery=True, parsefragment=True):
2570 2573 # We slowly chomp away at path until we have only the path left
2571 2574 self.scheme = self.user = self.passwd = self.host = None
2572 2575 self.port = self.path = self.query = self.fragment = None
2573 2576 self._localpath = True
2574 2577 self._hostport = ''
2575 2578 self._origpath = path
2576 2579
2577 2580 if parsefragment and '#' in path:
2578 2581 path, self.fragment = path.split('#', 1)
2579 2582
2580 2583 # special case for Windows drive letters and UNC paths
2581 2584 if hasdriveletter(path) or path.startswith('\\\\'):
2582 2585 self.path = path
2583 2586 return
2584 2587
2585 2588 # For compatibility reasons, we can't handle bundle paths as
2586 2589 # normal URLS
2587 2590 if path.startswith('bundle:'):
2588 2591 self.scheme = 'bundle'
2589 2592 path = path[7:]
2590 2593 if path.startswith('//'):
2591 2594 path = path[2:]
2592 2595 self.path = path
2593 2596 return
2594 2597
2595 2598 if self._matchscheme(path):
2596 2599 parts = path.split(':', 1)
2597 2600 if parts[0]:
2598 2601 self.scheme, path = parts
2599 2602 self._localpath = False
2600 2603
2601 2604 if not path:
2602 2605 path = None
2603 2606 if self._localpath:
2604 2607 self.path = ''
2605 2608 return
2606 2609 else:
2607 2610 if self._localpath:
2608 2611 self.path = path
2609 2612 return
2610 2613
2611 2614 if parsequery and '?' in path:
2612 2615 path, self.query = path.split('?', 1)
2613 2616 if not path:
2614 2617 path = None
2615 2618 if not self.query:
2616 2619 self.query = None
2617 2620
2618 2621 # // is required to specify a host/authority
2619 2622 if path and path.startswith('//'):
2620 2623 parts = path[2:].split('/', 1)
2621 2624 if len(parts) > 1:
2622 2625 self.host, path = parts
2623 2626 else:
2624 2627 self.host = parts[0]
2625 2628 path = None
2626 2629 if not self.host:
2627 2630 self.host = None
2628 2631 # path of file:///d is /d
2629 2632 # path of file:///d:/ is d:/, not /d:/
2630 2633 if path and not hasdriveletter(path):
2631 2634 path = '/' + path
2632 2635
2633 2636 if self.host and '@' in self.host:
2634 2637 self.user, self.host = self.host.rsplit('@', 1)
2635 2638 if ':' in self.user:
2636 2639 self.user, self.passwd = self.user.split(':', 1)
2637 2640 if not self.host:
2638 2641 self.host = None
2639 2642
2640 2643 # Don't split on colons in IPv6 addresses without ports
2641 2644 if (self.host and ':' in self.host and
2642 2645 not (self.host.startswith('[') and self.host.endswith(']'))):
2643 2646 self._hostport = self.host
2644 2647 self.host, self.port = self.host.rsplit(':', 1)
2645 2648 if not self.host:
2646 2649 self.host = None
2647 2650
2648 2651 if (self.host and self.scheme == 'file' and
2649 2652 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2650 2653 raise Abort(_('file:// URLs can only refer to localhost'))
2651 2654
2652 2655 self.path = path
2653 2656
2654 2657 # leave the query string escaped
2655 2658 for a in ('user', 'passwd', 'host', 'port',
2656 2659 'path', 'fragment'):
2657 2660 v = getattr(self, a)
2658 2661 if v is not None:
2659 2662 setattr(self, a, urlreq.unquote(v))
2660 2663
2661 2664 def __repr__(self):
2662 2665 attrs = []
2663 2666 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2664 2667 'query', 'fragment'):
2665 2668 v = getattr(self, a)
2666 2669 if v is not None:
2667 2670 attrs.append('%s: %r' % (a, v))
2668 2671 return '<url %s>' % ', '.join(attrs)
2669 2672
2670 2673 def __str__(self):
2671 2674 r"""Join the URL's components back into a URL string.
2672 2675
2673 2676 Examples:
2674 2677
2675 2678 >>> str(url('http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2676 2679 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2677 2680 >>> str(url('http://user:pw@host:80/?foo=bar&baz=42'))
2678 2681 'http://user:pw@host:80/?foo=bar&baz=42'
2679 2682 >>> str(url('http://user:pw@host:80/?foo=bar%3dbaz'))
2680 2683 'http://user:pw@host:80/?foo=bar%3dbaz'
2681 2684 >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
2682 2685 'ssh://user:pw@[::1]:2200//home/joe#'
2683 2686 >>> str(url('http://localhost:80//'))
2684 2687 'http://localhost:80//'
2685 2688 >>> str(url('http://localhost:80/'))
2686 2689 'http://localhost:80/'
2687 2690 >>> str(url('http://localhost:80'))
2688 2691 'http://localhost:80/'
2689 2692 >>> str(url('bundle:foo'))
2690 2693 'bundle:foo'
2691 2694 >>> str(url('bundle://../foo'))
2692 2695 'bundle:../foo'
2693 2696 >>> str(url('path'))
2694 2697 'path'
2695 2698 >>> str(url('file:///tmp/foo/bar'))
2696 2699 'file:///tmp/foo/bar'
2697 2700 >>> str(url('file:///c:/tmp/foo/bar'))
2698 2701 'file:///c:/tmp/foo/bar'
2699 2702 >>> print url(r'bundle:foo\bar')
2700 2703 bundle:foo\bar
2701 2704 >>> print url(r'file:///D:\data\hg')
2702 2705 file:///D:\data\hg
2703 2706 """
2704 2707 return encoding.strfromlocal(self.__bytes__())
2705 2708
2706 2709 def __bytes__(self):
2707 2710 if self._localpath:
2708 2711 s = self.path
2709 2712 if self.scheme == 'bundle':
2710 2713 s = 'bundle:' + s
2711 2714 if self.fragment:
2712 2715 s += '#' + self.fragment
2713 2716 return s
2714 2717
2715 2718 s = self.scheme + ':'
2716 2719 if self.user or self.passwd or self.host:
2717 2720 s += '//'
2718 2721 elif self.scheme and (not self.path or self.path.startswith('/')
2719 2722 or hasdriveletter(self.path)):
2720 2723 s += '//'
2721 2724 if hasdriveletter(self.path):
2722 2725 s += '/'
2723 2726 if self.user:
2724 2727 s += urlreq.quote(self.user, safe=self._safechars)
2725 2728 if self.passwd:
2726 2729 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2727 2730 if self.user or self.passwd:
2728 2731 s += '@'
2729 2732 if self.host:
2730 2733 if not (self.host.startswith('[') and self.host.endswith(']')):
2731 2734 s += urlreq.quote(self.host)
2732 2735 else:
2733 2736 s += self.host
2734 2737 if self.port:
2735 2738 s += ':' + urlreq.quote(self.port)
2736 2739 if self.host:
2737 2740 s += '/'
2738 2741 if self.path:
2739 2742 # TODO: similar to the query string, we should not unescape the
2740 2743 # path when we store it, the path might contain '%2f' = '/',
2741 2744 # which we should *not* escape.
2742 2745 s += urlreq.quote(self.path, safe=self._safepchars)
2743 2746 if self.query:
2744 2747 # we store the query in escaped form.
2745 2748 s += '?' + self.query
2746 2749 if self.fragment is not None:
2747 2750 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2748 2751 return s
2749 2752
def authinfo(self):
    """Return ``(url, authinfo)`` with credentials stripped from ``url``.

    ``url`` is ``str(self)`` computed while user and password are
    temporarily cleared. ``authinfo`` is ``None`` when no user is set,
    otherwise the tuple ``(None, (url, host), user, passwd or '')``.
    """
    saveduser, savedpasswd = self.user, self.passwd
    try:
        self.user, self.passwd = None, None
        bare = str(self)
    finally:
        # Always restore the credentials, even if str() raised.
        self.user, self.passwd = saveduser, savedpasswd

    if not self.user:
        return (bare, None)

    # authinfo[1] is passed to urllib2 password manager, and its
    # URIs must not contain credentials. The host is passed in the
    # URIs list because Python < 2.4.3 uses only that to search for
    # a password.
    uris = (bare, self.host)
    return (bare, (None, uris, self.user, self.passwd or ''))
2765 2768
def isabs(self):
    """Tell whether this URL is absolute (cannot be joined onto a base).

    Any scheme other than ``file`` counts as a remote URL and is absolute.
    Otherwise the path decides: a drive letter, a leading ``\\\\`` (Windows
    UNC path) or a leading ``/`` (POSIX-style) make it absolute.
    """
    if self.scheme and self.scheme != 'file':
        return True  # remote URL
    location = self.path
    if hasdriveletter(location):
        return True  # absolute for our purposes - can't be joined()
    # Windows UNC prefix or POSIX-style root.
    return location.startswith((r'\\', '/'))
2776 2779
def localpath(self):
    """Return the filesystem path this URL refers to.

    For the ``file`` and ``bundle`` schemes the path is derived from the
    parsed components; for anything else the original, unparsed string
    (``self._origpath``) is returned.
    """
    if self.scheme not in ('file', 'bundle'):
        return self._origpath

    result = self.path or '/'
    # For Windows, we need to promote hosts containing drive
    # letters to paths with drive letters.
    if hasdriveletter(self._hostport):
        result = self._hostport + '/' + self.path
    elif (self.host is not None and self.path
          and not hasdriveletter(result)):
        result = '/' + result
    return result
2789 2792
def islocal(self):
    '''whether localpath will return something that posixfile can open

    True when there is no scheme at all, or the scheme is ``file`` or
    ``bundle``.
    '''
    scheme = self.scheme
    return not scheme or scheme in ('file', 'bundle')
2794 2797
def hasscheme(path):
    """Return True if parsing ``path`` as a url yields a non-empty scheme."""
    parsed = url(path)
    return bool(parsed.scheme)
2797 2800
def hasdriveletter(path):
    """Return True if ``path`` starts with a drive letter such as ``c:``.

    A drive letter is a single alphabetic character immediately followed
    by a colon. Empty or ``None`` input yields ``False``; the result is
    always a real boolean rather than the falsy input object itself.

    >>> hasdriveletter('c:/tmp')
    True
    >>> hasdriveletter('/tmp')
    False
    >>> hasdriveletter(None)
    False
    """
    # Slices (not indexing) keep one-character inputs from raising.
    return bool(path) and path[1:2] == ':' and path[0:1].isalpha()
2800 2803
def urllocalpath(path):
    """Return the local path for ``path``, parsed as a url with query and
    fragment parsing disabled."""
    parsed = url(path, parsequery=False, parsefragment=False)
    return parsed.localpath()
2803 2806
def hidepassword(u):
    '''hide user credential in a url string

    A non-empty password is replaced by ``***``; everything else is
    returned as the url class renders it.
    '''
    parsed = url(u)
    if parsed.passwd:
        parsed.passwd = '***'
    return str(parsed)
2810 2813
def removeauth(u):
    '''remove all authentication information from a url string

    Both the user and the password are cleared before the url is
    rendered back to a string.
    '''
    parsed = url(u)
    parsed.user = None
    parsed.passwd = None
    return str(parsed)
2816 2819
# Formatter for elapsed times, built by unitcountfn (defined elsewhere in
# this module) from rows of three values. The rows run from seconds down
# to nanoseconds with progressively more decimals.
# NOTE(review): rows look like (multiplier, divisor, format) -- confirm
# against unitcountfn before relying on that reading.
timecount = unitcountfn(
    (1, 1e3, _('%.0f s')),
    (100, 1, _('%.1f s')),
    (10, 1, _('%.2f s')),
    (1, 1, _('%.3f s')),
    (100, 1e-3, _('%.1f ms')),
    (10, 1e-3, _('%.2f ms')),
    (1, 1e-3, _('%.3f ms')),
    (100, 1e-6, _('%.1f us')),
    (10, 1e-6, _('%.2f us')),
    (1, 1e-6, _('%.3f us')),
    (100, 1e-9, _('%.1f ns')),
    (10, 1e-9, _('%.2f ns')),
    (1, 1e-9, _('%.3f ns')),
    )
2832 2835
# Current indentation, in spaces, applied to @timed reports. It lives in a
# one-element list so the wrapper below can update it in place.
_timenesting = [0]

def timed(func):
    '''Report the execution time of a function call to stderr.

    During development, use as a decorator when you need to measure
    the cost of a function, e.g. as follows:

    @util.timed
    def foo(a, b, c):
        pass

    Reports of nested timed calls are indented by two spaces per level.
    '''
    step = 2

    def wrapper(*args, **kwargs):
        began = timer()
        _timenesting[0] += step
        try:
            return func(*args, **kwargs)
        finally:
            # Runs whether func returned or raised, so the nesting level
            # is always restored and the time always reported.
            elapsed = timer() - began
            _timenesting[0] -= step
            prefix = ' ' * _timenesting[0]
            stderr.write('%s%s: %s\n' %
                         (prefix, func.__name__, timecount(elapsed)))
    return wrapper
2859 2862
# (suffix, multiplier) pairs tried in order by sizetoint(). The bare 'b'
# suffix comes last so that 'kb', 'mb' and 'gb' are matched first.
_sizeunits = (
    ('m', 2**20),
    ('k', 2**10),
    ('g', 2**30),
    ('kb', 2**10),
    ('mb', 2**20),
    ('gb', 2**30),
    ('b', 1),
)

def sizetoint(s):
    '''Convert a space specifier to a byte count.

    The specifier is a number optionally followed by one of the suffixes
    in _sizeunits (case-insensitive). Fractional results are truncated.
    Raises error.ParseError when the numeric part cannot be parsed.

    >>> sizetoint('30')
    30
    >>> sizetoint('2.2kb')
    2252
    >>> sizetoint('6M')
    6291456
    '''
    spec = s.strip().lower()
    try:
        for suffix, multiplier in _sizeunits:
            if not spec.endswith(suffix):
                continue
            number = spec[:-len(suffix)]
            return int(float(number) * multiplier)
        # No suffix matched: the whole string must be an integer.
        return int(spec)
    except ValueError:
        raise error.ParseError(_("couldn't parse size: %s") % s)
2881 2884
class hooks(object):
    '''A collection of hook functions that can be used to extend a
    function's behavior. Hooks are called in lexicographic order,
    based on the names of their sources.'''

    def __init__(self):
        # List of (source, hook) pairs in registration order.
        self._hooks = []

    def add(self, source, hook):
        '''Register ``hook`` under the name ``source``.'''
        entry = (source, hook)
        self._hooks.append(entry)

    def __call__(self, *args):
        '''Call every hook with ``args`` and return the list of results,
        ordered by source name.'''
        # Sort in place on the source name only, so hooks themselves are
        # never compared.
        self._hooks.sort(key=lambda entry: entry[0])
        return [hook(*args) for _source, hook in self._hooks]
2899 2902
def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%s', depth=0):
    '''Yields lines for a nicely formatted stacktrace.

    The 'skip' innermost entries are dropped, then only the last 'depth'
    remaining entries are kept (a depth of 0 keeps them all).
    Each file+linenumber is formatted according to fileline.
    Each line is formatted according to line.
    If line is None, it yields tuples of:
      length of longest filepath+line number,
      filepath+linenumber,
      function

    Not to be used in production code but very convenient while developing.
    '''
    # The extra -1 removes this generator's own frame from the stack.
    stack = traceback.extract_stack()[:-skip - 1]
    entries = [(fileline % (fn, ln), func) for fn, ln, func, _text in stack]
    # With depth == 0 this slice is [-0:], i.e. the whole list.
    entries = entries[-depth:]
    if not entries:
        return

    fnmax = max(len(location) for location, _func in entries)
    for fnln, func in entries:
        row = (fnmax, fnln, func)
        if line is None:
            yield row
        else:
            yield line % row
2922 2925
def debugstacktrace(msg='stacktrace', skip=0,
                    f=stderr, otherf=stdout, depth=0):
    '''Writes a message to f (stderr) with a nicely formatted stacktrace.

    Skips the 'skip' entries closest to the call, then shows 'depth'
    entries. Unless otherf is false, it is flushed first (stdout by
    default).
    It can be used everywhere and intentionally does not require an ui
    object.
    Not to be used in production code but very convenient while developing.
    '''
    if otherf:
        otherf.flush()
    header = '%s at:\n' % msg.rstrip()
    f.write(header)
    # skip + 1 hides this function's own frame. The frames are consumed
    # with a plain loop so no additional stack level is introduced.
    for entry in getstackframes(skip + 1, depth=depth):
        f.write(entry)
    f.flush()
2937 2940
class dirs(object):
    '''a multiset of directory names from a dirstate or manifest'''

    def __init__(self, map, skip=None):
        '''Count the ancestor directories of every path in ``map``.

        When ``map`` has an ``iteritems`` method and ``skip`` is given,
        entries whose value starts with ``skip`` are left out.
        '''
        self._dirs = {}
        add = self.addpath
        if safehasattr(map, 'iteritems') and skip is not None:
            for fname, state in map.iteritems():
                if state[0] != skip:
                    add(fname)
        else:
            for fname in map:
                add(fname)

    def addpath(self, path):
        '''Add one reference for each ancestor directory of ``path``.'''
        counts = self._dirs
        for base in finddirs(path):
            if base in counts:
                # A directory that is already present implies all of its
                # parents are counted too, so stop here.
                counts[base] += 1
                return
            counts[base] = 1

    def delpath(self, path):
        '''Drop one reference for each ancestor directory of ``path``.'''
        counts = self._dirs
        for base in finddirs(path):
            if counts[base] > 1:
                # Still referenced by other paths, so the parents stay
                # untouched as well.
                counts[base] -= 1
                return
            del counts[base]

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs

# Use the implementation from the parsers module when it provides one.
if safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs
2976 2979
def finddirs(path):
    """Yield each ancestor directory of ``path``, deepest first.

    The path is cut at every '/' from right to left, so 'a/b/c' yields
    'a/b' and then 'a'. A path without '/' yields nothing.
    """
    end = len(path)
    while True:
        end = path.rfind('/', 0, end)
        if end == -1:
            return
        yield path[:end]
2982 2985
class ctxmanager(object):
    '''A context manager for use in 'with' blocks to allow multiple
    contexts to be entered at once. This is both safer and more
    flexible than contextlib.nested.

    Once Mercurial supports Python 2.7+, this will become mostly
    unnecessary.
    '''

    def __init__(self, *args):
        '''Accepts a list of no-argument functions that return context
        managers. These will be invoked when enter() is called.'''
        self._pending = args
        # Exit callables in registration order; run in reverse on exit.
        self._atexit = []

    def __enter__(self):
        return self

    def enter(self):
        '''Create and enter context managers in the order in which they were
        passed to the constructor. Returns the list of values produced by
        their __enter__ methods.'''
        values = []
        for func in self._pending:
            obj = func()
            values.append(obj.__enter__())
            self._atexit.append(obj.__exit__)
        del self._pending
        return values

    def atexit(self, func, *args, **kwargs):
        '''Add a function to call when this context manager exits.

        Exit actions run in reverse order of registration, so a function
        added after enter() runs before the __exit__ methods of the
        managers created by enter(). Exceptions in flight are not passed
        to func and its return value never suppresses them. Returns func.
        '''
        def wrapper(exc_type, exc_val, exc_tb):
            func(*args, **kwargs)
        self._atexit.append(wrapper)
        return func

    def __exit__(self, exc_type, exc_val, exc_tb):
        '''Context managers are exited in the reverse order from which
        they were created.

        An exception raised by one exit function replaces the one in
        flight and is handed to the remaining exit functions, exactly as
        with nested 'with' statements. If it is still unsuppressed at the
        end it is re-raised. Returns True when an exception came from the
        'with' body and some exit function suppressed it.
        '''
        received = exc_type is not None
        suppressed = False
        pending = None
        self._atexit.reverse()
        for exitfunc in self._atexit:
            try:
                if exitfunc(exc_type, exc_val, exc_tb):
                    suppressed = True
                    exc_type = None
                    exc_val = None
                    exc_tb = None
                    # The exception in flight -- including one raised by
                    # an earlier exit function -- has been handled, so
                    # there is nothing left to re-raise.
                    pending = None
            except BaseException:
                exc_type, exc_val, exc_tb = pending = sys.exc_info()
        del self._atexit
        if pending:
            raise exc_val
        return received and suppressed
3042 3045
3043 3046 # compression code
3044 3047
# Role names accepted by compressormanager.supportedwireengines().
SERVERROLE = 'server'
CLIENTROLE = 'client'

# Value returned by compression engines from wireprotosupport(): the wire
# protocol format identifier plus the advertisement priority for each role.
compewireprotosupport = collections.namedtuple(
    u'compenginewireprotosupport',
    (u'name', u'serverpriority', u'clientpriority'),
)
3051 3054
class compressormanager(object):
    """Holds registrations of various compression engines.

    This class essentially abstracts the differences between compression
    engines to allow new compression formats to be added easily, possibly from
    extensions.

    Compressors are registered against the global instance by calling its
    ``register()`` method.
    """
    def __init__(self):
        # Engine name to engine instance.
        self._engines = {}
        # Bundle spec human name to engine name.
        self._bundlenames = {}
        # Internal bundle identifier to engine name.
        self._bundletypes = {}
        # Revlog header to engine name.
        self._revlogheaders = {}
        # Wire proto identifier to engine name.
        self._wiretypes = {}

    def __getitem__(self, key):
        return self._engines[key]

    def __contains__(self, key):
        return key in self._engines

    def __iter__(self):
        return iter(self._engines.keys())

    def register(self, engine):
        """Register a compression engine with the manager.

        The argument must be a ``compressionengine`` instance.

        Raises ``ValueError`` for any other argument and aborts when the
        engine name, bundle name, bundle type, wire protocol identifier or
        revlog header is already taken. Every identifier is checked before
        any table is modified, so a rejected engine leaves no partial
        registration behind.
        """
        if not isinstance(engine, compressionengine):
            raise ValueError(_('argument must be a compressionengine'))

        name = engine.name()

        if name in self._engines:
            raise error.Abort(_('compression engine %s already registered') %
                              name)

        # Phase 1: validate everything the engine declares.
        bundlename = bundletype = None
        bundleinfo = engine.bundletype()
        if bundleinfo:
            bundlename, bundletype = bundleinfo

            if bundlename in self._bundlenames:
                raise error.Abort(_('bundle name %s already registered') %
                                  bundlename)
            if bundletype in self._bundletypes:
                raise error.Abort(_('bundle type %s already registered by %s') %
                                  (bundletype, self._bundletypes[bundletype]))

        wiretype = None
        wiresupport = engine.wireprotosupport()
        if wiresupport:
            wiretype = wiresupport.name
            if wiretype in self._wiretypes:
                raise error.Abort(_('wire protocol compression %s already '
                                    'registered by %s') %
                                  (wiretype, self._wiretypes[wiretype]))

        revlogheader = engine.revlogheader()
        if revlogheader and revlogheader in self._revlogheaders:
            raise error.Abort(_('revlog header %s already registered by %s') %
                              (revlogheader, self._revlogheaders[revlogheader]))

        # Phase 2: all checks passed, record the engine.
        if bundleinfo:
            # A false bundle name means no external facing name is
            # declared; only the internal identifier is recorded then.
            if bundlename:
                self._bundlenames[bundlename] = name
            self._bundletypes[bundletype] = name

        if wiresupport:
            self._wiretypes[wiretype] = name

        if revlogheader:
            self._revlogheaders[revlogheader] = name

        self._engines[name] = engine

    @property
    def supportedbundlenames(self):
        """Set of registered user-facing bundle spec names."""
        return set(self._bundlenames.keys())

    @property
    def supportedbundletypes(self):
        """Set of registered internal bundle identifiers."""
        return set(self._bundletypes.keys())

    def forbundlename(self, bundlename):
        """Obtain a compression engine registered to a bundle name.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundlenames[bundlename]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundletypes[bundletype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'

        engines = [self._engines[e] for e in self._wiretypes.values()]
        if onlyavailable:
            engines = [e for e in engines if e.available()]

        def getkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            w = e.wireprotosupport()
            return -1 * getattr(w, attr), w.name

        return sorted(engines, key=getkey)

    def forwiretype(self, wiretype):
        """Obtain a compression engine registered to a wire proto identifier.

        Will raise KeyError if the wire type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._wiretypes[wiretype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.
        """
        return self._engines[self._revlogheaders[header]]

# Global registry that the engines defined in this module register with.
compengines = compressormanager()
3207 3210
class compressionengine(object):
    """Base class for compression engines.

    Every compression engine implements the interface declared here.
    Optional capabilities default to "not supported" (``None``); the
    operations that have no sensible default raise
    ``NotImplementedError``.
    """
    def name(self):
        """Return the engine's name, which is also its registration key.

        Subclasses must override this method.
        """
        raise NotImplementedError()

    def available(self):
        """Report whether this engine can be used in this installation.

        Optional engines (for instance ones relying on C extensions that
        may be missing) override this; the default is ``True``.
        """
        return True

    def bundletype(self):
        """Describe how this engine is identified in bundles.

        Returns ``None`` when the engine can't be used for bundles.

        Otherwise returns a 2-tuple of strings: the user-facing "bundle
        spec" compression name and the internal identifier that denotes
        the format inside bundles. Set the first element to ``None`` to
        keep the name out of external usage.

        Engines that support bundles must also implement
        ``compressstream`` and ``decompressorreader``.
        """
        return None

    def wireprotosupport(self):
        """Declare support for this format on the wire protocol.

        Returns ``None`` when the engine can't compress wire protocol
        payloads.

        Otherwise returns a ``compenginewireprotosupport`` holding:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        The priorities order the advertisement of format support by
        server and client: the highest integer is advertised first, and
        non-positive values aren't advertised at all.

        The values are somewhat arbitrary and only provide a default
        ordering, which config options can change.

        Engines that support the wire protocol must also implement
        ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Return the header that marks revlog chunks from this engine.

        Engines usable for revlog compression return the bytes that
        identify chunks they compressed. Engines that do not take part in
        revlog compression return ``None``.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        ``it`` is an iterator (ideally a generator) of chunks of bytes to
        compress. The result is an iterator (ideally a generator) of
        chunks of bytes making up the compressed output.

        ``opts`` optionally describes how to compress; its meaning is
        specific to each engine.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Wrap a file object so that reads return decompressed data.

        ``fh`` has a ``read(size)`` method returning compressed data. The
        returned object has a ``read(size)`` method returning
        uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        Its ``compress(data)`` method compresses binary data and returns
        the compressed bytes, or ``None`` when the data could not be
        compressed (too small, not compressible, etc). The returned data
        should start with a header uniquely identifying this compression
        format, as given by ``revlogheader()``, so decompression can be
        routed back to this engine.

        Its ``decompress(data)`` method is only called when ``data``
        begins with ``revlogheader()``. It should return the raw,
        uncompressed data or raise a ``RevlogError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()
3321 3324
class _zlibengine(compressionengine):
    """Compression engine built on the standard zlib module."""

    def name(self):
        return 'zlib'

    def bundletype(self):
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        """Yield zlib-compressed data for the chunks in ``it``.

        ``opts['level']`` selects the compression level (default -1).
        """
        level = (opts or {}).get('level', -1)
        compressor = zlib.compressobj(level)
        for chunk in it:
            data = compressor.compress(chunk)
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through generator.
            if data:
                yield data

        yield compressor.flush()

    def decompressorreader(self, fh):
        """Return a chunkbuffer over the decompressed contents of ``fh``."""
        def gen():
            inflater = zlib.decompressobj()
            for chunk in filechunkiter(fh):
                while chunk:
                    # Limit output size to limit memory.
                    yield inflater.decompress(chunk, 2 ** 18)
                    # Whatever did not fit in the output limit is fed
                    # back in on the next round.
                    chunk = inflater.unconsumed_tail

        return chunkbuffer(gen())

    class zlibrevlogcompressor(object):
        def compress(self, data):
            """Return ``data`` compressed, or None if that doesn't pay off."""
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                return None

            if insize <= 1000000:
                packed = zlib.compress(data)
                return packed if len(packed) < insize else None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            compressor = zlib.compressobj()
            step = 2**20
            parts = [compressor.compress(data[start:start + step])
                     for start in range(0, insize, step)]
            parts.append(compressor.flush())

            if sum(map(len, parts)) < insize:
                return ''.join(parts)
            return None

        def decompress(self, data):
            """Return ``data`` decompressed; zlib errors become RevlogError."""
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())
3402 3405
class _bz2engine(compressionengine):
    """Compression engine built on the standard bz2 module."""

    def name(self):
        return 'bz2'

    def bundletype(self):
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        """Yield bzip2-compressed data for the chunks in ``it``.

        ``opts['level']`` selects the compression level (default 9).
        """
        level = (opts or {}).get('level', 9)
        compressor = bz2.BZ2Compressor(level)
        for chunk in it:
            data = compressor.compress(chunk)
            # Skip the calls that produced no output.
            if data:
                yield data

        yield compressor.flush()

    def decompressorreader(self, fh):
        """Return a chunkbuffer over the decompressed contents of ``fh``."""
        def gen():
            decompressor = bz2.BZ2Decompressor()
            for chunk in filechunkiter(fh):
                yield decompressor.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_bz2engine())
3434 3437
class _truncatedbz2engine(compressionengine):
    """Engine for bzip2 streams whose leading 'BZ' header is missing."""

    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        # No user-facing name: only the internal identifier is declared.
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled elsewhere.

    def decompressorreader(self, fh):
        """Return a chunkbuffer over the decompressed contents of ``fh``."""
        def gen():
            decompressor = bz2.BZ2Decompressor()
            # The input stream doesn't have the 'BZ' header. So add it back.
            decompressor.decompress('BZ')
            for chunk in filechunkiter(fh):
                yield decompressor.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_truncatedbz2engine())
3455 3458
class _noopengine(compressionengine):
    """Engine that passes data through without compressing it."""

    def name(self):
        return 'none'

    def bundletype(self):
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't because
    # unless you are on a fast network, uncompressed payloads can easily
    # saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        # Nothing to do: the input iterator is handed back unchanged.
        return it

    def decompressorreader(self, fh):
        # Nothing to do: the file object is handed back unchanged.
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            # Always report "could not be compressed".
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())
3486 3489
class _zstdengine(compressionengine):
    """Compression engine backed by the package-local zstd module."""

    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
        except ImportError:
            return None
        return zstd

    def available(self):
        return bool(self._module)

    def bundletype(self):
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        """Yield zstd-compressed data for the chunks in ``it``.

        ``opts['level']`` selects the compression level (default 3).
        """
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = (opts or {}).get('level', 3)

        compressor = self._module.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            data = compressor.compress(chunk)
            if data:
                yield data

        yield compressor.flush()

    def decompressorreader(self, fh):
        """Return a chunkbuffer over the decompressed contents of ``fh``."""
        dctx = self._module.ZstdDecompressor()
        return chunkbuffer(dctx.read_from(fh))

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # Writing the content size adds a few bytes to the output. However,
            # it allows decompression to be more optimal since we can
            # pre-allocate a buffer to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level,
                                             write_content_size=True)
            self._dctx = zstd.ZstdDecompressor()
            # Slice sizes used when streaming large inputs.
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            """Return ``data`` compressed, or None if that doesn't pay off."""
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                return None

            if insize <= 1000000:
                packed = self._cctx.compress(data)
                return packed if len(packed) < insize else None

            # Large input: feed the compressor one slice at a time.
            compressor = self._cctx.compressobj()
            chunks = []
            start = 0
            while start < insize:
                stop = start + self._compinsize
                chunk = compressor.compress(data[start:stop])
                if chunk:
                    chunks.append(chunk)
                start = stop
            chunks.append(compressor.flush())

            if sum(map(len, chunks)) < insize:
                return ''.join(chunks)
            return None

        def decompress(self, data):
            """Return ``data`` decompressed; any failure becomes RevlogError."""
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                chunks = []
                start = 0
                while start < insize:
                    stop = start + self._decompinsize
                    chunk = dobj.decompress(data[start:stop])
                    if chunk:
                        chunks.append(chunk)
                    start = stop
                # Frame should be exhausted, so no finish() API.

                return ''.join(chunks)
            except Exception as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        level = (opts or {}).get('level', 3)
        return self.zstdrevlogcompressor(self._module, level=level)

compengines.register(_zstdengine())
3604 3607
# convenient shortcut: a short alias for debugstacktrace, handy to type
# while debugging
dst = debugstacktrace
General Comments 0
You need to be logged in to leave comments. Login now