##// END OF EJS Templates
hardlink: duplicate hardlink detection for copying files and directories...
Jun Wu -
r31719:456efd1b default
parent child Browse files
Show More
@@ -1,3632 +1,3636 b''
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import
17 17
18 18 import bz2
19 19 import calendar
20 20 import codecs
21 21 import collections
22 22 import datetime
23 23 import errno
24 24 import gc
25 25 import hashlib
26 26 import imp
27 27 import os
28 28 import platform as pyplatform
29 29 import re as remod
30 30 import shutil
31 31 import signal
32 32 import socket
33 33 import stat
34 34 import string
35 35 import subprocess
36 36 import sys
37 37 import tempfile
38 38 import textwrap
39 39 import time
40 40 import traceback
41 41 import zlib
42 42
43 43 from . import (
44 44 encoding,
45 45 error,
46 46 i18n,
47 47 osutil,
48 48 parsers,
49 49 pycompat,
50 50 )
51 51
# Python 2/3 compatibility shims, re-exported from pycompat so that callers
# can reach them as attributes of this module.
empty = pycompat.empty
httplib = pycompat.httplib
httpserver = pycompat.httpserver
pickle = pycompat.pickle
queue = pycompat.queue
socketserver = pycompat.socketserver
stderr = pycompat.stderr
stdin = pycompat.stdin
stdout = pycompat.stdout
stringio = pycompat.stringio
urlerr = pycompat.urlerr
urlreq = pycompat.urlreq
xmlrpclib = pycompat.xmlrpclib
65 65
def isatty(fp):
    """Return the result of ``fp.isatty()``.

    Objects for which the call raises AttributeError (typically because
    they have no ``isatty`` method) are reported as not being a terminal.
    """
    try:
        attached = fp.isatty()
    except AttributeError:
        return False
    return attached
71 71
# glibc determines buffering on first write to stdout - if we replace a TTY
# destined stdout with a pipe destined stdout (e.g. pager), we want line
# buffering
if isatty(stdout):
    stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)

# Pick the platform specific implementation module. On Windows stdout is
# additionally wrapped by the platform module.
if pycompat.osname != 'nt':
    from . import posix as platform
else:
    from . import windows as platform
    stdout = platform.winstdout(stdout)
83 83
# translation function
_ = i18n._

# Platform specific helpers, re-exported from whichever platform module was
# selected above (getpid is taken straight from os; statfiles prefers the
# osutil implementation when it exists).
bindunixsocket = platform.bindunixsocket
cachestat = platform.cachestat
checkexec = platform.checkexec
checklink = platform.checklink
copymode = platform.copymode
executablepath = platform.executablepath
expandglobs = platform.expandglobs
explainexit = platform.explainexit
findexe = platform.findexe
gethgcmd = platform.gethgcmd
getuser = platform.getuser
getpid = os.getpid
groupmembers = platform.groupmembers
groupname = platform.groupname
hidewindow = platform.hidewindow
isexec = platform.isexec
isowner = platform.isowner
localpath = platform.localpath
lookupreg = platform.lookupreg
makedir = platform.makedir
nlinks = platform.nlinks
normpath = platform.normpath
normcase = platform.normcase
normcasespec = platform.normcasespec
normcasefallback = platform.normcasefallback
openhardlinks = platform.openhardlinks
oslink = platform.oslink
parsepatchoutput = platform.parsepatchoutput
pconvert = platform.pconvert
poll = platform.poll
popen = platform.popen
posixfile = platform.posixfile
quotecommand = platform.quotecommand
readpipe = platform.readpipe
rename = platform.rename
removedirs = platform.removedirs
samedevice = platform.samedevice
samefile = platform.samefile
samestat = platform.samestat
setbinary = platform.setbinary
setflags = platform.setflags
setsignalhandler = platform.setsignalhandler
shellquote = platform.shellquote
spawndetached = platform.spawndetached
split = platform.split
sshargs = platform.sshargs
statfiles = getattr(osutil, 'statfiles', platform.statfiles)
statisexec = platform.statisexec
statislink = platform.statislink
testpid = platform.testpid
umask = platform.umask
unlink = platform.unlink
username = platform.username
139 139
# Python compatibility

# unique sentinel used to tell "no value" apart from a legitimate None
_notset = object()

# disable Python's problematic floating point timestamps (issue4836)
# (Python hypocritically says you shouldn't change this behavior in
# libraries, and sure enough Mercurial is not a library.)
os.stat_float_times(False)
148 148
def safehasattr(thing, attr):
    """Return True if ``getattr(thing, attr)`` yields a value.

    The lookup is done with a sentinel default, so an attribute whose value
    is None (or otherwise falsy) still counts as present.
    """
    found = getattr(thing, attr, _notset)
    return found is not _notset
151 151
def bitsfrom(container):
    """Return the bitwise OR of every item in ``container`` (0 if empty)."""
    combined = 0
    for flag in container:
        combined = combined | flag
    return combined
157 157
# Supported digest names mapped to their hashlib constructors.
DIGESTS = dict(
    md5=hashlib.md5,
    sha1=hashlib.sha1,
    sha512=hashlib.sha512,
)
# List of digest types from strongest to weakest
DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']

# sanity check: every ranked digest must have a constructor
for k in DIGESTS_BY_STRENGTH:
    assert k in DIGESTS
168 168
class digester(object):
    """helper to compute digests.

    This helper can be used to compute one or more digests given their name.

    >>> d = digester(['md5', 'sha1'])
    >>> d.update('foo')
    >>> [k for k in sorted(d)]
    ['md5', 'sha1']
    >>> d['md5']
    'acbd18db4cc2f85cedef654fccc4a4d8'
    >>> d['sha1']
    '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
    >>> digester.preferred(['md5', 'sha1'])
    'sha1'
    """

    def __init__(self, digests, s=''):
        """Create one hash object per name in ``digests``.

        Raises Abort for a name that is not in DIGESTS. If ``s`` is
        non-empty it is fed to every hash immediately.
        """
        self._hashes = {}
        for k in digests:
            if k not in DIGESTS:
                raise Abort(_('unknown digest type: %s') % k)
            self._hashes[k] = DIGESTS[k]()
        if s:
            self.update(s)

    def update(self, data):
        """Feed ``data`` to every hash being computed."""
        for h in self._hashes.values():
            h.update(data)

    def __getitem__(self, key):
        """Return the hex digest computed so far for digest type ``key``.

        Raises Abort when ``key`` is not a known digest type.
        """
        if key not in DIGESTS:
            # report the key that was actually requested
            raise Abort(_('unknown digest type: %s') % key)
        return self._hashes[key].hexdigest()

    def __iter__(self):
        """Iterate over the names of the digests being computed."""
        return iter(self._hashes)

    @staticmethod
    def preferred(supported):
        """returns the strongest digest type in both supported and DIGESTS."""

        for k in DIGESTS_BY_STRENGTH:
            if k in supported:
                return k
        return None
215 215
class digestchecker(object):
    """file handle wrapper that additionally checks content against a given
    size and digests.

    d = digestchecker(fh, size, {'md5': '...'})

    When multiple digests are given, all of them are validated.
    """

    def __init__(self, fh, size, digests):
        self._fh = fh
        self._size = size
        # number of bytes handed out by read() so far
        self._got = 0
        self._digests = dict(digests)
        self._digester = digester(self._digests.keys())

    def read(self, length=-1):
        """Read from the wrapped handle, accounting the data for validate()"""
        chunk = self._fh.read(length)
        self._digester.update(chunk)
        self._got += len(chunk)
        return chunk

    def validate(self):
        """Abort unless the size and every digest of the data read match"""
        if self._size != self._got:
            raise Abort(_('size mismatch: expected %d, got %d') %
                        (self._size, self._got))
        for name, expected in self._digests.items():
            if expected == self._digester[name]:
                continue
            # i18n: first parameter is a digest name
            raise Abort(_('%s mismatch: expected %s, got %s') %
                        (name, expected, self._digester[name]))
247 247
# Use the builtin buffer() where it exists; otherwise provide a stand-in
# with the same call signature.
try:
    buffer = buffer
except NameError:
    if pycompat.ispy3:
        def buffer(sliceable, offset=0, length=None):
            view = memoryview(sliceable)
            if length is None:
                return view[offset:]
            return view[offset:offset + length]
    else:
        def buffer(sliceable, offset=0, length=None):
            if length is None:
                return sliceable[offset:]
            return sliceable[offset:offset + length]
261 261
# value passed as close_fds= to the subprocess calls in this module
closefds = pycompat.osname == 'posix'

# number of bytes requested per os.read() call by bufferedinputpipe
_chunksize = 4096
265 265
class bufferedinputpipe(object):
    """a manually buffered input pipe

    Python will not let us use buffered IO and lazy reading with 'polling' at
    the same time. We cannot probe the buffer state and select will not detect
    that data are ready to read if they are already buffered.

    This class lets us work around that by implementing its own buffering
    (allowing efficient readline) while offering a way to know if the buffer is
    empty from the outside (allowing collaboration of the buffer with polling).

    This class lives in the 'util' module because it makes use of the 'os'
    module from the python stdlib.
    """

    def __init__(self, input):
        self._input = input
        # list of chunks read but not yet handed out
        self._buffer = []
        self._eof = False
        # total length of the chunks in self._buffer
        self._lenbuf = 0

    @property
    def hasbuffer(self):
        """True if any data is currently buffered

        This will be used externally as a pre-step for polling IO. If there
        is already data then no polling should be set in place."""
        return len(self._buffer) > 0

    @property
    def closed(self):
        return self._input.closed

    def fileno(self):
        return self._input.fileno()

    def close(self):
        return self._input.close()

    def read(self, size):
        """return up to 'size' data, filling the buffer until enough data
        is available or end of file is reached"""
        while not (self._eof or self._lenbuf >= size):
            self._fillbuffer()
        return self._frombuffer(size)

    def readline(self, *args, **kwargs):
        """return data up to and including the next newline, or whatever
        is left when end of file is reached first"""
        if len(self._buffer) > 1:
            # this should not happen because both read and readline end with a
            # _frombuffer call that collapse it.
            merged = ''.join(self._buffer)
            self._buffer = [merged]
            self._lenbuf = len(merged)
        newline = -1
        if self._buffer:
            newline = self._buffer[-1].find('\n')
        while newline < 0 and not self._eof:
            self._fillbuffer()
            if self._buffer:
                newline = self._buffer[-1].find('\n')
        if newline < 0:
            # end of file
            size = self._lenbuf
        else:
            size = newline + 1
            if len(self._buffer) > 1:
                # we need to take previous chunks into account
                size += self._lenbuf - len(self._buffer[-1])
        return self._frombuffer(size)

    def _frombuffer(self, size):
        """return at most 'size' data from the buffer

        The data are removed from the buffer."""
        if size == 0 or not self._buffer:
            return ''
        if len(self._buffer) > 1:
            pending = ''.join(self._buffer)
        else:
            pending = self._buffer[0]

        data = pending[:size]
        rest = pending[len(data):]
        if rest:
            self._buffer = [rest]
        else:
            self._buffer = []
        self._lenbuf = len(rest)
        return data

    def _fillbuffer(self):
        """read data to the buffer"""
        data = os.read(self._input.fileno(), _chunksize)
        if data:
            self._lenbuf += len(data)
            self._buffer.append(data)
        else:
            self._eof = True
359 359
def popen2(cmd, env=None, newlines=False):
    """Run ``cmd`` through the shell and return its (stdin, stdout) pipes."""
    # Setting bufsize to -1 lets the system decide the buffer size.
    # The default for bufsize is 0, meaning unbuffered. This leads to
    # poor performance on Mac OS X: http://bugs.python.org/issue4194
    pipe = subprocess.PIPE
    child = subprocess.Popen(cmd, shell=True, bufsize=-1,
                             close_fds=closefds,
                             stdin=pipe, stdout=pipe,
                             universal_newlines=newlines,
                             env=env)
    return child.stdin, child.stdout
370 370
def popen3(cmd, env=None, newlines=False):
    """Like popen4(), but return only the (stdin, stdout, stderr) pipes."""
    pin, pout, perr, _proc = popen4(cmd, env, newlines)
    return pin, pout, perr
374 374
def popen4(cmd, env=None, newlines=False, bufsize=-1):
    """Run ``cmd`` through the shell with all three standard streams piped.

    Returns (stdin, stdout, stderr, process).
    """
    pipe = subprocess.PIPE
    child = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
                             close_fds=closefds,
                             stdin=pipe, stdout=pipe,
                             stderr=pipe,
                             universal_newlines=newlines,
                             env=env)
    return child.stdin, child.stdout, child.stderr, child
383 383
def version():
    """Return version information if available.

    Falls back to 'unknown' when the __version__ module cannot be imported.
    """
    try:
        from . import __version__ as versionmod
    except ImportError:
        return 'unknown'
    return versionmod.version
391 391
def versiontuple(v=None, n=4):
    """Parses a Mercurial version string into an N-tuple.

    The version string to be parsed is specified with the ``v`` argument.
    If it isn't defined, the current Mercurial version string will be parsed.

    ``n`` can be 2, 3, or 4 (any other value makes the function return
    None). Here is how some version strings map to returned values:

    >>> v = '3.6.1+190-df9b73d2d444'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, 1)
    >>> versiontuple(v, 4)
    (3, 6, 1, '190-df9b73d2d444')

    >>> versiontuple('3.6.1+190-df9b73d2d444+20151118')
    (3, 6, 1, '190-df9b73d2d444+20151118')

    >>> v = '3.6'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, None)
    >>> versiontuple(v, 4)
    (3, 6, None, None)

    >>> v = '3.9-rc'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc')

    >>> v = '3.9-rc+2-02a8fea4289b'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc+2-02a8fea4289b')
    """
    if not v:
        v = version()
    # Split off everything after the first '+' or '-' as the "extra" part.
    # The pattern is a raw string so the backslash is not an (invalid) string
    # escape, and maxsplit is passed by keyword rather than by position.
    parts = remod.split(r'[\+-]', v, maxsplit=1)
    if len(parts) == 1:
        vparts, extra = parts[0], None
    else:
        vparts, extra = parts

    # collect the leading integer components, stopping at the first
    # component that is not a plain integer
    vints = []
    for i in vparts.split('.'):
        try:
            vints.append(int(i))
        except ValueError:
            break
    # (3, 6) -> (3, 6, None)
    while len(vints) < 3:
        vints.append(None)

    if n == 2:
        return (vints[0], vints[1])
    if n == 3:
        return (vints[0], vints[1], vints[2])
    if n == 4:
        return (vints[0], vints[1], vints[2], extra)
460 460
# used by parsedate
defaultdateformats = (
    # ISO8601 style, 'T' separated
    '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
    '%Y-%m-%dT%H:%M', # without seconds
    '%Y-%m-%dT%H%M%S', # another awful but legal variant without :
    '%Y-%m-%dT%H%M', # without seconds
    # space separated date and time
    '%Y-%m-%d %H:%M:%S', # our common legal variant
    '%Y-%m-%d %H:%M', # without seconds
    '%Y-%m-%d %H%M%S', # without :
    '%Y-%m-%d %H%M', # without seconds
    '%Y-%m-%d %I:%M:%S%p',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %I:%M%p',
    # date only
    '%Y-%m-%d',
    '%m-%d',
    '%m/%d',
    '%m/%d/%y',
    '%m/%d/%Y',
    # day and/or month names
    '%a %b %d %H:%M:%S %Y',
    '%a %b %d %I:%M:%S%p %Y',
    '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
    '%b %d %H:%M:%S %Y',
    '%b %d %I:%M:%S%p %Y',
    '%b %d %H:%M:%S',
    '%b %d %I:%M:%S%p',
    '%b %d %H:%M',
    '%b %d %I:%M%p',
    '%b %d %Y',
    '%b %d',
    # time only
    '%H:%M:%S',
    '%I:%M:%S%p',
    '%H:%M',
    '%I:%M%p',
)

# the default formats plus coarser year/month-only ones
extendeddateformats = defaultdateformats + (
    "%Y",
    "%Y-%m",
    "%b",
    "%b %Y",
)
502 502
def cachefunc(func):
    '''cache the result of function calls

    Returns a wrapper that remembers the result computed for each distinct
    set of positional arguments, without any bound on the cache size.'''
    # XXX doesn't handle keywords args
    argcount = func.__code__.co_argcount
    if argcount == 0:
        # a one-slot list is enough when there is nothing to key on
        result = []
        def f():
            if not result:
                result.append(func())
            return result[0]
        return f

    memo = {}
    if argcount != 1:
        def f(*args):
            if args not in memo:
                memo[args] = func(*args)
            return memo[args]
    else:
        # we gain a small amount of time because
        # we don't need to pack/unpack the list
        def f(arg):
            if arg not in memo:
                memo[arg] = func(arg)
            return memo[arg]

    return f
528 528
class sortdict(dict):
    '''a simple sorted dictionary

    Keys are kept in the order in which they were last set: assigning to an
    existing key moves it to the end of the ordering.
    '''
    def __init__(self, data=None):
        # keys, in iteration order
        self._list = []
        if data:
            self.update(data)
    def copy(self):
        return sortdict(self)
    def __setitem__(self, key, val):
        if key in self:
            self._list.remove(key)
        self._list.append(key)
        dict.__setitem__(self, key, val)
    def __iter__(self):
        return self._list.__iter__()
    def update(self, src):
        '''set every (key, value) pair from src, a dict or an iterable of
        pairs'''
        if isinstance(src, dict):
            src = src.iteritems()
        for k, v in src:
            self[k] = v
    def clear(self):
        dict.clear(self)
        self._list = []
    def items(self):
        return [(k, self[k]) for k in self._list]
    def __delitem__(self, key):
        dict.__delitem__(self, key)
        self._list.remove(key)
    def pop(self, key, *args, **kwargs):
        '''remove key and return its value, like dict.pop()

        When key is missing, the default (if one was given) is returned;
        otherwise the KeyError from dict.pop() propagates.'''
        value = dict.pop(self, key, *args, **kwargs)
        try:
            self._list.remove(key)
        except ValueError:
            # key was absent and a default was returned
            pass
        return value
    def keys(self):
        return self._list[:]
    def iterkeys(self):
        return self._list.__iter__()
    def iteritems(self):
        for k in self._list:
            yield k, self[k]
    def insert(self, index, key, val):
        '''insert key at position index in the ordering

        The key is added to the ordering without removing any existing
        occurrence, so an already present key would be listed twice.'''
        self._list.insert(index, key)
        dict.__setitem__(self, key, val)
    def __repr__(self):
        if not self:
            return '%s()' % self.__class__.__name__
        return '%s(%r)' % (self.__class__.__name__, self.items())
577 577
class _lrucachenode(object):
    """A node in a doubly linked list.

    Holds a reference to nodes on either side as well as a key-value
    pair for the dictionary entry. A node whose key is the ``_notset``
    sentinel is considered empty.
    """
    __slots__ = (u'next', u'prev', u'key', u'value')

    def __init__(self):
        # neighbours; left unset here
        self.next = None
        self.prev = None

        # a fresh node starts out empty
        self.key = _notset
        self.value = None

    def markempty(self):
        """Mark the node as emptied."""
        self.key = _notset
596 596
class lrucachedict(object):
    """Dict that caches most recent accesses and sets.

    The dict consists of an actual backing dict - indexed by original
    key - and a doubly linked circular list defining the order of entries in
    the cache.

    The head node is the newest entry in the cache. If the cache is full,
    we recycle head.prev and make it the new head. Cache accesses result in
    the node being moved to before the existing head and being marked as the
    new head node.

    Nodes emptied by deletion are parked at the oldest end of the list (just
    before the head) so they are the first to be recycled.
    """
    def __init__(self, max):
        # key -> node
        self._cache = {}

        # start with a single empty node linked to itself
        self._head = head = _lrucachenode()
        head.prev = head
        head.next = head
        # number of nodes allocated in the list (empty ones included)
        self._size = 1
        self._capacity = max

    def __len__(self):
        return len(self._cache)

    def __contains__(self, k):
        return k in self._cache

    def __iter__(self):
        # We don't have to iterate in cache order, but why not.
        n = self._head
        for i in range(len(self._cache)):
            yield n.key
            n = n.next

    def __getitem__(self, k):
        node = self._cache[k]
        self._movetohead(node)
        return node.value

    def __setitem__(self, k, v):
        node = self._cache.get(k)
        # Replace existing value and mark as newest.
        if node is not None:
            node.value = v
            self._movetohead(node)
            return

        if self._size < self._capacity:
            node = self._addcapacity()
        else:
            # Grab the last/oldest item.
            node = self._head.prev

        # At capacity. Kill the old entry.
        if node.key is not _notset:
            del self._cache[node.key]

        node.key = k
        node.value = v
        self._cache[k] = node
        # And mark it as newest entry. No need to adjust order since it
        # is already self._head.prev.
        self._head = node

    def __delitem__(self, k):
        node = self._cache.pop(k)
        node.markempty()

        # Temporarily mark as newest item before re-adjusting head to make
        # this node the oldest item.
        self._movetohead(node)
        self._head = node.next

    # Additional dict methods.

    def get(self, k, default=None):
        """Return the value for ``k`` or ``default``.

        Unlike ``__getitem__`` this does not mark the entry as newest.
        """
        try:
            return self._cache[k].value
        except KeyError:
            return default

    def clear(self):
        """Empty every node, walking from the head until an empty one."""
        n = self._head
        while n.key is not _notset:
            n.markempty()
            n = n.next

        self._cache.clear()

    def copy(self):
        """Return a new lrucachedict with the same entries and ordering."""
        result = lrucachedict(self._capacity)

        # Emptied nodes sit at the oldest end of the list. Walk backwards
        # from the tail until we reach a node that holds an entry, otherwise
        # the placeholder key would be copied and the newest entries lost.
        n = self._head.prev
        while n.key is _notset and n is not self._head:
            n = n.prev

        # Iterate in oldest-to-newest order, so the copy has the right ordering
        for i in range(len(self._cache)):
            result[n.key] = n.value
            n = n.prev
        return result

    def _movetohead(self, node):
        """Mark a node as the newest, making it the new head.

        When a node is accessed, it becomes the freshest entry in the LRU
        list, which is denoted by self._head.

        Visually, let's make ``N`` the new head node (* denotes head):

            previous/oldest <-> head <-> next/next newest

            ----<->--- A* ---<->-----
            |                       |
            E <-> D <-> N <-> C <-> B

        To:

            ----<->--- N* ---<->-----
            |                       |
            E <-> D <-> C <-> B <-> A

        This requires the following moves:

           C.next = D  (node.prev.next = node.next)
           D.prev = C  (node.next.prev = node.prev)
           E.next = N  (head.prev.next = node)
           N.prev = E  (node.prev = head.prev)
           N.next = A  (node.next = head)
           A.prev = N  (head.prev = node)
        """
        head = self._head
        # C.next = D
        node.prev.next = node.next
        # D.prev = C
        node.next.prev = node.prev
        # N.prev = E
        node.prev = head.prev
        # N.next = A
        # It is tempting to do just "head" here, however if node is
        # adjacent to head, this will do bad things.
        node.next = head.prev.next
        # A.prev = N
        node.next.prev = node
        # E.next = N
        node.prev.next = node

        self._head = node

    def _addcapacity(self):
        """Add a node to the circular linked list.

        The new node is inserted before the head node.
        """
        head = self._head
        node = _lrucachenode()
        head.prev.next = node
        node.prev = head.prev
        node.next = head
        head.prev = node
        self._size += 1
        return node
755 755
def lrucachefunc(func):
    '''cache most recent results of function calls

    The oldest entry is dropped before a new result is stored whenever the
    cache already holds more than 20 results.'''
    results = {}
    # keys, least recently used first
    recent = collections.deque()
    if func.__code__.co_argcount != 1:
        def f(*args):
            if args in results:
                recent.remove(args)
            else:
                if len(results) > 20:
                    del results[recent.popleft()]
                results[args] = func(*args)
            recent.append(args)
            return results[args]
    else:
        def f(arg):
            if arg in results:
                recent.remove(arg)
            else:
                if len(results) > 20:
                    del results[recent.popleft()]
                results[arg] = func(arg)
            recent.append(arg)
            return results[arg]

    return f
782 782
class propertycache(object):
    """Descriptor that computes an attribute through ``func`` and stores the
    result in the instance ``__dict__`` under the function's name."""
    def __init__(self, func):
        self.func = func
        self.name = func.__name__

    def __get__(self, obj, type=None):
        value = self.func(obj)
        self.cachevalue(obj, value)
        return value

    def cachevalue(self, obj, value):
        # __dict__ assignment required to bypass __setattr__ (eg: repoview)
        obj.__dict__[self.name] = value
795 795
def pipefilter(s, cmd):
    '''filter string S through command CMD, returning its output'''
    pipe = subprocess.PIPE
    child = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                             stdin=pipe, stdout=pipe)
    # stderr is not piped, so only the first element carries data
    output = child.communicate(s)[0]
    return output
802 802
def tempfilter(s, cmd):
    '''filter string S through a pair of temporary files with CMD.
    CMD is used as a template to create the real command to be run,
    with the strings INFILE and OUTFILE replaced by the real names of
    the temporary files generated.'''
    inname, outname = None, None
    try:
        # write the input to the first temporary file
        infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
        infile = os.fdopen(infd, pycompat.sysstr('wb'))
        infile.write(s)
        infile.close()
        # only the name of the output file is needed
        outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
        os.close(outfd)
        cmd = cmd.replace('INFILE', inname).replace('OUTFILE', outname)
        status = os.system(cmd)
        if pycompat.sysplatform == 'OpenVMS' and status & 1:
            status = 0
        if status:
            raise Abort(_("command '%s' failed: %s") %
                        (cmd, explainexit(status)))
        return readfile(outname)
    finally:
        # best effort removal of whichever temporary files were created
        for tmpname in (inname, outname):
            try:
                if tmpname:
                    os.unlink(tmpname)
            except OSError:
                pass
836 836
# command prefix -> function implementing that kind of filter
filtertable = {
    'tempfile:': tempfilter,
    'pipe:': pipefilter,
}
841 841
def filter(s, cmd):
    "filter a string through a command that transforms its input to its output"
    for prefix, handler in filtertable.iteritems():
        if not cmd.startswith(prefix):
            continue
        # strip the prefix and any whitespace that follows it
        return handler(s, cmd[len(prefix):].lstrip())
    # no known prefix: the whole string is a command to pipe through
    return pipefilter(s, cmd)
848 848
def binary(s):
    """return true if a string is binary data

    A string counts as binary when it contains a NUL character; empty
    strings and None do not."""
    if not s:
        return False
    return '\0' in s
852 852
def increasingchunks(source, min=1024, max=65536):
    '''return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max'''
    def log2(x):
        # index of the highest set bit (0 for x == 0)
        bits = 0
        while x:
            x >>= 1
            bits += 1
        return bits - 1 if bits else 0

    pending = []
    size = 0
    for chunk in source:
        pending.append(chunk)
        size += len(chunk)
        if size < min:
            continue
        if min < max:
            # grow the threshold: at least double it, and at least the
            # largest power of two not above what was just gathered
            min <<= 1
            floor = 1 << log2(size)
            if floor > min:
                min = floor
            if min > max:
                min = max
        yield ''.join(pending)
        pending = []
        size = 0
    # whatever is left over once the source is exhausted
    if pending:
        yield ''.join(pending)
883 883
# alias so the exception can be raised as Abort within this module
Abort = error.Abort
885 885
def always(fn):
    """Predicate that is true for every argument."""
    return True
888 888
def never(fn):
    """Predicate that is false for every argument."""
    return False
891 891
def nogc(func):
    """disable garbage collector

    Python's garbage collector triggers a GC each time a certain number of
    container objects (the number being defined by gc.get_threshold()) are
    allocated even when marked not to be tracked by the collector. Tracking has
    no effect on when GCs are triggered, only on what objects the GC looks
    into. As a workaround, disable GC while building complex (huge)
    containers.

    This garbage collector issue has been fixed in 2.7, so on Python 2.7
    and later ``func`` is returned unchanged.
    """
    if sys.version_info < (2, 7):
        def wrapper(*args, **kwargs):
            wasenabled = gc.isenabled()
            gc.disable()
            try:
                return func(*args, **kwargs)
            finally:
                # only turn the collector back on if we turned it off
                if wasenabled:
                    gc.enable()
        return wrapper
    return func
915 915
def pathto(root, n1, n2):
    '''return the relative path from one place to another.
    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    '''
    if not n1:
        return localpath(n2)
    if os.path.isabs(n1):
        rootdrive = os.path.splitdrive(root)[0]
        if rootdrive != os.path.splitdrive(n1)[0]:
            # different drives: no relative path exists
            return os.path.join(root, localpath(n2))
        n2 = '/'.join((pconvert(root), n2))
    src, dst = splitpath(n1), n2.split('/')
    # count the leading components both paths share
    common = 0
    limit = min(len(src), len(dst))
    while common < limit and src[common] == dst[common]:
        common += 1
    # climb out of what remains of n1, then descend into what remains of n2
    ups = ['..'] * (len(src) - common)
    return pycompat.ossep.join(ups + dst[common:]) or '.'
941 941
def mainfrozen():
    """return True if we are a frozen executable.

    The code supports py2exe (most common, Windows only) and tools/freeze
    (portable, not much used).
    """
    # "frozen" is set by new py2exe, "importers" by old py2exe
    for marker in ("frozen", "importers"):
        if safehasattr(sys, marker):
            return True
    # tools/freeze
    return imp.is_frozen(u"__main__")
951 951
# the location of data files matching the source code
if not mainfrozen() or getattr(sys, 'frozen', None) == 'macosx_app':
    datapath = os.path.dirname(pycompat.fsencode(__file__))
else:
    # executable version (py2exe) doesn't support __file__
    datapath = os.path.dirname(pycompat.sysexecutable)

i18n.setdatapath(datapath)

# cached result of hgexecutable(); None until first computed
_hgexecutable = None
962 962
def hgexecutable():
    """return location of the 'hg' executable.

    Defaults to $HG or 'hg' in the search path.

    The location is computed on the first call and remembered through
    _sethgexecutable().
    """
    if _hgexecutable is not None:
        return _hgexecutable

    hg = encoding.environ.get('HG')
    mainmod = sys.modules[pycompat.sysstr('__main__')]
    if hg:
        path = hg
    elif mainfrozen():
        if getattr(sys, 'frozen', None) == 'macosx_app':
            # Env variable set by py2app
            path = encoding.environ['EXECUTABLEPATH']
        else:
            path = pycompat.sysexecutable
    elif (os.path.basename(
            pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
        # we are being run from a script called 'hg'
        path = pycompat.fsencode(mainmod.__file__)
    else:
        path = findexe('hg') or os.path.basename(sys.argv[0])
    _sethgexecutable(path)
    return _hgexecutable
986 986
987 987 def _sethgexecutable(path):
988 988 """set location of the 'hg' executable"""
989 989 global _hgexecutable
990 990 _hgexecutable = path
991 991
992 992 def _isstdout(f):
993 993 fileno = getattr(f, 'fileno', None)
994 994 return fileno and fileno() == sys.__stdout__.fileno()
995 995
def shellenviron(environ=None):
    """return environ with optional override, useful for shelling out"""
    def py2shell(val):
        'convert python object into string that is useful to shell'
        if val is True:
            return '1'
        if val is None or val is False:
            return '0'
        return str(val)
    # start from a copy of the process environment
    env = dict(encoding.environ)
    if environ:
        for name, value in environ.iteritems():
            env[name] = py2shell(value)
    # HG always points at this hg, even when the caller passed its own
    env['HG'] = hgexecutable()
    return env
1010 1010
def system(cmd, environ=None, cwd=None, out=None):
    '''enhanced shell command execution.
    run with environment maybe modified, maybe in different dir.

    if out is specified, it is assumed to be a file-like object that has a
    write() method. stdout and stderr will be redirected to out.

    returns the exit code of the command.'''
    try:
        stdout.flush()
    except Exception:
        pass
    cmd = quotecommand(cmd)
    oldpython = sys.version_info[0] == 2 and sys.version_info[1] < 7
    if pycompat.sysplatform == 'plan9' and oldpython:
        # subprocess kludge to work around issues in half-baked Python
        # ports, notably bichued/python:
        if cwd is not None:
            os.chdir(cwd)
        rc = os.system(cmd)
    else:
        env = shellenviron(environ)
        if out is None or _isstdout(out):
            rc = subprocess.call(cmd, shell=True, close_fds=closefds,
                                 env=env, cwd=cwd)
        else:
            child = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                                     env=env, cwd=cwd,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.STDOUT)
            # relay the output line by line until the pipe is exhausted
            while True:
                line = child.stdout.readline()
                if line == '':
                    break
                out.write(line)
            child.wait()
            rc = child.returncode
        if pycompat.sysplatform == 'OpenVMS' and rc & 1:
            rc = 0
    return rc
1045 1045
def checksignature(func):
    '''Wrap ``func`` so that calling it with bad arguments is reported.

    A TypeError whose traceback has a single entry (i.e. it was raised by
    the call itself, not from inside ``func``) becomes
    error.SignatureError; any other TypeError propagates unchanged.'''
    def check(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError:
            depth = len(traceback.extract_tb(sys.exc_info()[2]))
            if depth != 1:
                raise
            raise error.SignatureError

    return check
1057 1057
# a whitelist of known filesystems where hardlink works reliably
# (copyfile() only hardlinks when the destination's filesystem type, as
# reported by getfstype(), is listed here)
_hardlinkfswhitelist = set([
    'btrfs',
    'ext2',
    'ext3',
    'ext4',
    'hfs',
    'jfs',
    'reiserfs',
    'tmpfs',
    'ufs',
    'xfs',
    'zfs',
])
1072 1072
def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
    '''Copy ``src`` to ``dest``, replacing any existing ``dest``.

    The file mode is always preserved; with copystat the other stat info
    (like atime/mtime) is copied as well. With hardlink, a hardlink is
    attempted first when ``dest`` lives on a whitelisted filesystem, and a
    normal copy is the fallback.

    checkambig argument is used with filestat, and is useful only if
    destination file is guarded by any lock (e.g. repo.lock or
    repo.wlock).

    copystat and checkambig should be exclusive.
    '''
    assert not (copystat and checkambig)
    oldstat = None
    if os.path.lexists(dest):
        if checkambig:
            oldstat = filestat(dest)
        unlink(dest)
    if hardlink:
        # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
        # unless we are confident that dest is on a whitelisted filesystem.
        try:
            fstype = getfstype(os.path.dirname(dest))
        except OSError:
            fstype = None
        hardlink = fstype in _hardlinkfswhitelist
    if hardlink:
        try:
            oslink(src, dest)
        except (IOError, OSError):
            pass # fall back to normal copy
        else:
            return
    if os.path.islink(src):
        # copystat is ignored for symlinks, but in general copystat isn't
        # needed for them anyway
        os.symlink(os.readlink(src), dest)
        return
    try:
        shutil.copyfile(src, dest)
        if copystat:
            # copystat also copies mode
            shutil.copystat(src, dest)
        else:
            shutil.copymode(src, dest)
            if oldstat and oldstat.stat and filestat(dest).isambig(oldstat):
                # stat of copied file is ambiguous to original one
                advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
                os.utime(dest, (advanced, advanced))
    except shutil.Error as inst:
        raise Abort(str(inst))
1124 1124
def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
    """Copy a directory tree using hardlinks if possible.

    ``hardlink`` may be True or False to force the strategy; when it is
    None, hardlinks are used only if ``src`` and the directory containing
    ``dst`` are on the same device. ``progress(topic, pos)`` is called
    after each copied file and once more with ``pos=None`` at the end.

    Returns a ``(hardlink, num)`` tuple: whether hardlinking is still in
    use (it is switched off once a link attempt fails) and the number of
    files processed.
    """
    num = 0

    gettopic = lambda: hardlink and _('linking') or _('copying')

    if os.path.isdir(src):
        if hardlink is None:
            hardlink = (os.stat(src).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        # the topic is fixed here even if hardlinking gets disabled later
        topic = gettopic()
        os.mkdir(dst)
        for name, kind in osutil.listdir(src):
            srcname = os.path.join(src, name)
            dstname = os.path.join(dst, name)
            def nprog(t, pos):
                # shift the child's position by what was already counted
                if pos is not None:
                    return progress(t, pos + num)
            hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
            num += n
    else:
        if hardlink is None:
            hardlink = (os.stat(src).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        topic = gettopic()

        if hardlink:
            try:
                oslink(src, dst)
            except (IOError, OSError):
                # linking failed: copy instead and stop trying to link
                hardlink = False
                shutil.copy(src, dst)
        else:
            shutil.copy(src, dst)
        num += 1
        progress(topic, num)
    progress(topic, None)

    return hardlink, num
1160 1164
# device names (compared lower-cased, against the part of a path component
# before its first '.') and characters that checkwinfilename() rejects
_winreservednames = '''con prn aux nul
    com1 com2 com3 com4 com5 com6 com7 com8 com9
    lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
_winreservedchars = ':*?"<>|'
def checkwinfilename(path):
    r'''Check that the base-relative path is a valid filename on Windows.
    Returns None if the path is ok, or a UI string describing the problem.

    Every path component is checked for reserved characters, control
    characters, reserved device names and a trailing '.' or ' '.

    >>> checkwinfilename("just/a/normal/path")
    >>> checkwinfilename("foo/bar/con.xml")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename("foo/con.xml/bar")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename("foo/bar/xml.con")
    >>> checkwinfilename("foo/bar/AUX/bla.txt")
    "filename contains 'AUX', which is reserved on Windows"
    >>> checkwinfilename("foo/bar/bla:.txt")
    "filename contains ':', which is reserved on Windows"
    >>> checkwinfilename("foo/bar/b\07la.txt")
    "filename contains '\\x07', which is invalid on Windows"
    >>> checkwinfilename("foo/bar/bla ")
    "filename ends with ' ', which is not allowed on Windows"
    >>> checkwinfilename("../bar")
    >>> checkwinfilename("foo\\")
    "filename ends with '\\', which is invalid on Windows"
    >>> checkwinfilename("foo\\/bar")
    "directory name ends with '\\', which is invalid on Windows"
    '''
    if path.endswith('\\'):
        return _("filename ends with '\\', which is invalid on Windows")
    if '\\/' in path:
        return _("directory name ends with '\\', which is invalid on Windows")
    components = [p for p in path.replace('\\', '/').split('/') if p]
    for part in components:
        for char in pycompat.bytestr(part):
            if char in _winreservedchars:
                return _("filename contains '%s', which is reserved "
                         "on Windows") % char
            if ord(char) <= 31:
                return _("filename contains %r, which is invalid "
                         "on Windows") % char
        stem = part.split('.', 1)[0]
        if stem and stem.lower() in _winreservednames:
            return _("filename contains '%s', which is reserved "
                     "on Windows") % stem
        last = part[-1]
        # "part not in '..'" lets the '.' and '..' components through
        if last in '. ' and part not in '..':
            return _("filename ends with '%s', which is not allowed "
                     "on Windows") % last
1211 1215
# choose the filename checker and the default timer for this OS
if pycompat.osname == 'nt':
    checkosfilename = checkwinfilename
    timer = time.clock
else:
    checkosfilename = platform.checkosfilename
    timer = time.time

# time.perf_counter takes precedence whenever this Python provides it
if safehasattr(time, "perf_counter"):
    timer = time.perf_counter
1221 1225
def makelock(info, pathname):
    """Create the lock ``pathname`` carrying ``info``.

    A symlink whose target is ``info`` is tried first. If symlinks are
    unavailable, or creating one fails for a reason other than the lock
    already existing, a regular file is created exclusively and ``info``
    is written into it.

    Raises OSError with errno EEXIST when the lock already exists, and
    any OSError raised while creating or writing the fallback file.
    """
    try:
        return os.symlink(info, pathname)
    except OSError as why:
        if why.errno == errno.EEXIST:
            raise
    except AttributeError: # no symlink in os
        pass

    ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
    try:
        os.write(ld, info)
    finally:
        # never leak the descriptor, even when the write fails
        os.close(ld)
1234 1238
def readlock(pathname):
    """Return the info stored in the lock ``pathname``.

    The lock is read as a symlink target when possible. If ``pathname``
    is not a symlink (EINVAL), symlinks are not supported (ENOSYS), or
    os has no readlink at all, the lock is read as a regular file.

    Any other OSError from readlink (e.g. a missing lock) propagates.
    """
    try:
        return os.readlink(pathname)
    except OSError as why:
        if why.errno not in (errno.EINVAL, errno.ENOSYS):
            raise
    except AttributeError: # no symlink in os
        pass
    fp = posixfile(pathname)
    try:
        return fp.read()
    finally:
        # close the file even when reading fails
        fp.close()
1247 1251
def fstat(fp):
    '''Return the stat result for file object ``fp``.

    The descriptor is used when ``fp`` has a fileno method; otherwise the
    file is looked up through ``fp.name``.'''
    try:
        fd = fp.fileno()
        return os.fstat(fd)
    except AttributeError:
        return os.stat(fp.name)
1254 1258
1255 1259 # File system features
1256 1260
def fscasesensitive(path):
    """
    Return true if the given path is on a case-sensitive filesystem

    Requires a path (like /foo/.hg) ending with a foldable final
    directory component. The final component is case-swapped and the
    result is stat'ed: getting back the very same stat data means the
    filesystem folds case.
    """
    origstat = os.lstat(path)
    parent, leaf = os.path.split(path)
    folded = leaf.upper()
    if folded == leaf:
        folded = leaf.lower()
    if folded == leaf:
        return True # no evidence against case sensitivity
    try:
        foldedstat = os.lstat(os.path.join(parent, folded))
    except OSError:
        return True
    return foldedstat != origstat
1279 1283
# _re2 is a tri-state flag: None means the re2 module was imported but has
# not been verified yet (_re._checkre2 does that lazily), False means re2
# is not usable
try:
    import re2
    _re2 = None
except ImportError:
    _re2 = False
1285 1289
class _re(object):
    """Regular expression front-end that uses re2 when it is usable."""

    def _checkre2(self):
        """Settle the module-level _re2 flag by trying a real match."""
        global _re2
        try:
            # check if match works, see issue3964
            _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
        except ImportError:
            _re2 = False

    def compile(self, pat, flags=0):
        '''Compile a regular expression, using re2 if possible

        For best performance, use only re2-compatible regexp features. The
        only flags from the re module that are re2-compatible are
        IGNORECASE and MULTILINE. Patterns re2 rejects, and any other
        flags, are compiled with the standard re module.'''
        if _re2 is None:
            self._checkre2()
        re2flags = remod.IGNORECASE | remod.MULTILINE
        if _re2 and not (flags & ~re2flags):
            # re2 takes its flags inline, as a pattern prefix
            if flags & remod.IGNORECASE:
                pat = '(?i)' + pat
            if flags & remod.MULTILINE:
                pat = '(?m)' + pat
            try:
                return re2.compile(pat)
            except re2.error:
                pass
        return remod.compile(pat, flags)

    @propertycache
    def escape(self):
        '''Return the version of escape corresponding to self.compile.

        This is imperfect because whether re2 or re is used for a particular
        function depends on the flags, etc, but it's the best we can do.
        '''
        if _re2 is None:
            self._checkre2()
        if not _re2:
            return remod.escape
        return re2.escape

re = _re()
1330 1334
# per-directory cache used by fspath(): normcase-ed entry name -> name as
# stored on disk
_fspathcache = {}
def fspath(name, root):
    '''Get name in the case stored in the filesystem

    The name should be relative to root, and be normcase-ed for efficiency.

    Note that this function is unnecessary, and should not be
    called, for case-sensitive filesystems (simply because it's expensive).

    The root should be normcase-ed, too.

    Path components that cannot be found on disk are returned unchanged,
    and separators are preserved as given.
    '''
    def _makefspathcacheentry(dir):
        return dict((normcase(n), n) for n in os.listdir(dir))

    seps = pycompat.ossep
    if pycompat.osaltsep:
        seps = seps + pycompat.osaltsep
    # Protect backslashes. This gets silly very quickly.
    # (str.replace returns a new string, so the result must be kept;
    # otherwise a backslash separator only escapes the next character of
    # the character class instead of being a member of it)
    seps = seps.replace('\\', '\\\\')
    pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
    dir = os.path.normpath(root)
    result = []
    for part, sep in pattern.findall(name):
        if sep:
            result.append(sep)
            continue

        if dir not in _fspathcache:
            _fspathcache[dir] = _makefspathcacheentry(dir)
        contents = _fspathcache[dir]

        found = contents.get(part)
        if not found:
            # retry "once per directory" per "dirstate.walk" which
            # may take place for each patches of "hg qpush", for example
            _fspathcache[dir] = contents = _makefspathcacheentry(dir)
            found = contents.get(part)

        result.append(found or part)
        dir = os.path.join(dir, part)

    return ''.join(result)
1373 1377
def getfstype(dirpath):
    '''Get the filesystem type name from a directory (best-effort)

    Delegates to osutil.getfstype when osutil provides it.

    Returns None if we are unsure. Raises OSError on ENOENT, EPERM, etc.
    '''
    try:
        impl = osutil.getfstype
    except AttributeError:
        # this osutil build has no filesystem type detection
        return None
    return impl(dirpath)
1380 1384
def checknlink(testfile):
    '''check whether hardlink count reporting works properly

    Two scratch files named after ``testfile`` (suffixes ``.hgtmp1`` and
    ``.hgtmp2``) are used: the first is created, hardlinked to the second,
    and the result is whether nlinks() reports more than one link.

    Returns False when the first scratch file already exists or cannot be
    created, or when any step of the link check raises OSError. The
    scratch files are removed (best-effort) before returning.
    '''

    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    f1 = testfile + ".hgtmp1"
    if os.path.lexists(f1):
        return False
    try:
        posixfile(f1, 'w').close()
    except IOError:
        # creation failed: remove whatever may have been left behind
        try:
            os.unlink(f1)
        except OSError:
            pass
        return False

    f2 = testfile + ".hgtmp2"
    fd = None
    try:
        oslink(f1, f2)
        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        fd = posixfile(f2)
        return nlinks(f2) > 1
    except OSError:
        return False
    finally:
        # close before unlinking, then drop both scratch files
        if fd is not None:
            fd.close()
        for f in (f1, f2):
            try:
                os.unlink(f)
            except OSError:
                pass
1416 1420
def endswithsep(path):
    '''Tell whether path ends with os.sep or, if there is one, os.altsep.'''
    if path.endswith(pycompat.ossep):
        return True
    altsep = pycompat.osaltsep
    if not altsep:
        # no alternative separator: hand back the falsy value as-is
        return altsep
    return path.endswith(altsep)
1421 1425
def splitpath(path):
    '''Return the list of components of path, split on os.sep only.

    os.altsep is deliberately not considered: this is meant as a drop-in
    alternative to a plain "xxx.split(os.sep)". Run os.path.normpath() on
    the path first if needed.'''
    separator = pycompat.ossep
    return path.split(separator)
1429 1433
def gui():
    '''Are we running in a GUI?

    Outside of darwin this is true on Windows or when $DISPLAY is set
    (the value of DISPLAY itself is returned in that case).'''
    if pycompat.sysplatform != 'darwin':
        return pycompat.osname == "nt" or encoding.environ.get("DISPLAY")
    if 'SSH_CONNECTION' in encoding.environ:
        # handle SSH access to a box where the user is logged in
        return False
    isgui = getattr(osutil, 'isgui', None)
    if isgui:
        # check if a CoreGraphics session is available
        return isgui()
    # pure build; use a safe default
    return True
1444 1448
def mktempcopy(name, emptyok=False, createmode=None):
    """Create a temporary file with the same contents from name

    The temporary file is created in the same directory as ``name``.
    The permission bits are copied from the original file.

    If the temporary file is going to be truncated immediately, you
    can use emptyok=True as an optimization.

    If ``name`` does not exist, the temporary file is left empty. Any
    other failure while copying removes the temporary file and re-raises.

    Returns the name of the temporary file.
    """
    d, fn = os.path.split(name)
    fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, dir=d)
    os.close(fd)
    # Temporary files are created with mode 0600, which is usually not
    # what we want.  If the original file already exists, just copy
    # its mode.  Otherwise, manually obey umask.
    copymode(name, temp, createmode)
    if emptyok:
        return temp
    try:
        try:
            ifp = posixfile(name, "rb")
        except IOError as inst:
            if inst.errno == errno.ENOENT:
                return temp
            if not getattr(inst, 'filename', None):
                inst.filename = name
            raise
        # close both handles even when the copy fails, so that nothing
        # leaks and the temporary file can actually be unlinked below
        try:
            ofp = posixfile(temp, "wb")
            try:
                for chunk in filechunkiter(ifp):
                    ofp.write(chunk)
            finally:
                ofp.close()
        finally:
            ifp.close()
    except: # re-raises
        try:
            os.unlink(temp)
        except OSError:
            pass
        raise
    return temp
1483 1487
class filestat(object):
    """help to exactly detect change of a file

    'stat' attribute is result of 'os.stat()' if specified 'path'
    exists. Otherwise, it is None. This can avoid preparative
    'exists()' examination on client side of this class.
    """
    def __init__(self, path):
        try:
            self.stat = os.stat(path)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            self.stat = None

    __hash__ = object.__hash__

    def __eq__(self, old):
        try:
            mine, theirs = self.stat, old.stat
            # if ambiguity between stat of new and old file is
            # avoided, comparison of size, ctime and mtime is enough
            # to exactly detect change of a file regardless of platform
            return (mine.st_size == theirs.st_size and
                    mine.st_ctime == theirs.st_ctime and
                    mine.st_mtime == theirs.st_mtime)
        except AttributeError:
            # a missing file (stat is None) never compares equal
            return False

    def isambig(self, old):
        """Examine whether new (= self) stat is ambiguous against old one

        "S[N]" below means stat of a file at N-th change:

        - S[n-1].ctime  < S[n].ctime: can detect change of a file
        - S[n-1].ctime == S[n].ctime
          - S[n-1].ctime  < S[n].mtime: means natural advancing (*1)
          - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
          - S[n-1].ctime  > S[n].mtime: never occurs naturally (don't care)
        - S[n-1].ctime  > S[n].ctime: never occurs naturally (don't care)

        Case (*2) above means that a file was changed twice or more at
        same time in sec (= S[n-1].ctime), and comparison of timestamp
        is ambiguous.

        Base idea to avoid such ambiguity is "advance mtime 1 sec, if
        timestamp is ambiguous".

        But advancing mtime only in case (*2) doesn't work as
        expected, because naturally advanced S[n].mtime in case (*1)
        might be equal to manually advanced S[n-1 or earlier].mtime.

        Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
        treated as ambiguous regardless of mtime, to avoid overlooking
        by confliction between such mtime.

        Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
        S[n].mtime", even if size of a file isn't changed.
        """
        try:
            newctime = self.stat.st_ctime
            oldctime = old.stat.st_ctime
        except AttributeError:
            return False
        return newctime == oldctime

    def avoidambig(self, path, old):
        """Change file stat of specified path to avoid ambiguity

        'old' should be previous filestat of 'path'.

        This skips avoiding ambiguity, if a process doesn't have
        appropriate privileges for 'path'.
        """
        advanced = (old.stat.st_mtime + 1) & 0x7fffffff
        try:
            os.utime(path, (advanced, advanced))
        except OSError as inst:
            # utime() on the file created by another user causes EPERM,
            # if a process doesn't have appropriate privileges
            if inst.errno != errno.EPERM:
                raise

    def __ne__(self, other):
        return not self == other
1567 1571
class atomictempfile(object):
    '''writable file object that atomically updates a file

    All writes will go to a temporary copy of the original file. Call
    close() when you are done writing, and atomictempfile will rename
    the temporary copy to the original name, making the changes
    visible. If the object is destroyed without being closed, all your
    writes are discarded.

    Used as a context manager, it closes on a clean exit and discards
    when the block raises.

    checkambig argument of constructor is used with filestat, and is
    useful only if target file is guarded by any lock (e.g. repo.lock
    or repo.wlock).
    '''
    def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
        self.__name = name # permanent name
        self._tempname = mktempcopy(name, emptyok=('w' in mode),
                                    createmode=createmode)
        fp = posixfile(self._tempname, mode)
        self._fp = fp
        self._checkambig = checkambig

        # delegated methods
        for method in ('read', 'write', 'seek', 'tell', 'fileno'):
            setattr(self, method, getattr(fp, method))

    def close(self):
        if self._fp.closed:
            return
        self._fp.close()
        filename = localpath(self.__name)
        # the old stat must be taken before the rename replaces the file
        oldstat = self._checkambig and filestat(filename)
        rename(self._tempname, filename)
        if oldstat and oldstat.stat:
            newstat = filestat(filename)
            if newstat.isambig(oldstat):
                # stat of changed file is ambiguous to original one
                advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
                os.utime(filename, (advanced, advanced))

    def discard(self):
        if self._fp.closed:
            return
        try:
            os.unlink(self._tempname)
        except OSError:
            pass
        self._fp.close()

    def __del__(self):
        if safehasattr(self, '_fp'): # constructor actually did something
            self.discard()

    def __enter__(self):
        return self

    def __exit__(self, exctype, excvalue, traceback):
        if exctype is None:
            self.close()
        else:
            self.discard()
1630 1634
def unlinkpath(f, ignoremissing=False):
    """Remove file ``f``, then prune parent directories left empty.

    With ignoremissing, a file that does not exist is not an error.
    """
    remove = tryunlink if ignoremissing else unlink
    remove(f)
    # try removing directories that might now be empty
    try:
        removedirs(os.path.dirname(f))
    except OSError:
        pass
1642 1646
def tryunlink(f):
    """Remove file ``f``; a file that is already gone (ENOENT) is fine."""
    try:
        unlink(f)
    except OSError as e:
        if e.errno == errno.ENOENT:
            return
        raise
1650 1654
def makedirs(name, mode=None, notindexed=False):
    """recursive directory creation with parent mode inheritance

    Missing parents are created first, with the same ``mode`` and
    ``notindexed`` settings. An already existing ``name`` is left alone.
    ``mode``, when given, is applied to each directory created here.

    Newly created directories are marked as "not to be indexed by
    the content indexing service", if ``notindexed`` is specified
    for "write" mode access.
    """
    try:
        makedir(name, notindexed)
    except OSError as err:
        code = err.errno
        if code == errno.EEXIST:
            return
        # only a missing parent of a non-empty name can be repaired
        if not (code == errno.ENOENT and name):
            raise
        parent = os.path.dirname(os.path.abspath(name))
        if parent == name:
            raise
        makedirs(parent, mode, notindexed)
        try:
            makedir(name, notindexed)
        except OSError as racerr:
            # Catch EEXIST to handle races
            if racerr.errno == errno.EEXIST:
                return
            raise
    if mode is not None:
        os.chmod(name, mode)
1678 1682
def readfile(path):
    """Return the whole content of ``path``, read in binary mode."""
    with open(path, 'rb') as fp:
        data = fp.read()
    return data
1682 1686
def writefile(path, text):
    """Replace the content of ``path`` with ``text`` (binary mode)."""
    with open(path, 'wb') as out:
        out.write(text)
1686 1690
def appendfile(path, text):
    """Append ``text`` to ``path`` (binary mode), creating it if needed."""
    with open(path, 'ab') as out:
        out.write(text)
1690 1694
class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
        """in_iter is the iterator that's iterating over the input chunks.

        Input chunks longer than 2**20 are re-sliced into pieces of at
        most 2**18 before being buffered."""
        def splitbig(chunks):
            for chunk in chunks:
                if len(chunk) > 2**20:
                    pos = 0
                    while pos < len(chunk):
                        end = pos + 2 ** 18
                        yield chunk[pos:end]
                        pos = end
                else:
                    yield chunk
        self.iter = splitbig(in_iter)
        # chunks fetched from the iterator but not fully consumed yet
        self._queue = collections.deque()
        # how much of self._queue[0] has already been handed out
        self._chunkoffset = 0

    def read(self, l=None):
        """Read L bytes of data from the iterator of chunks of data.
        Returns less than L bytes if the iterator runs dry.

        If size parameter is omitted, read everything that is left,
        including data already buffered by earlier sized reads."""
        queue = self._queue
        if l is None:
            parts = []
            if queue:
                # hand out what earlier reads buffered before draining
                # the iterator, otherwise that data would be lost
                parts.append(queue.popleft()[self._chunkoffset:])
                self._chunkoffset = 0
                parts.extend(queue)
                queue.clear()
            parts.extend(self.iter)
            return ''.join(parts)

        left = l
        buf = []
        while left > 0:
            # refill the queue
            if not queue:
                target = 2**18
                for chunk in self.iter:
                    queue.append(chunk)
                    target -= len(chunk)
                    if target <= 0:
                        break
                if not queue:
                    break

            # The easy way to do this would be to queue.popleft(), modify the
            # chunk (if necessary), then queue.appendleft(). However, for cases
            # where we read partial chunk content, this incurs 2 dequeue
            # mutations and creates a new str for the remaining chunk in the
            # queue. Our code below avoids this overhead.

            chunk = queue[0]
            chunkl = len(chunk)
            offset = self._chunkoffset

            # Use full chunk.
            if offset == 0 and left >= chunkl:
                left -= chunkl
                queue.popleft()
                buf.append(chunk)
                # self._chunkoffset remains at 0.
                continue

            chunkremaining = chunkl - offset

            # Use all of unconsumed part of chunk.
            if left >= chunkremaining:
                left -= chunkremaining
                queue.popleft()
                # offset == 0 is enabled by block above, so this won't merely
                # copy via ``chunk[0:]``.
                buf.append(chunk[offset:])
                self._chunkoffset = 0

            # Partial chunk needed.
            else:
                buf.append(chunk[offset:offset + left])
                self._chunkoffset += left
                # goes negative on purpose: this ends the loop
                left -= chunkremaining

        return ''.join(buf)
1771 1775
def filechunkiter(f, size=131072, limit=None):
    """Generate the data of file ``f`` in pieces of at most ``size`` bytes
    (default 131072), stopping after ``limit`` bytes when a limit is
    given (default is to read all data).

    A piece may be shorter than ``size`` when it is the last one, or when
    the file is a socket or some other kind of file that sometimes reads
    less data than is requested."""
    assert size >= 0
    assert limit is None or limit >= 0
    while True:
        nbytes = size if limit is None else min(limit, size)
        if not nbytes:
            # nothing more may be requested: do not touch the file
            break
        s = f.read(nbytes)
        if not s:
            break
        if limit:
            limit -= len(s)
        yield s
1792 1796
def makedate(timestamp=None):
    '''Return a unix timestamp (or the current time) as a (unixtime,
    offset) tuple based off the local timezone.

    The offset is the number of seconds to add to local time to get UTC.
    A negative timestamp aborts.'''
    if timestamp is None:
        timestamp = time.time()
    if timestamp < 0:
        hint = _("check your clock")
        raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
    utc = datetime.datetime.utcfromtimestamp(timestamp)
    local = datetime.datetime.fromtimestamp(timestamp)
    delta = utc - local
    return timestamp, delta.days * 86400 + delta.seconds
1805 1809
def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
    """represent a (unixtime, offset) tuple as a localized time.
    unixtime is seconds since the epoch, and offset is the time zone's
    number of seconds away from UTC.

    In ``format``, "%1" and "%2" stand for the signed hours and the
    minutes of the offset, and "%z" for both. The current time is used
    when ``date`` is omitted.

    >>> datestr((0, 0))
    'Thu Jan 01 00:00:00 1970 +0000'
    >>> datestr((42, 0))
    'Thu Jan 01 00:00:42 1970 +0000'
    >>> datestr((-42, 0))
    'Wed Dec 31 23:59:18 1969 +0000'
    >>> datestr((0x7fffffff, 0))
    'Tue Jan 19 03:14:07 2038 +0000'
    >>> datestr((-0x80000000, 0))
    'Fri Dec 13 20:45:52 1901 +0000'
    """
    t, tz = date or makedate()
    needstz = "%1" in format or "%2" in format or "%z" in format
    if needstz:
        sign = "-" if tz > 0 else "+"
        hours, minutes = divmod(abs(tz) // 60, 60)
        format = format.replace("%z", "%1%2")
        format = format.replace("%1", "%c%02d" % (sign, hours))
        format = format.replace("%2", "%02d" % minutes)
    # clamp the local time to the signed 32-bit range
    d = t - tz
    if d > 0x7fffffff:
        d = 0x7fffffff
    elif d < -0x80000000:
        d = -0x80000000
    # Never use time.gmtime() and datetime.datetime.fromtimestamp()
    # because they use the gmtime() system call which is buggy on Windows
    # for negative values.
    when = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=d)
    return encoding.strtolocal(when.strftime(encoding.strfromlocal(format)))
1841 1845
def shortdate(date=None):
    """turn (timestamp, tzoff) tuple into iso 8601 date (YYYY-MM-DD)."""
    isoformat = '%Y-%m-%d'
    return datestr(date, format=isoformat)
1845 1849
def parsetimezone(s):
    """find a trailing timezone, if any, in string, and return a
    (offset, remainder) pair

    The offset is in seconds, positive west of UTC, and is None when no
    timezone is recognized (the string is then returned untouched)."""

    def tooffset(signchar, hours, minutes):
        # "+hhmm" means east of UTC, which is a negative offset here
        sign = 1 if signchar == "+" else -1
        return -sign * (int(hours) * 60 + int(minutes)) * 60

    if s.endswith(("GMT", "UTC")):
        return 0, s[:-3].rstrip()

    # Unix-style timezones [+-]hhmm
    if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
        return tooffset(s[-5], s[-4:-2], s[-2:]), s[:-5].rstrip()

    # ISO8601 trailing Z
    if s.endswith("Z") and s[-2:-1].isdigit():
        return 0, s[:-1]

    # ISO8601-style [+-]hh:mm
    if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
        s[-5:-3].isdigit() and s[-2:].isdigit()):
        return tooffset(s[-6], s[-5:-3], s[-2:]), s[:-6]

    return None, s
1873 1877
def strdate(string, format, defaults=None):
    """parse a localized time string and return a (unixtime, offset) tuple.
    if the string cannot be parsed, ValueError is raised.

    Time elements missing from ``format`` are taken from ``defaults``,
    which maps each element group to a pair of values: index 0 is used
    until a more specific element has been seen in ``format``, index 1
    afterwards."""
    if defaults is None:
        defaults = {}

    # NOTE: unixtime = localunixtime + offset
    offset, date = parsetimezone(string)

    # add missing elements from defaults
    usenow = False # default to using biased defaults
    for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
        if any(("%" + p) in format for p in part):
            # We've found a specific time element, less specific time
            # elements are relative to today
            usenow = True
        else:
            date += "@" + defaults[part][usenow]
            format += "@%" + part[0]

    timetuple = time.strptime(date, format)
    localunixtime = int(calendar.timegm(timetuple))
    if offset is not None:
        return localunixtime + offset, offset
    # local timezone
    unixtime = int(time.mktime(timetuple))
    return unixtime, unixtime - localunixtime
1904 1908
def parsedate(date, formats=None, bias=None):
    """Parse a localized date/time into a (unixtime, offset) tuple.

    ``date`` may be a "unixtime offset" string, one of the keywords
    'now', 'today' or 'yesterday' (or their translations), or text in one
    of ``formats`` (defaultdateformats when not given). A 2-tuple is
    returned as is, and a false value yields (0, 0). ``bias`` supplies
    the values used for time elements the matched format does not carry.
    Abort is raised for unparsable or out-of-range dates.

    >>> parsedate(' today ') == parsedate(\
                                  datetime.date.today().strftime('%b %d'))
    True
    >>> parsedate( 'yesterday ') == parsedate((datetime.date.today() -\
                                               datetime.timedelta(days=1)\
                                              ).strftime('%b %d'))
    True
    >>> now, tz = makedate()
    >>> strnow, strtz = parsedate('now')
    >>> (strnow - now) < 1
    True
    >>> tz == strtz
    True
    """
    if bias is None:
        bias = {}
    if not date:
        return 0, 0
    if isinstance(date, tuple) and len(date) == 2:
        return date
    if not formats:
        formats = defaultdateformats
    date = date.strip()

    if date == 'now' or date == _('now'):
        return makedate()
    if date == 'today' or date == _('today'):
        date = datetime.date.today().strftime('%b %d')
    elif date == 'yesterday' or date == _('yesterday'):
        yesterday = datetime.date.today() - datetime.timedelta(days=1)
        date = yesterday.strftime('%b %d')

    try:
        when, offset = map(int, date.split(' '))
    except ValueError:
        # not a bare "unixtime offset" pair: fill out defaults and try
        # each known format in turn
        now = makedate()
        defaults = {}
        for part in ("d", "mb", "yY", "HI", "M", "S"):
            # this piece is for rounding the specific end of unknowns
            biased = bias.get(part)
            if biased is None:
                biased = "00" if part[0] in "HMS" else "0"

            # this piece is for matching the generic end to today's date
            current = datestr(now, "%" + part[0])

            defaults[part] = (biased, current)

        for format in formats:
            try:
                when, offset = strdate(date, format, defaults)
                break
            except (ValueError, OverflowError):
                continue
        else:
            raise Abort(_('invalid date: %r') % date)
    # validate explicit (probably user-specified) date and
    # time zone offset. values must fit in signed 32 bits for
    # current 32-bit linux runtimes. timezones go from UTC-12
    # to UTC+14
    if not -0x80000000 <= when <= 0x7fffffff:
        raise Abort(_('date exceeds 32 bits: %d') % when)
    if not -50400 <= offset <= 43200:
        raise Abort(_('impossible time zone offset: %d') % offset)
    return when, offset
1981 1985
def matchdate(date):
    """Return a function that matches a given date match specifier

    Formats include:

    '{date}' match a given date to the accuracy provided

    '<{date}' on or before a given date

    '>{date}' on or after a given date

    '-{days}' within the given number of days of now

    '{date} to {date}' an inclusive range

    The returned callable takes a unixtime and returns a boolean. Abort
    is raised for an empty or malformed specifier.

    >>> p1 = parsedate("10:29:59")
    >>> p2 = parsedate("10:30:00")
    >>> p3 = parsedate("10:30:59")
    >>> p4 = parsedate("10:31:00")
    >>> p5 = parsedate("Sep 15 10:30:00 1999")
    >>> f = matchdate("10:30")
    >>> f(p1[0])
    False
    >>> f(p2[0])
    True
    >>> f(p3[0])
    True
    >>> f(p4[0])
    False
    >>> f(p5[0])
    False
    """

    def lower(date):
        # earliest instant covered by date: unspecified month/day -> 1
        d = {'mb': "1", 'd': "1"}
        return parsedate(date, extendeddateformats, d)[0]

    def upper(date):
        # latest instant covered by date: unspecified elements are pushed
        # to their maximum; the day of month is tried from 31 downwards
        # until parsedate accepts it
        d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
        for days in ("31", "30", "29"):
            try:
                d["d"] = days
                return parsedate(date, extendeddateformats, d)[0]
            except Abort:
                pass
        d["d"] = "28"
        return parsedate(date, extendeddateformats, d)[0]

    date = date.strip()

    if not date:
        raise Abort(_("dates cannot consist entirely of whitespace"))
    elif date[0] == "<":
        if not date[1:]:
            raise Abort(_("invalid day spec, use '<DATE'"))
        when = upper(date[1:])
        return lambda x: x <= when
    elif date[0] == ">":
        if not date[1:]:
            raise Abort(_("invalid day spec, use '>DATE'"))
        when = lower(date[1:])
        return lambda x: x >= when
    elif date[0] == "-":
        try:
            days = int(date[1:])
        except ValueError:
            raise Abort(_("invalid day spec: %s") % date[1:])
        if days < 0:
            raise Abort(_("%s must be nonnegative (see 'hg help dates')")
                % date[1:])
        when = makedate()[0] - days * 3600 * 24
        return lambda x: x >= when
    elif " to " in date:
        # split only once: a spec with several " to " separators then
        # reaches parsedate and is rejected with Abort instead of failing
        # here with an unpacking ValueError
        a, b = date.split(" to ", 1)
        start, stop = lower(a), upper(b)
        return lambda x: x >= start and x <= stop
    else:
        start, stop = lower(date), upper(date)
        return lambda x: x >= start and x <= stop
2057 2061
def stringmatcher(pattern, casesensitive=True):
    """
    Build a matcher from a string with an optional 're:' or 'literal:'
    prefix. Returns a (kind, pattern, matcher) triple where kind is 're'
    or 'literal' and pattern has the recognized prefix removed.
    Missing or unknown prefixes are treated as literal matches.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test('abcdefg', 'abc', 'def', 'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test('re:a.+b', 'nomatch', 'fooadef', 'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test('literal:re:foobar', 'foobar', 're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test('foo:bar', 'foo', 'bar', 'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest('re:A.+b', 'nomatch', 'fooadef', 'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest('ABCDEFG', 'abc', 'def', 'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    if pattern.startswith('re:'):
        regexsrc = pattern[3:]
        flags = 0 if casesensitive else remod.I
        try:
            regex = remod.compile(regexsrc, flags)
        except remod.error as e:
            raise error.ParseError(_('invalid regular expression: %s')
                                   % e)
        return 're', regexsrc, regex.search

    if pattern.startswith('literal:'):
        pattern = pattern[8:]

    if casesensitive:
        return 'literal', pattern, pattern.__eq__

    # fold both sides with encoding.lower() for the insensitive comparison
    ipat = encoding.lower(pattern)
    return 'literal', pattern, lambda s: ipat == encoding.lower(s)
2116 2120
def shortuser(user):
    """Return a short representation of a user name or email address.

    Everything from the first '@' on is dropped, then everything up to
    and including the first '<', then everything from the first space
    on, and finally everything from the first '.' on.
    """
    user = user.split('@', 1)[0]
    if '<' in user:
        user = user.split('<', 1)[1]
    firstword = user.split(' ', 1)[0]
    return firstword.split('.', 1)[0]
2132 2136
def emailuser(user):
    """Return the user portion of an email address.

    Everything from the first '@' on is dropped, then everything up to
    and including the first '<' of what remains.
    """
    local = user.split('@', 1)[0]
    if '<' in local:
        local = local.split('<', 1)[1]
    return local
2142 2146
def email(author):
    '''Return the email part of an author string.

    This is the text after the first '<' (or from the start when there
    is none) up to the first '>' (or the end when there is none).'''
    start = author.find('<') + 1
    end = author.find('>')
    if end < 0:
        return author[start:]
    return author[start:end]
2149 2153
def ellipsis(text, maxlength=400):
    """Trim text to at most maxlength (default: 400) display columns.

    Delegates to encoding.trim(), passing '...' as the ellipsis marker.
    """
    marker = '...'
    return encoding.trim(text, maxlength, ellipsis=marker)
2153 2157
def unitcountfn(*unittable):
    '''return a function that renders a readable count of some quantity

    Each entry of ``unittable`` is a (multiplier, divisor, format) triple.
    The returned function scans the entries in order and uses the first
    one whose threshold ``multiplier * divisor`` does not exceed the
    magnitude of the count, formatting ``count / divisor`` with it. When
    no entry qualifies, the last entry's format is applied to the raw
    count.
    '''

    def go(count):
        for multiplier, divisor, format in unittable:
            # compare on the magnitude so that negative quantities (e.g.
            # size deltas) get the same unit as their positive counterpart
            # instead of always falling through to the smallest unit
            if abs(count) >= divisor * multiplier:
                return format % (count / float(divisor))
        return unittable[-1][2] % count

    return go
2164 2168
def processlinerange(fromline, toline):
    """Validate the line range <fromline>:<toline> and return it with a
    0-based start, i.e. (fromline - 1, toline).

    >>> processlinerange(10, 20)
    (9, 20)
    >>> processlinerange(2, 1)
    Traceback (most recent call last):
        ...
    ParseError: line range must be positive
    >>> processlinerange(0, 5)
    Traceback (most recent call last):
        ...
    ParseError: fromline must be strictly positive
    """
    if toline < fromline:
        raise error.ParseError(_("line range must be positive"))
    if fromline < 1:
        raise error.ParseError(_("fromline must be strictly positive"))
    start = fromline - 1
    return start, toline
2185 2189
# Render a byte count with a GB/MB/KB/bytes unit. Rows are
# (multiplier, divisor, format) triples consumed by unitcountfn(): the
# first row whose multiplier * divisor threshold is reached is used, so
# within each unit larger values are shown with fewer decimals.
# The format strings are kept as literal _() calls.
bytecount = unitcountfn(
    (100, 1 << 30, _('%.0f GB')),
    (10, 1 << 30, _('%.1f GB')),
    (1, 1 << 30, _('%.2f GB')),
    (100, 1 << 20, _('%.0f MB')),
    (10, 1 << 20, _('%.1f MB')),
    (1, 1 << 20, _('%.2f MB')),
    (100, 1 << 10, _('%.0f KB')),
    (10, 1 << 10, _('%.1f KB')),
    (1, 1 << 10, _('%.2f KB')),
    (1, 1, _('%.0f bytes')),
    )
2198 2202
def escapestr(s):
    """Return s with special characters backslash-escaped."""
    # call underlying function of s.encode('string_escape') directly for
    # Python 3 compatibility
    result = codecs.escape_encode(s)
    return result[0]
2203 2207
def unescapestr(s):
    """Return s with backslash escape sequences decoded
    (via codecs.escape_decode)."""
    result = codecs.escape_decode(s)
    return result[0]
2206 2210
def uirepr(s):
    """Return repr(s) with every doubled backslash collapsed to one."""
    # Avoid double backslash in Windows path repr()
    quoted = repr(s)
    return quoted.replace('\\\\', '\\')
2210 2214
2211 2215 # delay import of textwrap
def MBTextWrapper(**kwargs):
    """Create a text wrapper that measures text in display columns.

    The first call defines the wrapper class, rebinds the module-level
    name ``MBTextWrapper`` to that class and returns an instance built
    from ``kwargs``; later calls therefore instantiate the class directly.
    """
    global MBTextWrapper

    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each characters.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in East Asian area, but as narrow in other.

        This requires user decision to determine width of such characters.
        """
        def _cutdown(self, ucstr, space_left):
            # split ucstr just before the first character that would make
            # the accumulated column width exceed space_left
            colwidth = encoding.ucolwidth
            used = 0
            for pos, char in enumerate(ucstr):
                used += colwidth(char)
                if used > space_left:
                    return (ucstr[:pos], ucstr[pos:])
            return ucstr, ''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                head, tail = self._cutdown(reversed_chunks[-1], space_left)
                cur_line.append(head)
                reversed_chunks[-1] = tail
            elif not cur_line:
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            colwidth = encoding.ucolwidth

            lines = []
            if self.width <= 0:
                raise ValueError("invalid width %r (must be > 0)" % self.width)

            # Arrange in reverse order so items can be efficiently popped
            # from a stack of chunks.
            chunks.reverse()

            while chunks:

                # Start the list of chunks that will make up the current line.
                # cur_len is just the length of all the chunks in cur_line.
                cur_line = []
                cur_len = 0

                # Figure out which static string will prefix this line.
                if lines:
                    indent = self.subsequent_indent
                else:
                    indent = self.initial_indent

                # Maximum width for this line.
                width = self.width - len(indent)

                # First chunk on line is whitespace -- drop it, unless this
                # is the very beginning of the text (i.e. no lines started yet).
                if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                    del chunks[-1]

                while chunks:
                    chunkwidth = colwidth(chunks[-1])

                    # Nope, this line is full.
                    if cur_len + chunkwidth > width:
                        break

                    # Can at least squeeze this chunk onto the current line.
                    cur_line.append(chunks.pop())
                    cur_len += chunkwidth

                # The current line is full, and the next chunk is too big to
                # fit on *any* line (not just this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # If the last chunk on this line is all whitespace, drop it.
                if (self.drop_whitespace and
                    cur_line and cur_line[-1].strip() == ''):
                    del cur_line[-1]

                # Convert current line back to a string and store it in list
                # of all lines (return value).
                if cur_line:
                    lines.append(indent + ''.join(cur_line))

            return lines

    MBTextWrapper = tw
    return tw(**kwargs)
2314 2318
def wrap(line, width, initindent='', hangindent=''):
    """Wrap line to width columns and return the re-encoded result.

    ``initindent`` prefixes the first output line and ``hangindent`` the
    following ones. All three strings are decoded with the configured
    local encoding before wrapping with MBTextWrapper, and the filled
    text is encoded back with that encoding.
    """
    maxindent = max(len(hangindent), len(initindent))
    if width <= maxindent:
        # adjust for weird terminal size
        width = max(78, maxindent + 1)

    enc = pycompat.sysstr(encoding.encoding)
    mode = pycompat.sysstr(encoding.encodingmode)
    line = line.decode(enc, mode)
    initindent = initindent.decode(enc, mode)
    hangindent = hangindent.decode(enc, mode)

    wrapper = MBTextWrapper(width=width,
                            initial_indent=initindent,
                            subsequent_indent=hangindent)
    filled = wrapper.fill(line)
    return filled.encode(enc)
2330 2334
if (pyplatform.python_implementation() == 'CPython' and
    sys.version_info < (3, 0)):
    # There is an issue in CPython that some IO methods do not handle EINTR
    # correctly. The following table shows what CPython version (and functions)
    # are affected (buggy: has the EINTR bug, okay: otherwise):
    #
    #                | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
    #   --------------------------------------------------
    #    fp.__iter__ | buggy   | buggy           | okay
    #    fp.read*    | buggy   | okay [1]        | okay
    #
    # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
    #
    # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
    # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
    #
    # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
    # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
    # CPython 2, because CPython 2 maintains an internal readahead buffer for
    # fp.__iter__ but not other fp.read* methods.
    #
    # On modern systems like Linux, the "read" syscall cannot be interrupted
    # when reading "fast" files like on-disk files. So the EINTR issue only
    # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
    # files approximately as "fast" files and use the fast (unsafe) code path,
    # to minimize the performance impact.
    if sys.version_info >= (2, 7, 4):
        # fp.readline deals with EINTR correctly, use it as a workaround.
        def _safeiterfile(fp):
            """Iterate over the lines of fp using fp.readline."""
            return iter(fp.readline, '')
    else:
        # fp.read* are broken too, manually deal with EINTR in a stupid way.
        # note: this may block longer than necessary because of bufsize.
        def _safeiterfile(fp, bufsize=4096):
            """Yield the lines of fp, reading its descriptor with os.read
            and retrying reads that fail with EINTR."""
            fd = fp.fileno()
            line = ''
            while True:
                try:
                    buf = os.read(fd, bufsize)
                except OSError as ex:
                    # os.read only raises EINTR before any data is read
                    if ex.errno == errno.EINTR:
                        continue
                    else:
                        raise
                line += buf
                if '\n' in buf:
                    # Split on '\n' only. str.splitlines() would also
                    # break on a bare '\r', and such a piece in the middle
                    # of the buffer used to be overwritten and lost.
                    pieces = line.split('\n')
                    # the last piece is the (possibly empty) unterminated
                    # remainder; keep it for the next read
                    line = pieces.pop()
                    for l in pieces:
                        yield l + '\n'
                if not buf:
                    break
            if line:
                yield line

    def iterfile(fp):
        """Return an iterable over the lines of fp.

        Plain ``file`` objects that are not regular files are iterated
        through _safeiterfile(); anything else is returned unchanged.
        """
        fastpath = True
        if type(fp) is file:
            fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
        if fastpath:
            return fp
        else:
            return _safeiterfile(fp)
else:
    # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
    def iterfile(fp):
        """Return fp unchanged; it is iterated directly."""
        return fp
2402 2406
def iterlines(iterator):
    """Yield, in order, every line of every chunk produced by iterator.

    Chunks are split with splitlines(), so the yielded lines carry no
    line terminator.
    """
    for chunk in iterator:
        pieces = chunk.splitlines()
        for piece in pieces:
            yield piece
2407 2411
def expandpath(path):
    """Expand environment variables, then a leading '~', in path."""
    withvars = os.path.expandvars(path)
    return os.path.expanduser(withvars)
2410 2414
def hgcmd():
    """Return the command used to execute current hg

    This is different from hgexecutable() because on Windows we want
    to avoid things opening new shell windows like batch files, so we
    get either the python call or current executable.

    The result is gethgcmd() unless mainfrozen() is true, in which case
    it is a one-element list.
    """
    if not mainfrozen():
        return gethgcmd()
    if getattr(sys, 'frozen', None) == 'macosx_app':
        # Env variable set by py2app
        return [encoding.environ['EXECUTABLEPATH']]
    return [pycompat.sysexecutable]
2425 2429
def rundetached(args, condfn):
    """Execute the argument list in a detached process.

    condfn is a callable which is called repeatedly and should return
    True once the child process is known to have started successfully.
    At this point, the child process PID is returned. If the child
    process fails to start or finishes before condfn() evaluates to
    True, return -1.

    While waiting, a SIGCHLD handler is installed where the platform
    defines that signal; the previous handler is restored on exit.
    """
    # Windows case is easier because the child process is either
    # successfully starting and validating the condition or exiting
    # on failure. We just poll on its PID. On Unix, if the child
    # process fails to start, it will be left in a zombie state until
    # the parent wait on it, which we cannot do since we expect a long
    # running process on success. Instead we listen for SIGCHLD telling
    # us our child process terminated.
    terminated = set()
    def handler(signum, frame):
        # os.wait() returns a (pid, status) pair; record only the pid so
        # that the "pid in terminated" membership test below can match
        terminated.add(os.wait()[0])
    prevhandler = None
    SIGCHLD = getattr(signal, 'SIGCHLD', None)
    if SIGCHLD is not None:
        prevhandler = signal.signal(SIGCHLD, handler)
    try:
        pid = spawndetached(args)
        while not condfn():
            # re-check condfn() before giving up: the child may have
            # satisfied the condition just before it went away
            if ((pid in terminated or not testpid(pid))
                and not condfn()):
                return -1
            time.sleep(0.1)
        return pid
    finally:
        if prevhandler is not None:
            signal.signal(SIGCHLD, prevhandler)
2460 2464
def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
    """Return the result of interpolating items in the mapping into string s.

    prefix is a single character string, or a two character string with
    a backslash as the first character if the prefix needs to be escaped in
    a regular expression.

    fn is an optional function that will be applied to the replacement text
    just before replacement.

    escape_prefix is an optional flag that allows using doubled prefix for
    its escaping.

    The keys of mapping are joined verbatim into a regular expression.
    mapping itself is never modified.
    """
    fn = fn or (lambda s: s)
    patterns = '|'.join(mapping.keys())
    prefix_char = None
    if escape_prefix:
        patterns += '|' + prefix
        if len(prefix) > 1:
            prefix_char = prefix[1:]
        else:
            prefix_char = prefix
    r = remod.compile(r'%s(%s)' % (prefix, patterns))

    def replace(match):
        # drop the leading prefix character to get the lookup key
        key = match.group()[1:]
        if escape_prefix and key == prefix_char:
            # a doubled prefix stands for the prefix character itself;
            # handled here rather than by inserting it into the caller's
            # mapping
            return fn(prefix_char)
        return fn(mapping[key])

    return r.sub(replace, s)
2485 2489
def getport(port):
    """Return the port for a given network service.

    If port is an integer, it's returned as is. If it's a string, it's
    looked up using socket.getservbyname(). If there's no matching
    service, error.Abort is raised.
    """
    try:
        portnum = int(port)
    except ValueError:
        # not numeric: treat it as a service name below
        portnum = None
    if portnum is not None:
        return portnum

    try:
        return socket.getservbyname(port)
    except socket.error:
        raise Abort(_("no port number associated with service '%s'") % port)
2502 2506
# lower-case spellings accepted by parsebool() and the value each stands for
_booleans = dict.fromkeys(('1', 'yes', 'true', 'on', 'always'), True)
_booleans.update(dict.fromkeys(('0', 'no', 'false', 'off', 'never'), False))

def parsebool(s):
    """Parse s into a boolean.

    The comparison is case-insensitive. If s is not a valid boolean,
    returns None.
    """
    key = s.lower()
    return _booleans.get(key)
2513 2517
# maps every two-character hex string (either case) to the character
# with that code
_hextochr = dict(
    (digits, chr(int(digits, 16)))
    for digits in (hi + lo
                   for hi in string.hexdigits
                   for lo in string.hexdigits))
2516 2520
2517 2521 class url(object):
2518 2522 r"""Reliable URL parser.
2519 2523
2520 2524 This parses URLs and provides attributes for the following
2521 2525 components:
2522 2526
2523 2527 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2524 2528
2525 2529 Missing components are set to None. The only exception is
2526 2530 fragment, which is set to '' if present but empty.
2527 2531
2528 2532 If parsefragment is False, fragment is included in query. If
2529 2533 parsequery is False, query is included in path. If both are
2530 2534 False, both fragment and query are included in path.
2531 2535
2532 2536 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2533 2537
2534 2538 Note that for backward compatibility reasons, bundle URLs do not
2535 2539 take host names. That means 'bundle://../' has a path of '../'.
2536 2540
2537 2541 Examples:
2538 2542
2539 2543 >>> url('http://www.ietf.org/rfc/rfc2396.txt')
2540 2544 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2541 2545 >>> url('ssh://[::1]:2200//home/joe/repo')
2542 2546 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2543 2547 >>> url('file:///home/joe/repo')
2544 2548 <url scheme: 'file', path: '/home/joe/repo'>
2545 2549 >>> url('file:///c:/temp/foo/')
2546 2550 <url scheme: 'file', path: 'c:/temp/foo/'>
2547 2551 >>> url('bundle:foo')
2548 2552 <url scheme: 'bundle', path: 'foo'>
2549 2553 >>> url('bundle://../foo')
2550 2554 <url scheme: 'bundle', path: '../foo'>
2551 2555 >>> url(r'c:\foo\bar')
2552 2556 <url path: 'c:\\foo\\bar'>
2553 2557 >>> url(r'\\blah\blah\blah')
2554 2558 <url path: '\\\\blah\\blah\\blah'>
2555 2559 >>> url(r'\\blah\blah\blah#baz')
2556 2560 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2557 2561 >>> url(r'file:///C:\users\me')
2558 2562 <url scheme: 'file', path: 'C:\\users\\me'>
2559 2563
2560 2564 Authentication credentials:
2561 2565
2562 2566 >>> url('ssh://joe:xyz@x/repo')
2563 2567 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2564 2568 >>> url('ssh://joe@x/repo')
2565 2569 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2566 2570
2567 2571 Query strings and fragments:
2568 2572
2569 2573 >>> url('http://host/a?b#c')
2570 2574 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2571 2575 >>> url('http://host/a?b#c', parsequery=False, parsefragment=False)
2572 2576 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2573 2577
2574 2578 Empty path:
2575 2579
2576 2580 >>> url('')
2577 2581 <url path: ''>
2578 2582 >>> url('#a')
2579 2583 <url path: '', fragment: 'a'>
2580 2584 >>> url('http://host/')
2581 2585 <url scheme: 'http', host: 'host', path: ''>
2582 2586 >>> url('http://host/#a')
2583 2587 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2584 2588
2585 2589 Only scheme:
2586 2590
2587 2591 >>> url('http:')
2588 2592 <url scheme: 'http'>
2589 2593 """
2590 2594
2591 2595 _safechars = "!~*'()+"
2592 2596 _safepchars = "/!~*'()+:\\"
2593 2597 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2594 2598
def __init__(self, path, parsequery=True, parsefragment=True):
    """Split path into its URL components and store them on self.

    Components that are absent are left as None. ``parsefragment`` and
    ``parsequery`` control whether '#...' and '?...' are split off or
    kept as part of the preceding component. Abort is raised for a
    file:// URL naming a host other than the local machine.
    """
    # We slowly chomp away at path until we have only the path left
    self.scheme = self.user = self.passwd = self.host = None
    self.port = self.path = self.query = self.fragment = None
    self._localpath = True
    self._hostport = ''
    self._origpath = path

    if parsefragment and '#' in path:
        path, self.fragment = path.split('#', 1)

    # special case for Windows drive letters and UNC paths
    if hasdriveletter(path) or path.startswith('\\\\'):
        self.path = path
        return

    # For compatibility reasons, we can't handle bundle paths as
    # normal URLS
    if path.startswith('bundle:'):
        self.scheme = 'bundle'
        path = path[7:]
        if path.startswith('//'):
            path = path[2:]
        self.path = path
        return

    if self._matchscheme(path):
        scheme, rest = path.split(':', 1)
        if scheme:
            self.scheme, path = scheme, rest
            self._localpath = False

    if self._localpath:
        # no scheme was found: what is left is a plain local path
        self.path = path or ''
        return

    if not path:
        path = None
    else:
        if parsequery and '?' in path:
            path, self.query = path.split('?', 1)
            if not path:
                path = None
            if not self.query:
                self.query = None

        # // is required to specify a host/authority
        if path and path.startswith('//'):
            authority, slash, rest = path[2:].partition('/')
            self.host = authority
            if slash:
                path = rest
            else:
                path = None
            if not self.host:
                self.host = None
                # path of file:///d is /d
                # path of file:///d:/ is d:/, not /d:/
                if path and not hasdriveletter(path):
                    path = '/' + path

        if self.host and '@' in self.host:
            self.user, self.host = self.host.rsplit('@', 1)
            if ':' in self.user:
                self.user, self.passwd = self.user.split(':', 1)
            if not self.host:
                self.host = None

        # Don't split on colons in IPv6 addresses without ports
        bracketed = (self.host and self.host.startswith('[')
                     and self.host.endswith(']'))
        if self.host and ':' in self.host and not bracketed:
            self._hostport = self.host
            self.host, self.port = self.host.rsplit(':', 1)
            if not self.host:
                self.host = None

        if (self.host and self.scheme == 'file' and
            self.host not in ('localhost', '127.0.0.1', '[::1]')):
            raise Abort(_('file:// URLs can only refer to localhost'))

    self.path = path

    # leave the query string escaped
    for name in ('user', 'passwd', 'host', 'port',
                 'path', 'fragment'):
        value = getattr(self, name)
        if value is not None:
            setattr(self, name, urlreq.unquote(value))
2686 2690
def __repr__(self):
    """Return '<url ...>' listing every component that is not None as
    'name: repr(value)', in URL order."""
    names = ('scheme', 'user', 'passwd', 'host', 'port', 'path',
             'query', 'fragment')
    present = [(name, getattr(self, name)) for name in names]
    attrs = ['%s: %r' % item for item in present if item[1] is not None]
    return '<url %s>' % ', '.join(attrs)
2695 2699
def __str__(self):
    r"""Join the URL's components back into a URL string.

    The result is the output of __bytes__() passed through
    encoding.strfromlocal().

    Examples:

    >>> str(url('http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
    'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
    >>> str(url('http://user:pw@host:80/?foo=bar&baz=42'))
    'http://user:pw@host:80/?foo=bar&baz=42'
    >>> str(url('http://user:pw@host:80/?foo=bar%3dbaz'))
    'http://user:pw@host:80/?foo=bar%3dbaz'
    >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
    'ssh://user:pw@[::1]:2200//home/joe#'
    >>> str(url('http://localhost:80//'))
    'http://localhost:80//'
    >>> str(url('http://localhost:80/'))
    'http://localhost:80/'
    >>> str(url('http://localhost:80'))
    'http://localhost:80/'
    >>> str(url('bundle:foo'))
    'bundle:foo'
    >>> str(url('bundle://../foo'))
    'bundle:../foo'
    >>> str(url('path'))
    'path'
    >>> str(url('file:///tmp/foo/bar'))
    'file:///tmp/foo/bar'
    >>> str(url('file:///c:/tmp/foo/bar'))
    'file:///c:/tmp/foo/bar'
    >>> print url(r'bundle:foo\bar')
    bundle:foo\bar
    >>> print url(r'file:///D:\data\hg')
    file:///D:\data\hg
    """
    joined = self.__bytes__()
    return encoding.strfromlocal(joined)
2731 2735
def __bytes__(self):
    """Reassemble the stored components into a URL string.

    Local paths are returned essentially as stored (with a 'bundle:'
    prefix and '#fragment' suffix where applicable); for everything else
    user, passwd, host, port, path and fragment are quoted again while
    the query is emitted as stored.
    """
    if self._localpath:
        local = self.path
        if self.scheme == 'bundle':
            local = 'bundle:' + local
        if self.fragment:
            local += '#' + self.fragment
        return local

    quote = urlreq.quote
    parts = [self.scheme + ':']
    if self.user or self.passwd or self.host:
        parts.append('//')
    elif self.scheme and (not self.path or self.path.startswith('/')
                          or hasdriveletter(self.path)):
        parts.append('//')
        if hasdriveletter(self.path):
            parts.append('/')
    if self.user:
        parts.append(quote(self.user, safe=self._safechars))
    if self.passwd:
        parts.append(':' + quote(self.passwd, safe=self._safechars))
    if self.user or self.passwd:
        parts.append('@')
    if self.host:
        if self.host.startswith('[') and self.host.endswith(']'):
            # bracketed (IPv6 literal) hosts are emitted verbatim
            parts.append(self.host)
        else:
            parts.append(quote(self.host))
    if self.port:
        parts.append(':' + quote(self.port))
    if self.host:
        parts.append('/')
    if self.path:
        # TODO: similar to the query string, we should not unescape the
        # path when we store it, the path might contain '%2f' = '/',
        # which we should *not* escape.
        parts.append(quote(self.path, safe=self._safepchars))
    if self.query:
        # we store the query in escaped form.
        parts.append('?' + self.query)
    if self.fragment is not None:
        parts.append('#' + quote(self.fragment, safe=self._safepchars))
    return ''.join(parts)
2775 2779
def authinfo(self):
    """Return ``(url, authdata)`` with credentials stripped from ``url``.

    ``url`` is ``str(self)`` rendered while user and password are
    temporarily cleared.  ``authdata`` is ``None`` when no user is set,
    otherwise ``(None, (url, host), user, passwd)`` where a missing
    password is reported as ``''``.
    """
    saveduser, savedpasswd = self.user, self.passwd
    try:
        self.user, self.passwd = None, None
        bare = str(self)
    finally:
        # Always put the credentials back, even if rendering failed.
        self.user, self.passwd = saveduser, savedpasswd
    if not self.user:
        return (bare, None)
    # authinfo[1] is passed to urllib2 password manager, and its
    # URIs must not contain credentials. The host is passed in the
    # URIs list because Python < 2.4.3 uses only that to search for
    # a password.
    uris = (bare, self.host)
    return (bare, (None, uris, self.user, self.passwd or ''))
2791 2795
def isabs(self):
    """Report whether this URL is absolute (cannot be joined to a base).

    True for any scheme other than ``file``, for paths starting with a
    drive letter, for Windows UNC paths and for POSIX-style paths.
    """
    if self.scheme and self.scheme != 'file':
        return True # remote URL
    path = self.path
    if hasdriveletter(path):
        return True # absolute for our purposes - can't be joined()
    # Windows UNC path or POSIX-style absolute path
    return path.startswith((r'\\', '/'))
2802 2806
def localpath(self):
    """Return the filesystem path this URL refers to.

    For ``file`` and ``bundle`` URLs the path is rebuilt from the parsed
    components; for anything else the original, unparsed path is
    returned.
    """
    if self.scheme not in ('file', 'bundle'):
        return self._origpath

    # For Windows, we need to promote hosts containing drive
    # letters to paths with drive letters.
    hostport = self._hostport
    if hasdriveletter(hostport):
        return hostport + '/' + self.path

    path = self.path or '/'
    if self.host is not None and self.path and not hasdriveletter(path):
        path = '/' + path
    return path
2815 2819
def islocal(self):
    '''whether localpath will return something that posixfile can open'''
    scheme = self.scheme
    return not scheme or scheme in ('file', 'bundle')
2820 2824
def hasscheme(path):
    '''whether parsing path as a url yields a non-empty scheme'''
    parsed = url(path)
    return bool(parsed.scheme)
2823 2827
def hasdriveletter(path):
    '''whether path starts with a drive letter followed by a colon

    Always returns a bool, including for an empty or ``None`` path
    (previously the falsy argument itself was returned).

    >>> hasdriveletter('c:/tmp')
    True
    >>> hasdriveletter('/tmp')
    False
    >>> hasdriveletter('')
    False
    '''
    return bool(path) and path[1:2] == ':' and path[0:1].isalpha()
2826 2830
def urllocalpath(path):
    '''return the local path for path, parsed as a url without splitting
    off a query or a fragment'''
    parsed = url(path, parsequery=False, parsefragment=False)
    return parsed.localpath()
2829 2833
def hidepassword(u):
    '''hide user credential in a url string'''
    parsed = url(u)
    if parsed.passwd:
        # Replace the password with a fixed mask before rendering.
        parsed.passwd = '***'
    return str(parsed)
2836 2840
def removeauth(u):
    '''remove all authentication information from a url string'''
    parsed = url(u)
    parsed.user = None
    parsed.passwd = None
    return str(parsed)
2842 2846
# Formatter for elapsed times, built by unitcountfn from the rows below.
# The rows run from whole seconds down to nanoseconds, with more decimals
# as the value within a unit gets smaller.
# NOTE(review): each row looks like (multiplier, divisor, format) -- confirm
# the exact matching rule against unitcountfn.
timecount = unitcountfn(
    (1, 1e3, _('%.0f s')),
    (100, 1, _('%.1f s')),
    (10, 1, _('%.2f s')),
    (1, 1, _('%.3f s')),
    (100, 0.001, _('%.1f ms')),
    (10, 0.001, _('%.2f ms')),
    (1, 0.001, _('%.3f ms')),
    (100, 0.000001, _('%.1f us')),
    (10, 0.000001, _('%.2f us')),
    (1, 0.000001, _('%.3f us')),
    (100, 0.000000001, _('%.1f ns')),
    (10, 0.000000001, _('%.2f ns')),
    (1, 0.000000001, _('%.3f ns')),
    )
2858 2862
# Current indentation (in spaces) of the timing report; kept in a
# one-element list so nested wrappers can update it in place.
_timenesting = [0]

def timed(func):
    '''Report the execution time of a function call to stderr.

    Each report line is indented according to how deeply timed calls
    are nested.

    During development, use as a decorator when you need to measure
    the cost of a function, e.g. as follows:

    @util.timed
    def foo(a, b, c):
        pass
    '''

    def wrapper(*args, **kwargs):
        began = timer()
        step = 2
        _timenesting[0] += step
        try:
            return func(*args, **kwargs)
        finally:
            # Report even when func raised.
            spent = timer() - began
            _timenesting[0] -= step
            prefix = ' ' * _timenesting[0]
            stderr.write('%s%s: %s\n' %
                         (prefix, func.__name__, timecount(spent)))
    return wrapper
2885 2889
# (suffix, byte multiplier) pairs, tried in this order against the
# lower-cased size specifier.
_sizeunits = (
    ('m', 2**20), ('k', 2**10), ('g', 2**30),
    ('kb', 2**10), ('mb', 2**20), ('gb', 2**30),
    ('b', 1),
)

def sizetoint(s):
    '''Convert a space specifier to a byte count.

    The specifier is a number optionally followed by a case-insensitive
    unit suffix; surrounding whitespace is ignored.  Raises
    error.ParseError when the number cannot be parsed.

    >>> sizetoint('30')
    30
    >>> sizetoint('2.2kb')
    2252
    >>> sizetoint('6M')
    6291456
    '''
    spec = s.strip().lower()
    try:
        for suffix, factor in _sizeunits:
            if not spec.endswith(suffix):
                continue
            number = spec[:-len(suffix)]
            return int(float(number) * factor)
        # No unit suffix: must be a plain integer.
        return int(spec)
    except ValueError:
        raise error.ParseError(_("couldn't parse size: %s") % s)
2907 2911
class hooks(object):
    '''A collection of hook functions that can be used to extend a
    function's behavior. Hooks are called in lexicographic order,
    based on the names of their sources.'''

    def __init__(self):
        # list of (source, hook) pairs
        self._hooks = []

    def add(self, source, hook):
        '''Register hook under the given source name.'''
        entry = (source, hook)
        self._hooks.append(entry)

    def __call__(self, *args):
        '''Call every hook with args and return the list of results,
        ordered by source name.'''
        self._hooks.sort(key=lambda entry: entry[0])
        return [hook(*args) for _source, hook in self._hooks]
2925 2929
def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%s', depth=0):
    '''Yields lines for a nicely formatted stacktrace.
    Skips the 'skip' last entries, then return the last 'depth' entries
    (all of them when depth is 0).
    Each file+linenumber is formatted according to fileline.
    Each line is formatted according to line.
    If line is None, it yields:
      length of longest filepath+line number,
      filepath+linenumber,
      function

    Not to be used in production code but very convenient while developing.
    '''
    # Drop this function's own frame plus the 'skip' frames below it.
    stack = traceback.extract_stack()[:-skip - 1]
    entries = [(fileline % (fn, ln), func)
               for fn, ln, func, _text in stack][-depth:]
    if not entries:
        return
    width = max(len(location) for location, _func in entries)
    for location, func in entries:
        if line is None:
            yield (width, location, func)
        else:
            yield line % (width, location, func)
2948 2952
def debugstacktrace(msg='stacktrace', skip=0,
                    f=stderr, otherf=stdout, depth=0):
    '''Writes a message to f (stderr) with a nicely formatted stacktrace.
    Skips the 'skip' entries closest to the call, then show 'depth' entries.
    By default it will flush stdout first.
    It can be used everywhere and intentionally does not require an ui object.
    Not to be used in production code but very convenient while developing.
    '''
    if otherf:
        # Flush the other stream first so the two outputs stay ordered.
        otherf.flush()
    header = '%s at:\n' % msg.rstrip()
    f.write(header)
    # skip + 1 hides this function's own frame from the report.
    for frameline in getstackframes(skip + 1, depth=depth):
        f.write(frameline)
    f.flush()
2963 2967
class dirs(object):
    '''a multiset of directory names from a dirstate or manifest'''

    def __init__(self, map, skip=None):
        '''Count the parent directories of every path in map.

        When map has an iteritems method and skip is given, entries
        whose value starts with skip are left out.
        '''
        self._dirs = {}
        add = self.addpath
        if safehasattr(map, 'iteritems') and skip is not None:
            for fname, state in map.iteritems():
                if state[0] != skip:
                    add(fname)
        else:
            for fname in map:
                add(fname)

    def addpath(self, path):
        '''Add one reference for path's parent directories.'''
        counts = self._dirs
        for base in finddirs(path):
            if base not in counts:
                counts[base] = 1
                continue
            # Known directory: bump it and stop, leaving the counts of
            # its own parents untouched.
            counts[base] += 1
            return

    def delpath(self, path):
        '''Drop one reference for path's parent directories.'''
        counts = self._dirs
        for base in finddirs(path):
            if counts[base] > 1:
                # Still referenced by another path: stop here.
                counts[base] -= 1
                return
            del counts[base]

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs

# Use the implementation from the parsers module when it provides one.
if safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs
3002 3006
def finddirs(path):
    '''Yield each parent directory of path, from the deepest one up.

    Directories are the prefixes of path ending before a '/'; nothing
    is yielded when path contains no '/'.
    '''
    cut = path.rfind('/')
    while cut >= 0:
        yield path[:cut]
        cut = path.rfind('/', 0, cut)
3008 3012
class ctxmanager(object):
    '''A context manager for use in 'with' blocks to allow multiple
    contexts to be entered at once.  This is both safer and more
    flexible than contextlib.nested.

    Once Mercurial supports Python 2.7+, this will become mostly
    unnecessary.
    '''

    def __init__(self, *args):
        '''Accepts a list of no-argument functions that return context
        managers.  These will be invoked when enter() is called.'''
        self._pending = args
        self._atexit = []

    def __enter__(self):
        return self

    def enter(self):
        '''Create and enter context managers in the order in which they were
        passed to the constructor.

        Returns the list of values produced by their __enter__ methods.'''
        values = []
        for func in self._pending:
            obj = func()
            values.append(obj.__enter__())
            # Register the exit right away so it runs even if a later
            # manager fails to enter.
            self._atexit.append(obj.__exit__)
        del self._pending
        return values

    def atexit(self, func, *args, **kwargs):
        '''Add a function to call when this context manager exits.

        Exit functions run in the reverse of the order in which they were
        registered, so a function added after enter() runs before the
        __exit__ methods of the managers entered by enter().  The return
        value of func is ignored.  Returns func.'''
        def wrapper(exc_type, exc_val, exc_tb):
            func(*args, **kwargs)
        self._atexit.append(wrapper)
        return func

    def __exit__(self, exc_type, exc_val, exc_tb):
        '''Context managers are exited in the reverse order from which
        they were created.

        An exception raised by an exit function replaces the active one
        and is handed to the remaining exit functions, as with nested
        'with' statements.  It is re-raised at the end unless one of them
        suppressed it.'''
        received = exc_type is not None
        suppressed = False
        pending = None
        self._atexit.reverse()
        for exitfunc in self._atexit:
            try:
                if exitfunc(exc_type, exc_val, exc_tb):
                    suppressed = True
                    # The active exception has been handled, including one
                    # raised by an earlier exit function; nothing is left
                    # to re-raise.
                    pending = None
                    exc_type = None
                    exc_val = None
                    exc_tb = None
            except BaseException:
                exc_type, exc_val, exc_tb = pending = sys.exc_info()
        del self._atexit
        if pending:
            raise exc_val
        return received and suppressed
3068 3072
# compression code

# Role names accepted by compressormanager.supportedwireengines().
SERVERROLE = 'server'
CLIENTROLE = 'client'

# Value returned by compressionengine.wireprotosupport(): the wire format
# identifier plus the advertisement priority for each role.
compewireprotosupport = collections.namedtuple(
    u'compenginewireprotosupport',
    [u'name', u'serverpriority', u'clientpriority'])
3077 3081
class compressormanager(object):
    """Holds registrations of various compression engines.

    This class essentially abstracts the differences between compression
    engines to allow new compression formats to be added easily, possibly from
    extensions.

    Compressors are registered against the global instance by calling its
    ``register()`` method.
    """
    def __init__(self):
        self._engines = {}
        # Bundle spec human name to engine name.
        self._bundlenames = {}
        # Internal bundle identifier to engine name.
        self._bundletypes = {}
        # Revlog header to engine name.
        self._revlogheaders = {}
        # Wire proto identifier to engine name.
        self._wiretypes = {}

    def __getitem__(self, key):
        return self._engines[key]

    def __contains__(self, key):
        return key in self._engines

    def __iter__(self):
        return iter(self._engines.keys())

    def register(self, engine):
        """Register a compression engine with the manager.

        The argument must be a ``compressionengine`` instance.

        Raises ``ValueError`` for any other argument and aborts if the
        engine name or any of its bundle, wire protocol or revlog
        identifiers is already taken.  Every identifier is checked before
        anything is recorded, so a rejected engine leaves no trace in the
        manager.
        """
        if not isinstance(engine, compressionengine):
            raise ValueError(_('argument must be a compressionengine'))

        name = engine.name()

        if name in self._engines:
            raise error.Abort(_('compression engine %s already registered') %
                              name)

        # Phase 1: look for conflicts without modifying any table.
        bundleinfo = engine.bundletype()
        if bundleinfo:
            bundlename, bundletype = bundleinfo

            if bundlename in self._bundlenames:
                raise error.Abort(_('bundle name %s already registered') %
                                  bundlename)
            if bundletype in self._bundletypes:
                raise error.Abort(_('bundle type %s already registered by %s') %
                                  (bundletype, self._bundletypes[bundletype]))

        wiresupport = engine.wireprotosupport()
        if wiresupport:
            wiretype = wiresupport.name
            if wiretype in self._wiretypes:
                raise error.Abort(_('wire protocol compression %s already '
                                    'registered by %s') %
                                  (wiretype, self._wiretypes[wiretype]))

        revlogheader = engine.revlogheader()
        if revlogheader and revlogheader in self._revlogheaders:
            raise error.Abort(_('revlog header %s already registered by %s') %
                              (revlogheader, self._revlogheaders[revlogheader]))

        # Phase 2: everything is free, record the engine.
        if bundleinfo:
            # An empty bundle name means no external facing name is
            # declared; only the internal identifier is recorded then.
            if bundlename:
                self._bundlenames[bundlename] = name

            self._bundletypes[bundletype] = name

        if wiresupport:
            self._wiretypes[wiretype] = name

        if revlogheader:
            self._revlogheaders[revlogheader] = name

        self._engines[name] = engine

    @property
    def supportedbundlenames(self):
        return set(self._bundlenames.keys())

    @property
    def supportedbundletypes(self):
        return set(self._bundletypes.keys())

    def _checkavailable(self, engine):
        """Return ``engine``, aborting if it reports itself unavailable."""
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forbundlename(self, bundlename):
        """Obtain a compression engine registered to a bundle name.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundlenames[bundlename]]
        return self._checkavailable(engine)

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundletypes[bundletype]]
        return self._checkavailable(engine)

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'

        engines = [self._engines[e] for e in self._wiretypes.values()]
        if onlyavailable:
            engines = [e for e in engines if e.available()]

        def getkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            w = e.wireprotosupport()
            return -1 * getattr(w, attr), w.name

        return sorted(engines, key=getkey)

    def forwiretype(self, wiretype):
        """Obtain a compression engine registered to a wire proto identifier.

        Will raise KeyError if the wire type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._wiretypes[wiretype]]
        return self._checkavailable(engine)

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.
        """
        return self._engines[self._revlogheaders[header]]

compengines = compressormanager()
3233 3237
class compressionengine(object):
    """Base class for compression engines.

    Every compression engine implements the interface declared here.
    Methods that return ``None`` by default describe optional features;
    methods that raise ``NotImplementedError`` have to be overridden by
    engines that advertise the matching feature.
    """
    def name(self):
        """Returns the name of the compression engine.

        The engine is registered under this key.

        This method must be implemented.
        """
        raise NotImplementedError()

    def available(self):
        """Whether the compression engine is available.

        This lets an engine be declared even though it may be missing from
        some installations (for example an engine relying on a C extension
        that is not present).
        """
        return True

    def bundletype(self):
        """Describes bundle identifiers for this engine.

        Returns None when the engine cannot be used for bundles.

        Otherwise returns a 2-tuple of strings: the user-facing "bundle
        spec" compression name and the internal identifier that denotes
        the compression format within bundles.  Set the first element to
        ``None`` to exclude the name from external usage.

        If bundle compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.
        """
        return None

    def wireprotosupport(self):
        """Declare support for this compression format on the wire protocol.

        Returns None when the engine cannot compress wire protocol
        payloads.

        Otherwise, returns ``compenginewireprotosupport`` with the following
        fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        The integer priorities are used to order the advertisement of format
        support by server and client. The highest integer is advertised
        first. Integers with non-positive values aren't advertised.

        The priority values are somewhat arbitrary and only used for default
        ordering. The relative order can be changed via config options.

        If wire protocol compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Header added to revlog chunks that identifies this engine.

        An engine that can compress revlogs returns the bytes that mark
        chunks compressed with it.  Any other engine returns ``None`` to
        signal that it does not take part in revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        ``it`` is an iterator (ideally a generator) of chunks of bytes to
        compress.  The return value is an iterator (ideally a generator)
        of chunks of bytes holding the compressed output.

        ``opts`` optionally defines how to perform compression; each
        engine treats this argument differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        ``fh`` is an object with a ``read(size)`` method that returns
        compressed data.  The return value is an object with a
        ``read(size)`` method that returns uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data. This method returns compressed binary data or ``None`` if
        the data could not be compressed (too small, not compressible, etc).
        The returned data should have a header uniquely identifying this
        compression format so decompression can be routed to this engine.
        This header should be identified by the ``revlogheader()`` return
        value.

        The object has a ``decompress(data)`` method that decompresses
        data. The method will only be called if ``data`` begins with
        ``revlogheader()``. The method should return the raw, uncompressed
        data or raise a ``RevlogError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()
3347 3351
class _zlibengine(compressionengine):
    """Compression engine backed by the ``zlib`` module.

    Usable for bundles, the wire protocol and revlogs.
    """
    def name(self):
        return 'zlib'

    def bundletype(self):
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        """Yield zlib-compressed chunks; ``opts['level']`` defaults to -1."""
        level = (opts or {}).get('level', -1)
        compressor = zlib.compressobj(level)
        for chunk in it:
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through generator.
            out = compressor.compress(chunk)
            if out:
                yield out

        yield compressor.flush()

    def decompressorreader(self, fh):
        """Return a ``chunkbuffer`` over the decompressed content of fh."""
        def gen():
            decompressor = zlib.decompressobj()
            for chunk in filechunkiter(fh):
                while chunk:
                    # Limit output size to limit memory.
                    yield decompressor.decompress(chunk, 2 ** 18)
                    chunk = decompressor.unconsumed_tail

        return chunkbuffer(gen())

    class zlibrevlogcompressor(object):
        def compress(self, data):
            """Return compressed data, or None if that would not be
            smaller than the input."""
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            # Inputs this small are never compressed.
            if insize < 44:
                return None

            if insize <= 1000000:
                packed = zlib.compress(data)
                return packed if len(packed) < insize else None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            compressor = zlib.compressobj()
            parts = [compressor.compress(data[start:start + 2**20])
                     for start in range(0, insize, 2**20)]
            parts.append(compressor.flush())

            if sum(len(part) for part in parts) < insize:
                return ''.join(parts)
            return None

        def decompress(self, data):
            """Return the decompressed data, raising RevlogError on
            zlib errors."""
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())
3428 3432
class _bz2engine(compressionengine):
    """Compression engine backed by the ``bz2`` module.

    Usable for bundles and the wire protocol; it declares no revlog
    header.
    """
    def name(self):
        return 'bz2'

    def bundletype(self):
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        """Yield bzip2-compressed chunks; ``opts['level']`` defaults to 9."""
        level = (opts or {}).get('level', 9)
        compressor = bz2.BZ2Compressor(level)
        for chunk in it:
            out = compressor.compress(chunk)
            # Skip calls that produced no output yet.
            if out:
                yield out

        yield compressor.flush()

    def decompressorreader(self, fh):
        """Return a ``chunkbuffer`` over the decompressed content of fh."""
        def gen():
            decompressor = bz2.BZ2Decompressor()
            for chunk in filechunkiter(fh):
                yield decompressor.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_bz2engine())
3460 3464
class _truncatedbz2engine(compressionengine):
    """Decompression-only engine for bzip2 streams stored without their
    leading 'BZ' header.

    It has an internal bundle identifier but no user-facing bundle name.
    """
    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled elsewhere.

    def decompressorreader(self, fh):
        """Return a ``chunkbuffer`` over the decompressed content of fh."""
        def gen():
            decompressor = bz2.BZ2Decompressor()
            # The input stream doesn't have the 'BZ' header. So add it back.
            decompressor.decompress('BZ')
            for chunk in filechunkiter(fh):
                yield decompressor.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_truncatedbz2engine())
3481 3485
class _noopengine(compressionengine):
    """Engine that passes data through without compressing it."""
    def name(self):
        return 'none'

    def bundletype(self):
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't because
    # unless you are on a fast network, uncompressed payloads can easily
    # saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        # Nothing to do: hand the input iterator back unchanged.
        return it

    def decompressorreader(self, fh):
        # Nothing to do: the file object already yields raw data.
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            # Always report that the data could not be compressed.
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())
3512 3516
class _zstdengine(compressionengine):
    """Compression engine backed by the bundled ``zstd`` module.

    The module is imported on first use; when the import fails the engine
    reports itself as unavailable.
    """
    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
            return zstd
        except ImportError:
            return None

    def available(self):
        return bool(self._module)

    def bundletype(self):
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        """Yield zstd-compressed chunks; ``opts['level']`` defaults to 3."""
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = (opts or {}).get('level', 3)

        zstd = self._module
        compressor = zstd.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            out = compressor.compress(chunk)
            if out:
                yield out

        yield compressor.flush()

    def decompressorreader(self, fh):
        """Return a ``chunkbuffer`` over the decompressed content of fh."""
        zstd = self._module
        reader = zstd.ZstdDecompressor().read_from(fh)
        return chunkbuffer(reader)

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # Writing the content size adds a few bytes to the output. However,
            # it allows decompression to be more optimal since we can
            # pre-allocate a buffer to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level,
                                             write_content_size=True)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            """Return compressed data, or None if that would not be
            smaller than the input."""
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            # Inputs this small are never compressed.
            if insize < 50:
                return None

            if insize <= 1000000:
                packed = self._cctx.compress(data)
                return packed if len(packed) < insize else None

            # Larger inputs are fed to a streaming compressor in slices.
            compressor = self._cctx.compressobj()
            step = self._compinsize
            pieces = []
            offset = 0
            while offset < insize:
                out = compressor.compress(data[offset:offset + step])
                if out:
                    pieces.append(out)
                offset += step
            pieces.append(compressor.flush())

            if sum(len(piece) for piece in pieces) < insize:
                return ''.join(pieces)
            return None

        def decompress(self, data):
            """Return the decompressed data, raising RevlogError on any
            failure."""
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                step = self._decompinsize
                pieces = []
                offset = 0
                while offset < insize:
                    out = dobj.decompress(data[offset:offset + step])
                    if out:
                        pieces.append(out)
                    offset += step
                # Frame should be exhausted, so no finish() API.

                return ''.join(pieces)
            except Exception as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))

compengines.register(_zstdengine())
3630 3634
# convenient shortcut: a short alias for debugstacktrace, handy when
# typing it in temporarily during a debugging session
dst = debugstacktrace
General Comments 0
You need to be logged in to leave comments. Login now