##// END OF EJS Templates
util: disable hardlink for copyfile if fstype is outside a whitelist...
Jun Wu -
r31575:e506e461 default
parent child Browse files
Show More
@@ -1,3586 +1,3605 b''
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import
17 17
18 18 import bz2
19 19 import calendar
20 20 import codecs
21 21 import collections
22 22 import datetime
23 23 import errno
24 24 import gc
25 25 import hashlib
26 26 import imp
27 27 import os
28 28 import platform as pyplatform
29 29 import re as remod
30 30 import shutil
31 31 import signal
32 32 import socket
33 33 import stat
34 34 import string
35 35 import subprocess
36 36 import sys
37 37 import tempfile
38 38 import textwrap
39 39 import time
40 40 import traceback
41 41 import zlib
42 42
43 43 from . import (
44 44 encoding,
45 45 error,
46 46 i18n,
47 47 osutil,
48 48 parsers,
49 49 pycompat,
50 50 )
51 51
52 52 empty = pycompat.empty
53 53 httplib = pycompat.httplib
54 54 httpserver = pycompat.httpserver
55 55 pickle = pycompat.pickle
56 56 queue = pycompat.queue
57 57 socketserver = pycompat.socketserver
58 58 stderr = pycompat.stderr
59 59 stdin = pycompat.stdin
60 60 stdout = pycompat.stdout
61 61 stringio = pycompat.stringio
62 62 urlerr = pycompat.urlerr
63 63 urlreq = pycompat.urlreq
64 64 xmlrpclib = pycompat.xmlrpclib
65 65
def isatty(fp):
    """Return fp.isatty(), or False when that call raises AttributeError."""
    try:
        result = fp.isatty()
    except AttributeError:
        # objects without an isatty() method are treated as non-terminals
        result = False
    return result
71 71
# glibc determines buffering on first write to stdout - if we replace a TTY
# destined stdout with a pipe destined stdout (e.g. pager), we want line
# buffering
if isatty(stdout):
    # the third argument (1) requests line buffering on the reopened stream
    stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)

# Bind the OS-specific implementation module to the single name ``platform``;
# the aliases that follow are all taken from it.
if pycompat.osname == 'nt':
    from . import windows as platform
    # on Windows, stdout is additionally wrapped by platform.winstdout
    stdout = platform.winstdout(stdout)
else:
    from . import posix as platform
83 83
# translation function, by the usual gettext naming convention
_ = i18n._

# Re-export the helpers of the selected ``platform`` module under util.*
# names. Two entries are not plain platform attributes: ``getpid`` comes from
# ``os`` and ``statfiles`` prefers the ``osutil`` version when it exists.
bindunixsocket = platform.bindunixsocket
cachestat = platform.cachestat
checkexec = platform.checkexec
checklink = platform.checklink
copymode = platform.copymode
executablepath = platform.executablepath
expandglobs = platform.expandglobs
explainexit = platform.explainexit
findexe = platform.findexe
gethgcmd = platform.gethgcmd
getuser = platform.getuser
getpid = os.getpid
groupmembers = platform.groupmembers
groupname = platform.groupname
hidewindow = platform.hidewindow
isexec = platform.isexec
isowner = platform.isowner
localpath = platform.localpath
lookupreg = platform.lookupreg
makedir = platform.makedir
nlinks = platform.nlinks
normpath = platform.normpath
normcase = platform.normcase
normcasespec = platform.normcasespec
normcasefallback = platform.normcasefallback
openhardlinks = platform.openhardlinks
oslink = platform.oslink
parsepatchoutput = platform.parsepatchoutput
pconvert = platform.pconvert
poll = platform.poll
popen = platform.popen
posixfile = platform.posixfile
quotecommand = platform.quotecommand
readpipe = platform.readpipe
rename = platform.rename
removedirs = platform.removedirs
samedevice = platform.samedevice
samefile = platform.samefile
samestat = platform.samestat
setbinary = platform.setbinary
setflags = platform.setflags
setsignalhandler = platform.setsignalhandler
shellquote = platform.shellquote
spawndetached = platform.spawndetached
split = platform.split
sshargs = platform.sshargs
statfiles = getattr(osutil, 'statfiles', platform.statfiles)
statisexec = platform.statisexec
statislink = platform.statislink
testpid = platform.testpid
umask = platform.umask
unlink = platform.unlink
username = platform.username
139 139
# Python compatibility

# unique sentinel meaning "no value"; always compared by identity
_notset = object()

# disable Python's problematic floating point timestamps (issue4836)
# (Python hypocritically says you shouldn't change this behavior in
# libraries, and sure enough Mercurial is not a library.)
os.stat_float_times(False)
148 148
def safehasattr(thing, attr):
    """Return True if getattr(thing, attr) yields a value.

    The lookup uses the _notset sentinel as its default, so an attribute
    whose value is None or otherwise falsy still counts as present.
    """
    found = getattr(thing, attr, _notset)
    return found is not _notset
151 151
def bitsfrom(container):
    """Return the bitwise OR of every item in container (0 when empty)."""
    combined = 0
    for flag in container:
        combined = combined | flag
    return combined
157 157
# Supported digest names mapped to their hashlib constructors.
DIGESTS = {
    'md5': hashlib.md5,
    'sha1': hashlib.sha1,
    'sha512': hashlib.sha512,
}
# List of digest types from strongest to weakest
DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']

# sanity check: every ranked digest must have a constructor
for k in DIGESTS_BY_STRENGTH:
    assert k in DIGESTS

class digester(object):
    """helper to compute digests.

    This helper can be used to compute one or more digests given their name.

    >>> d = digester(['md5', 'sha1'])
    >>> d.update('foo')
    >>> [k for k in sorted(d)]
    ['md5', 'sha1']
    >>> d['md5']
    'acbd18db4cc2f85cedef654fccc4a4d8'
    >>> d['sha1']
    '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
    >>> digester.preferred(['md5', 'sha1'])
    'sha1'
    """

    def __init__(self, digests, s=''):
        """Create one hash object per name in digests, optionally feeding s.

        Raises Abort if a name is not a key of DIGESTS.
        """
        self._hashes = {}
        for k in digests:
            if k not in DIGESTS:
                raise Abort(_('unknown digest type: %s') % k)
            self._hashes[k] = DIGESTS[k]()
        if s:
            self.update(s)

    def update(self, data):
        """Feed data to every hash object being tracked."""
        for h in self._hashes.values():
            h.update(data)

    def __getitem__(self, key):
        """Return the hex digest computed so far for digest type key.

        Raises Abort if key is not a known digest type.
        """
        if key not in DIGESTS:
            # report the key that was actually requested; the previous code
            # used the unrelated module-level loop variable ``k`` here
            raise Abort(_('unknown digest type: %s') % key)
        return self._hashes[key].hexdigest()

    def __iter__(self):
        """Iterate over the names of the digests being computed."""
        return iter(self._hashes)

    @staticmethod
    def preferred(supported):
        """returns the strongest digest type in both supported and DIGESTS."""

        for k in DIGESTS_BY_STRENGTH:
            if k in supported:
                return k
        return None
215 215
class digestchecker(object):
    """file handle wrapper that additionally checks content against a given
    size and digests.

        d = digestchecker(fh, size, {'md5': '...'})

    When multiple digests are given, all of them are validated.
    """

    def __init__(self, fh, size, digests):
        # wrapped file object, expected byte count, bytes seen so far
        self._fh = fh
        self._size = size
        self._got = 0
        # expected hex digests keyed by digest name, and the running hashes
        self._digests = dict(digests)
        self._digester = digester(self._digests.keys())

    def read(self, length=-1):
        """Read from the wrapped handle, hashing and counting the data."""
        chunk = self._fh.read(length)
        self._digester.update(chunk)
        self._got += len(chunk)
        return chunk

    def validate(self):
        """Raise Abort if the size or any digest of the data read differs
        from what was expected."""
        if self._got != self._size:
            raise Abort(_('size mismatch: expected %d, got %d') %
                        (self._size, self._got))
        for name, expected in self._digests.items():
            actual = self._digester[name]
            if actual == expected:
                continue
            # i18n: first parameter is a digest name
            raise Abort(_('%s mismatch: expected %s, got %s') %
                        (name, expected, actual))
247 247
# Keep the existing ``buffer`` name when there is one; otherwise define a
# replacement taking the same (sliceable, offset, length) arguments.
try:
    buffer = buffer
except NameError:
    if not pycompat.ispy3:
        # plain slicing fallback
        def buffer(sliceable, offset=0, length=None):
            if length is not None:
                return sliceable[offset:offset + length]
            return sliceable[offset:]
    else:
        # on Python 3 slice a memoryview of the object instead
        def buffer(sliceable, offset=0, length=None):
            if length is not None:
                return memoryview(sliceable)[offset:offset + length]
            return memoryview(sliceable)[offset:]
261 261
# value passed as close_fds= to the subprocess calls in this module;
# true only when running on a POSIX platform
closefds = pycompat.osname == 'posix'

# number of bytes requested per os.read() by bufferedinputpipe._fillbuffer
_chunksize = 4096
265 265
class bufferedinputpipe(object):
    """a manually buffered input pipe

    Python will not let us use buffered IO and lazy reading with 'polling' at
    the same time. We cannot probe the buffer state and select will not detect
    that data are ready to read if they are already buffered.

    This class lets us work around that by implementing its own buffering
    (allowing efficient readline) while offering a way to know if the buffer is
    empty from the output (allowing collaboration of the buffer with polling).

    This class lives in the 'util' module because it makes use of the 'os'
    module from the python stdlib.
    """

    def __init__(self, input):
        self._input = input
        # list of chunks read but not yet returned to the caller
        self._buffer = []
        # set once os.read() has returned no data
        self._eof = False
        # total length of the chunks held in self._buffer
        self._lenbuf = 0

    @property
    def hasbuffer(self):
        """True if any data is currently buffered

        This will be used externally as a pre-step for polling IO. If there is
        already data then no polling should be set in place."""
        return bool(self._buffer)

    @property
    def closed(self):
        """Whether the underlying input object reports itself closed."""
        return self._input.closed

    def fileno(self):
        """Return the file descriptor of the underlying input object."""
        return self._input.fileno()

    def close(self):
        """Close the underlying input object."""
        return self._input.close()

    def read(self, size):
        """Return up to 'size' units of data, reading from the pipe until
        that much is buffered or end of file is reached."""
        while (not self._eof) and (self._lenbuf < size):
            self._fillbuffer()
        return self._frombuffer(size)

    def readline(self, *args, **kwargs):
        """Return data up to and including the next newline, or everything
        that is left when end of file comes first.

        Positional and keyword arguments are accepted but not used.
        """
        if 1 < len(self._buffer):
            # this should not happen because both read and readline end with a
            # _frombuffer call that collapse it.
            self._buffer = [''.join(self._buffer)]
            self._lenbuf = len(self._buffer[0])
        lfi = -1
        if self._buffer:
            lfi = self._buffer[-1].find('\n')
        # only the newest chunk is searched: older chunks were already
        # searched without finding a newline
        while (not self._eof) and lfi < 0:
            self._fillbuffer()
            if self._buffer:
                lfi = self._buffer[-1].find('\n')
        size = lfi + 1
        if lfi < 0: # end of file
            size = self._lenbuf
        elif 1 < len(self._buffer):
            # we need to take previous chunks into account
            size += self._lenbuf - len(self._buffer[-1])
        return self._frombuffer(size)

    def _frombuffer(self, size):
        """return at most 'size' data from the buffer

        The data are removed from the buffer."""
        if size == 0 or not self._buffer:
            return ''
        buf = self._buffer[0]
        if 1 < len(self._buffer):
            buf = ''.join(self._buffer)

        data = buf[:size]
        buf = buf[len(data):]
        # whatever remains is stored back as a single chunk
        if buf:
            self._buffer = [buf]
            self._lenbuf = len(buf)
        else:
            self._buffer = []
            self._lenbuf = 0
        return data

    def _fillbuffer(self):
        """read data to the buffer"""
        data = os.read(self._input.fileno(), _chunksize)
        if not data:
            # an empty read marks end of file
            self._eof = True
        else:
            self._lenbuf += len(data)
            self._buffer.append(data)
359 359
def popen2(cmd, env=None, newlines=False):
    """Run cmd through the shell and return its (stdin, stdout) pipes."""
    # Setting bufsize to -1 lets the system decide the buffer size.
    # The default for bufsize is 0, meaning unbuffered. This leads to
    # poor performance on Mac OS X: http://bugs.python.org/issue4194
    options = dict(shell=True, bufsize=-1,
                   close_fds=closefds,
                   stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                   universal_newlines=newlines,
                   env=env)
    proc = subprocess.Popen(cmd, **options)
    return proc.stdin, proc.stdout
370 370
def popen3(cmd, env=None, newlines=False):
    """Like popen4() with its default bufsize, but return only the
    (stdin, stdout, stderr) pipes and drop the process object."""
    fin, fout, ferr, _proc = popen4(cmd, env, newlines)
    return fin, fout, ferr
374 374
def popen4(cmd, env=None, newlines=False, bufsize=-1):
    """Run cmd through the shell with all three standard streams piped.

    Returns (stdin, stdout, stderr, process).
    """
    options = dict(shell=True, bufsize=bufsize,
                   close_fds=closefds,
                   stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                   stderr=subprocess.PIPE,
                   universal_newlines=newlines,
                   env=env)
    proc = subprocess.Popen(cmd, **options)
    return proc.stdin, proc.stdout, proc.stderr, proc
383 383
def version():
    """Return version information if available."""
    try:
        from . import __version__ as versionmod
    except ImportError:
        # the __version__ module could not be imported
        return 'unknown'
    return versionmod.version
391 391
def versiontuple(v=None, n=4):
    """Parses a Mercurial version string into an N-tuple.

    The version string to be parsed is specified with the ``v`` argument.
    If it isn't defined, the current Mercurial version string will be parsed.

    ``n`` can be 2, 3, or 4 (any other value makes the function return
    None). Here is how some version strings map to returned values:

    >>> v = '3.6.1+190-df9b73d2d444'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, 1)
    >>> versiontuple(v, 4)
    (3, 6, 1, '190-df9b73d2d444')

    >>> versiontuple('3.6.1+190-df9b73d2d444+20151118')
    (3, 6, 1, '190-df9b73d2d444+20151118')

    >>> v = '3.6'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, None)
    >>> versiontuple(v, 4)
    (3, 6, None, None)

    >>> v = '3.9-rc'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc')

    >>> v = '3.9-rc+2-02a8fea4289b'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc+2-02a8fea4289b')
    """
    if not v:
        v = version()
    # Split once on the first '+' or '-': numeric part, then "extra" part.
    # A raw string keeps ``\+`` from being an invalid string escape.
    parts = remod.split(r'[\+-]', v, 1)
    if len(parts) == 1:
        vparts, extra = parts[0], None
    else:
        vparts, extra = parts

    vints = []
    for i in vparts.split('.'):
        try:
            vints.append(int(i))
        except ValueError:
            # stop at the first component that is not an integer
            break
    # (3, 6) -> (3, 6, None)
    while len(vints) < 3:
        vints.append(None)

    if n == 2:
        return (vints[0], vints[1])
    if n == 3:
        return (vints[0], vints[1], vints[2])
    if n == 4:
        return (vints[0], vints[1], vints[2], extra)
460 460
# used by parsedate
# strftime/strptime-style patterns
defaultdateformats = (
    '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
    '%Y-%m-%dT%H:%M',    # without seconds
    '%Y-%m-%dT%H%M%S',   # another awful but legal variant without :
    '%Y-%m-%dT%H%M',     # without seconds
    '%Y-%m-%d %H:%M:%S', # our common legal variant
    '%Y-%m-%d %H:%M',    # without seconds
    '%Y-%m-%d %H%M%S',   # without :
    '%Y-%m-%d %H%M',     # without seconds
    '%Y-%m-%d %I:%M:%S%p',
    '%Y-%m-%d %H:%M',    # NOTE(review): duplicates the entry four lines up
    '%Y-%m-%d %I:%M%p',
    '%Y-%m-%d',
    '%m-%d',
    '%m/%d',
    '%m/%d/%y',
    '%m/%d/%Y',
    '%a %b %d %H:%M:%S %Y',
    '%a %b %d %I:%M:%S%p %Y',
    '%a, %d %b %Y %H:%M:%S', #  GNU coreutils "/bin/date --rfc-2822"
    '%b %d %H:%M:%S %Y',
    '%b %d %I:%M:%S%p %Y',
    '%b %d %H:%M:%S',
    '%b %d %I:%M:%S%p',
    '%b %d %H:%M',
    '%b %d %I:%M%p',
    '%b %d %Y',
    '%b %d',
    '%H:%M:%S',
    '%I:%M:%S%p',
    '%H:%M',
    '%I:%M%p',
    )

# the default formats plus coarser year-only and month-only patterns
extendeddateformats = defaultdateformats + (
    "%Y",
    "%Y-%m",
    "%b",
    "%b %Y",
    )
502 502
def cachefunc(func):
    '''cache the result of function calls

    The wrapper is chosen from the number of positional arguments func
    declares: none, exactly one, or anything else.
    '''
    # XXX doesn't handle keywords args
    nargs = func.__code__.co_argcount

    if nargs == 0:
        # a list holds the single result, stored on the first call
        result = []
        def f():
            if not result:
                result.append(func())
            return result[0]
    elif nargs == 1:
        # we gain a small amount of time because
        # we don't need to pack/unpack the list
        known = {}
        def f(arg):
            if arg not in known:
                known[arg] = func(arg)
            return known[arg]
    else:
        known = {}
        def f(*args):
            if args not in known:
                known[args] = func(*args)
            return known[args]

    return f
528 528
class sortdict(dict):
    '''a simple sorted dictionary

    Keys are kept in the order they were last set: assigning to an existing
    key moves it to the end. The order is tracked in the ``_list`` attribute
    next to the regular dict storage.
    '''
    def __init__(self, data=None):
        self._list = []
        if data:
            self.update(data)
    def copy(self):
        return sortdict(self)
    def __setitem__(self, key, val):
        # re-setting a key moves it to the end of the order
        if key in self:
            self._list.remove(key)
        self._list.append(key)
        dict.__setitem__(self, key, val)
    def __iter__(self):
        return self._list.__iter__()
    def update(self, src):
        '''set every (key, value) pair from src, a dict or an iterable of
        pairs'''
        if isinstance(src, dict):
            src = src.iteritems()
        for k, v in src:
            self[k] = v
    def clear(self):
        dict.clear(self)
        self._list = []
    def items(self):
        return [(k, self[k]) for k in self._list]
    def __delitem__(self, key):
        dict.__delitem__(self, key)
        self._list.remove(key)
    def pop(self, key, *args, **kwargs):
        '''remove key and return its value, like dict.pop()

        Returns the supplied default when key is missing; raises KeyError
        when key is missing and no default is given.
        '''
        # the popped value used to be discarded, so pop() returned None
        value = dict.pop(self, key, *args, **kwargs)
        try:
            self._list.remove(key)
        except ValueError:
            # key was absent and a default was supplied
            pass
        return value
    def keys(self):
        return self._list[:]
    def iterkeys(self):
        return self._list.__iter__()
    def iteritems(self):
        for k in self._list:
            yield k, self[k]
    def insert(self, index, key, val):
        '''set key to val, placing it at position index in the order'''
        # drop any previous position first so the key is never listed twice
        if key in self:
            self._list.remove(key)
        self._list.insert(index, key)
        dict.__setitem__(self, key, val)
    def __repr__(self):
        if not self:
            return '%s()' % self.__class__.__name__
        return '%s(%r)' % (self.__class__.__name__, self.items())
577 577
class _lrucachenode(object):
    """A node in a doubly linked list.

    Holds a reference to nodes on either side as well as a key-value
    pair for the dictionary entry.
    """
    __slots__ = (u'next', u'prev', u'key', u'value')

    def __init__(self):
        # a new node starts unlinked, with no value and no key
        self.next = self.prev = None
        self.value = None
        self.markempty()

    def markempty(self):
        """Mark the node as emptied."""
        self.key = _notset
596 596
class lrucachedict(object):
    """Dict that caches most recent accesses and sets.

    The dict consists of an actual backing dict - indexed by original
    key - and a doubly linked circular list defining the order of entries in
    the cache.

    The head node is the newest entry in the cache. If the cache is full,
    we recycle head.prev and make it the new head. Cache accesses result in
    the node being moved to before the existing head and being marked as the
    new head node.

    Nodes whose key is ``_notset`` hold no entry. They sit at the oldest end
    of the list (just before the head) and are reused before new nodes are
    allocated.
    """
    def __init__(self, max):
        # key -> node
        self._cache = {}

        # the list starts with a single empty node linked to itself
        self._head = head = _lrucachenode()
        head.prev = head
        head.next = head
        # number of nodes allocated in the list (filled or empty)
        self._size = 1
        self._capacity = max

    def __len__(self):
        return len(self._cache)

    def __contains__(self, k):
        return k in self._cache

    def __iter__(self):
        # We don't have to iterate in cache order, but why not.
        n = self._head
        for i in range(len(self._cache)):
            yield n.key
            n = n.next

    def __getitem__(self, k):
        node = self._cache[k]
        self._movetohead(node)
        return node.value

    def __setitem__(self, k, v):
        node = self._cache.get(k)
        # Replace existing value and mark as newest.
        if node is not None:
            node.value = v
            self._movetohead(node)
            return

        if self._size < self._capacity:
            node = self._addcapacity()
        else:
            # Grab the last/oldest item.
            node = self._head.prev

        # At capacity. Kill the old entry.
        if node.key is not _notset:
            del self._cache[node.key]

        node.key = k
        node.value = v
        self._cache[k] = node
        # And mark it as newest entry. No need to adjust order since it
        # is already self._head.prev.
        self._head = node

    def __delitem__(self, k):
        node = self._cache.pop(k)
        node.markempty()

        # Temporarily mark as newest item before re-adjusting head to make
        # this node the oldest item.
        self._movetohead(node)
        self._head = node.next

    # Additional dict methods.

    def get(self, k, default=None):
        """Return the value for k, or default when k is not cached.

        Unlike item access, this lookup does not mark the entry as newest.
        """
        try:
            return self._cache[k].value
        except KeyError:
            return default

    def clear(self):
        n = self._head
        while n.key is not _notset:
            n.markempty()
            n = n.next

        self._cache.clear()

    def copy(self):
        """Return a new lrucachedict with the same capacity, entries and
        recency order."""
        result = lrucachedict(self._capacity)

        # Empty nodes sit at the oldest end of the list, so skip them to find
        # the oldest real entry. Starting at head.prev unconditionally copied
        # a _notset key whenever the cache was not full.
        n = self._head.prev
        while n.key is _notset and n is not self._head:
            n = n.prev

        # Iterate in oldest-to-newest order, so the copy has the right ordering
        for i in range(len(self._cache)):
            result[n.key] = n.value
            n = n.prev
        return result

    def _movetohead(self, node):
        """Mark a node as the newest, making it the new head.

        When a node is accessed, it becomes the freshest entry in the LRU
        list, which is denoted by self._head.

        Visually, let's make ``N`` the new head node (* denotes head):

            previous/oldest <-> head <-> next/next newest

            ----<->--- A* ---<->-----
            |                       |
            E <-> D <-> N <-> C <-> B

        To:

            ----<->--- N* ---<->-----
            |                       |
            E <-> D <-> C <-> B <-> A

        This requires the following moves:

           C.next = D  (node.prev.next = node.next)
           D.prev = C  (node.next.prev = node.prev)
           E.next = N  (head.prev.next = node)
           N.prev = E  (node.prev = head.prev)
           N.next = A  (node.next = head)
           A.prev = N  (head.prev = node)
        """
        head = self._head
        # C.next = D
        node.prev.next = node.next
        # D.prev = C
        node.next.prev = node.prev
        # N.prev = E
        node.prev = head.prev
        # N.next = A
        # It is tempting to do just "head" here, however if node is
        # adjacent to head, this will do bad things.
        node.next = head.prev.next
        # E.next = N
        node.next.prev = node
        # A.prev = N
        node.prev.next = node

        self._head = node

    def _addcapacity(self):
        """Add a node to the circular linked list.

        The new node is inserted before the head node.
        """
        head = self._head
        node = _lrucachenode()
        head.prev.next = node
        node.prev = head.prev
        node.next = head
        head.prev = node
        self._size += 1
        return node
755 755
def lrucachefunc(func):
    '''cache most recent results of function calls'''
    results = {}
    # keys ordered from least to most recently used
    recency = collections.deque()

    def lookup(key, args):
        if key in results:
            # hit: take the key out so it can be re-added as most recent
            recency.remove(key)
        else:
            # miss: evict the least recently used entry once more than 20
            # results are stored, then compute the new one
            if len(results) > 20:
                del results[recency.popleft()]
            results[key] = func(*args)
        recency.append(key)
        return results[key]

    if func.__code__.co_argcount == 1:
        def f(arg):
            return lookup(arg, (arg,))
    else:
        def f(*args):
            return lookup(args, args)

    return f
782 782
class propertycache(object):
    """Descriptor that computes an attribute and stores the result in the
    instance's __dict__ under the wrapped function's name."""

    def __init__(self, func):
        self.name = func.__name__
        self.func = func

    def __get__(self, obj, type=None):
        value = self.func(obj)
        self.cachevalue(obj, value)
        return value

    def cachevalue(self, obj, value):
        # __dict__ assignment required to bypass __setattr__ (eg: repoview)
        obj.__dict__[self.name] = value
795 795
def pipefilter(s, cmd):
    '''filter string S through command CMD, returning its output'''
    proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    # communicate() returns (stdout, stderr); stderr is not piped
    return proc.communicate(s)[0]
802 802
def tempfilter(s, cmd):
    '''filter string S through a pair of temporary files with CMD.
    CMD is used as a template to create the real command to be run,
    with the strings INFILE and OUTFILE replaced by the real names of
    the temporary files generated.

    Returns the content of the output file. Raises Abort if the command
    exits with a failure status. Both temporary files are removed on a
    best-effort basis, whatever the outcome.'''
    inname, outname = None, None
    try:
        infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
        fp = os.fdopen(infd, pycompat.sysstr('wb'))
        try:
            fp.write(s)
        finally:
            # close even when the write fails, so the descriptor is not
            # leaked and the file can still be unlinked below
            fp.close()
        outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
        # only the name is needed; the command writes the file itself
        os.close(outfd)
        cmd = cmd.replace('INFILE', inname)
        cmd = cmd.replace('OUTFILE', outname)
        code = os.system(cmd)
        # on OpenVMS a status with the low bit set is treated as success
        if pycompat.sysplatform == 'OpenVMS' and code & 1:
            code = 0
        if code:
            raise Abort(_("command '%s' failed: %s") %
                        (cmd, explainexit(code)))
        return readfile(outname)
    finally:
        # cleanup is deliberately best-effort: removal errors are ignored
        try:
            if inname:
                os.unlink(inname)
        except OSError:
            pass
        try:
            if outname:
                os.unlink(outname)
        except OSError:
            pass
836 836
# command prefixes recognized by filter(), mapped to the function that runs
# the rest of the command
filtertable = {
    'tempfile:': tempfilter,
    'pipe:': pipefilter,
    }
841 841
def filter(s, cmd):
    "filter a string through a command that transforms its input to its output"
    for prefix, handler in filtertable.iteritems():
        if not cmd.startswith(prefix):
            continue
        # strip the prefix and any whitespace that follows it
        return handler(s, cmd[len(prefix):].lstrip())
    # no known prefix: pipe through the command as given
    return pipefilter(s, cmd)
848 848
def binary(s):
    """return true if a string is binary data"""
    if not s:
        # empty or None input is never binary
        return False
    # a NUL character marks the data as binary
    return '\0' in s
852 852
def increasingchunks(source, min=1024, max=65536):
    '''return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max'''
    def log2(x):
        # position of the highest set bit of x; 0 when x is 0
        bits = 0
        while x:
            x >>= 1
            bits += 1
        if not bits:
            return 0
        return bits - 1

    pending = []
    pendinglen = 0
    for piece in source:
        pending.append(piece)
        pendinglen += len(piece)
        if pendinglen < min:
            # not enough accumulated yet
            continue
        if min < max:
            # grow the threshold: at least double it, and jump straight to
            # the largest power of two not above what was just gathered
            min <<= 1
            nmin = 1 << log2(pendinglen)
            if nmin > min:
                min = nmin
            if min > max:
                min = max
        yield ''.join(pending)
        pending = []
        pendinglen = 0
    # flush whatever is left once the source is exhausted
    if pending:
        yield ''.join(pending)
883 883
# convenience alias so error.Abort can be referenced as util.Abort
Abort = error.Abort
885 885
def always(fn):
    """Return True whatever fn is."""
    return True
888 888
def never(fn):
    """Return False whatever fn is."""
    return False
891 891
def nogc(func):
    """disable garbage collector

    Python's garbage collector triggers a GC each time a certain number of
    container objects (the number being defined by gc.get_threshold()) are
    allocated even when marked not to be tracked by the collector. Tracking has
    no effect on when GCs are triggered, only on what objects the GC looks
    into. As a workaround, disable GC while building complex (huge)
    containers.

    This garbage collector issue has been fixed in 2.7.
    """
    if sys.version_info < (2, 7):
        def wrapper(*args, **kwargs):
            # remember whether GC was on so it is only re-enabled if it was
            wasenabled = gc.isenabled()
            gc.disable()
            try:
                return func(*args, **kwargs)
            finally:
                if wasenabled:
                    gc.enable()
        return wrapper
    # 2.7 and later: func is returned unwrapped
    return func
915 915
def pathto(root, n1, n2):
    '''return the relative path from one place to another.
    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    '''
    if not n1:
        return localpath(n2)
    if os.path.isabs(n1):
        rootdrive = os.path.splitdrive(root)[0]
        n1drive = os.path.splitdrive(n1)[0]
        if rootdrive != n1drive:
            # different drives: no relative path exists between them
            return os.path.join(root, localpath(n2))
        n2 = '/'.join((pconvert(root), n2))
    src = splitpath(n1)
    dst = n2.split('/')
    # count the leading components the two paths share
    shared = 0
    while (shared < len(src) and shared < len(dst)
           and src[shared] == dst[shared]):
        shared += 1
    # climb out of what is left of n1, then descend into what is left of n2
    climb = ['..'] * (len(src) - shared)
    return pycompat.ossep.join(climb + dst[shared:]) or '.'
941 941
def mainfrozen():
    """return True if we are a frozen executable.

    The code supports py2exe (most common, Windows only) and tools/freeze
    (portable, not much used).
    """
    if safehasattr(sys, "frozen"):
        # new py2exe
        return True
    if safehasattr(sys, "importers"):
        # old py2exe
        return True
    # tools/freeze
    return imp.is_frozen(u"__main__")
951 951
# the location of data files matching the source code
if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
    # executable version (py2exe) doesn't support __file__
    datapath = os.path.dirname(pycompat.sysexecutable)
else:
    datapath = os.path.dirname(pycompat.fsencode(__file__))

# hand the computed data directory to the i18n module
i18n.setdatapath(datapath)

# cached result of hgexecutable(); None until it has been determined
_hgexecutable = None
962 962
def hgexecutable():
    """return location of the 'hg' executable.

    Defaults to $HG or 'hg' in the search path.
    """
    if _hgexecutable is not None:
        # already determined by an earlier call
        return _hgexecutable

    hg = encoding.environ.get('HG')
    mainmod = sys.modules[pycompat.sysstr('__main__')]
    if hg:
        path = hg
    elif mainfrozen():
        if getattr(sys, 'frozen', None) == 'macosx_app':
            # Env variable set by py2app
            path = encoding.environ['EXECUTABLEPATH']
        else:
            path = pycompat.sysexecutable
    elif (os.path.basename(
            pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
        # running from a script that is itself named 'hg'
        path = pycompat.fsencode(mainmod.__file__)
    else:
        path = findexe('hg') or os.path.basename(sys.argv[0])
    _sethgexecutable(path)
    return _hgexecutable
986 986
def _sethgexecutable(path):
    """set location of the 'hg' executable"""
    # stored in the module-level cache that hgexecutable() returns
    global _hgexecutable
    _hgexecutable = path
991 991
def _isstdout(f):
    """Tell whether f has the same file descriptor as sys.__stdout__.

    When f has no usable ``fileno`` attribute, that falsy attribute value
    (None by default) is returned as is.
    """
    fileno = getattr(f, 'fileno', None)
    if not fileno:
        return fileno
    return fileno() == sys.__stdout__.fileno()
995 995
def shellenviron(environ=None):
    """return environ with optional override, useful for shelling out"""
    def py2shell(val):
        'convert python object into string that is useful to shell'
        if val is True:
            return '1'
        if val is None or val is False:
            return '0'
        return str(val)

    # start from a copy of the process environment
    env = dict(encoding.environ)
    if environ:
        for name, value in environ.iteritems():
            env[name] = py2shell(value)
    # HG is always set, overriding any caller-provided value
    env['HG'] = hgexecutable()
    return env
1010 1010
def system(cmd, environ=None, cwd=None, out=None):
    '''enhanced shell command execution.
    run with environment maybe modified, maybe in different dir.

    if out is specified, it is assumed to be a file-like object that has a
    write() method. stdout and stderr will be redirected to out.

    returns the exit status of the command.'''
    # flush our own pending output first; a failing flush is ignored
    try:
        stdout.flush()
    except Exception:
        pass
    cmd = quotecommand(cmd)
    oldpython = sys.version_info[0] == 2 and sys.version_info[1] < 7
    if pycompat.sysplatform == 'plan9' and oldpython:
        # subprocess kludge to work around issues in half-baked Python
        # ports, notably bichued/python:
        if cwd is not None:
            os.chdir(cwd)
        rc = os.system(cmd)
    else:
        env = shellenviron(environ)
        if out is not None and not _isstdout(out):
            # capture stdout and stderr together and copy them line by line
            proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                                    env=env, cwd=cwd, stdout=subprocess.PIPE,
                                    stderr=subprocess.STDOUT)
            for line in iter(proc.stdout.readline, ''):
                out.write(line)
            proc.wait()
            rc = proc.returncode
        else:
            # output goes to our own stdout: let the child inherit it
            rc = subprocess.call(cmd, shell=True, close_fds=closefds,
                                 env=env, cwd=cwd)
    # on OpenVMS a status with the low bit set is treated as success
    if pycompat.sysplatform == 'OpenVMS' and rc & 1:
        rc = 0
    return rc
1045 1045
def checksignature(func):
    '''Return a wrapper around func that detects bad-call TypeErrors.

    A TypeError whose traceback has a single entry (i.e. raised by the
    call itself, not from inside func) is turned into
    error.SignatureError; any other TypeError propagates unchanged.'''
    def check(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError:
            depth = len(traceback.extract_tb(sys.exc_info()[2]))
            if depth != 1:
                raise
            raise error.SignatureError

    return check
1057 1057
# Hardlinks are problematic on CIFS, do not allow hardlinks
# until we find a way to work around it cleanly (issue4546).
# This is a variable so extensions can opt-in to using them.
allowhardlinks = False

# a whitelist of known filesystems where hardlink works reliably
# (copyfile() compares these names against osutil.getfstype() output)
_hardlinkfswhitelist = set([
    'btrfs',
    'ext2',
    'ext3',
    'ext4',
    'jfs',
    'reiserfs',
    'tmpfs',
    'xfs',
])
1074
def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
    '''copy a file, preserving mode and optionally other stat info like
    atime/mtime

    If dest already exists it is unlinked first.  A symlink src is
    recreated as a symlink rather than followed.

    hardlink requests a hardlink instead of a copy.  It is honored only
    when the module-level allowhardlinks is true and the destination
    directory is on a whitelisted filesystem; if linking fails, a normal
    copy is made.

    checkambig argument is used with filestat, and is useful only if
    destination file is guarded by any lock (e.g. repo.lock or
    repo.wlock).

    copystat and checkambig should be exclusive.

    Raises Abort if shutil reports a copy error.
    '''
    assert not (copystat and checkambig)
    oldstat = None
    if os.path.lexists(dest):
        if checkambig:
            oldstat = filestat(dest)
        unlink(dest)
    if allowhardlinks and hardlink:
        # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
        # unless we are confident that dest is on a whitelisted filesystem.
        # The filesystem is only probed when hardlinks can actually be used.
        # A bare file name has an empty dirname, which means the current
        # directory.
        destdir = os.path.dirname(dest) or '.'
        fstype = getattr(osutil, 'getfstype', lambda x: None)(destdir)
        if fstype in _hardlinkfswhitelist:
            try:
                oslink(src, dest)
                return
            except (IOError, OSError):
                pass # fall back to normal copy
    if os.path.islink(src):
        os.symlink(os.readlink(src), dest)
        # copytime is ignored for symlinks, but in general copytime isn't needed
        # for them anyway
    else:
        try:
            shutil.copyfile(src, dest)
            if copystat:
                # copystat also copies mode
                shutil.copystat(src, dest)
            else:
                shutil.copymode(src, dest)
                if oldstat and oldstat.stat:
                    newstat = filestat(dest)
                    if newstat.isambig(oldstat):
                        # stat of copied file is ambiguous to original one
                        advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
                        os.utime(dest, (advanced, advanced))
        except shutil.Error as inst:
            raise Abort(str(inst))
1105 1124
def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
    """Copy a file or directory tree, hardlinking files when possible.

    With hardlink=None, linking is attempted when src and the parent
    directory of dst are on the same device.  Once a link attempt fails,
    plain copies are used from then on.  ``progress`` is called with a
    topic and the running count of files handled, and finally with None.

    Returns a (hardlink, num) pair: whether hardlinking is still in use
    and how many files were processed.
    """
    num = 0

    if hardlink is None:
        srcdev = os.stat(src).st_dev
        hardlink = (srcdev == os.stat(os.path.dirname(dst)).st_dev)
    if hardlink:
        topic = _('linking')
    else:
        topic = _('copying')

    def nprog(t, pos):
        # shift nested progress positions by what was handled so far
        if pos is not None:
            return progress(t, pos + num)

    if os.path.isdir(src):
        os.mkdir(dst)
        for name, kind in osutil.listdir(src):
            childsrc = os.path.join(src, name)
            childdst = os.path.join(dst, name)
            hardlink, copied = copyfiles(childsrc, childdst, hardlink,
                                         progress=nprog)
            num += copied
    else:
        if hardlink:
            try:
                oslink(src, dst)
            except (IOError, OSError):
                hardlink = False
        if not hardlink:
            shutil.copy(src, dst)
        num += 1
        progress(topic, num)
    progress(topic, None)

    return hardlink, num
1142 1161
_winreservednames = '''con prn aux nul
    com1 com2 com3 com4 com5 com6 com7 com8 com9
    lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
_winreservedchars = ':*?"<>|'
def checkwinfilename(path):
    r'''Check that the base-relative path is a valid filename on Windows.
    Returns None if the path is ok, or a UI string describing the problem.

    >>> checkwinfilename("just/a/normal/path")
    >>> checkwinfilename("foo/bar/con.xml")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename("foo/con.xml/bar")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename("foo/bar/xml.con")
    >>> checkwinfilename("foo/bar/AUX/bla.txt")
    "filename contains 'AUX', which is reserved on Windows"
    >>> checkwinfilename("foo/bar/bla:.txt")
    "filename contains ':', which is reserved on Windows"
    >>> checkwinfilename("foo/bar/b\07la.txt")
    "filename contains '\\x07', which is invalid on Windows"
    >>> checkwinfilename("foo/bar/bla ")
    "filename ends with ' ', which is not allowed on Windows"
    >>> checkwinfilename("../bar")
    >>> checkwinfilename("foo\\")
    "filename ends with '\\', which is invalid on Windows"
    >>> checkwinfilename("foo\\/bar")
    "directory name ends with '\\', which is invalid on Windows"
    '''
    if path.endswith('\\'):
        return _("filename ends with '\\', which is invalid on Windows")
    if '\\/' in path:
        return _("directory name ends with '\\', which is invalid on Windows")
    components = path.replace('\\', '/').split('/')
    for component in components:
        if not component:
            # empty pieces come from leading/doubled separators
            continue
        for char in pycompat.bytestr(component):
            if char in _winreservedchars:
                return _("filename contains '%s', which is reserved "
                         "on Windows") % char
            if ord(char) <= 31:
                return _("filename contains %r, which is invalid "
                         "on Windows") % char
        # only the part before the first dot is checked for device names
        stem = component.partition('.')[0]
        if stem and stem.lower() in _winreservednames:
            return _("filename contains '%s', which is reserved "
                     "on Windows") % stem
        last = component[-1]
        if last in '. ' and component not in '..':
            return _("filename ends with '%s', which is not allowed "
                     "on Windows") % last
1193 1212
# Windows gets the portable filename check defined in this module; other
# platforms use the one provided by the platform module.
if pycompat.osname == 'nt':
    checkosfilename = checkwinfilename
else:
    checkosfilename = platform.checkosfilename

# Prefer time.perf_counter whenever it exists.  The fallbacks are looked up
# only when needed, so time.clock is never touched on interpreters that
# have perf_counter (time.clock is absent from newer Python versions).
if safehasattr(time, "perf_counter"):
    timer = time.perf_counter
elif pycompat.osname == 'nt':
    timer = time.clock
else:
    timer = time.time
1203 1222
def makelock(info, pathname):
    """Create a lock at pathname carrying ``info`` as its payload.

    A symlink whose target is ``info`` is preferred.  If symlinks are
    unavailable or creating one fails for a reason other than the lock
    already existing, a regular file is created exclusively and ``info``
    is written into it.

    Raises OSError (EEXIST) if the lock already exists.
    """
    try:
        return os.symlink(info, pathname)
    except OSError as why:
        if why.errno == errno.EEXIST:
            raise
    except AttributeError: # no symlink in os
        pass

    ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
    try:
        os.write(ld, info)
    finally:
        # never leak the descriptor, even if the write fails
        os.close(ld)
1216 1235
def readlock(pathname):
    """Return the payload stored in the lock at pathname.

    The symlink target is returned when pathname is a symlink.  If it is
    not a symlink (EINVAL), symlinks are unsupported (ENOSYS) or os has no
    readlink at all, the content of the file is returned instead.  Other
    OSErrors propagate.
    """
    try:
        return os.readlink(pathname)
    except OSError as why:
        if why.errno not in (errno.EINVAL, errno.ENOSYS):
            raise
    except AttributeError: # no symlink in os
        pass
    fp = posixfile(pathname)
    try:
        return fp.read()
    finally:
        # close the file even when reading fails
        fp.close()
1229 1248
def fstat(fp):
    '''Return stat info for a file object.

    Objects lacking a fileno method are stat'ed through their ``name``
    attribute instead.'''
    try:
        fd = fp.fileno()
    except AttributeError:
        return os.stat(fp.name)
    return os.fstat(fd)
1236 1255
1237 1256 # File system features
1238 1257
def fscasesensitive(path):
    """
    Return true if the given path is on a case-sensitive filesystem

    Requires a path (like /foo/.hg) ending with a foldable final
    directory component.  The final component is case-swapped and the
    result is lstat'ed: only when that yields the very same stat data as
    the original path is the filesystem reported as case-insensitive.
    """
    origstat = os.lstat(path)
    parent, leaf = os.path.split(path)
    folded = leaf.upper()
    if folded == leaf:
        folded = leaf.lower()
    if folded == leaf:
        return True # no evidence against case sensitivity
    try:
        foldedstat = os.lstat(os.path.join(parent, folded))
    except OSError:
        # the case-swapped name does not exist
        return True
    return foldedstat != origstat
1261 1280
# _re2 is None while re2 usability is still unchecked, False when re2 is
# unavailable, and the boolean outcome of the check afterwards.
try:
    import re2
    _re2 = None
except ImportError:
    _re2 = False

class _re(object):
    '''Regular expression front end that prefers re2 over the re module.'''

    def _checkre2(self):
        '''Probe re2 once and store the outcome in the module-level _re2.'''
        global _re2
        try:
            # check if match works, see issue3964
            _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
        except ImportError:
            _re2 = False

    def compile(self, pat, flags=0):
        '''Compile a regular expression, using re2 if possible

        For best performance, use only re2-compatible regexp features. The
        only flags from the re module that are re2-compatible are
        IGNORECASE and MULTILINE.  Patterns re2 rejects, and any other
        flags, are compiled with the re module.'''
        if _re2 is None:
            self._checkre2()
        re2flags = remod.IGNORECASE | remod.MULTILINE
        if _re2 and not (flags & ~re2flags):
            # re2 takes flags as inline prefixes only
            prefix = ''
            if flags & remod.MULTILINE:
                prefix += '(?m)'
            if flags & remod.IGNORECASE:
                prefix += '(?i)'
            try:
                return re2.compile(prefix + pat)
            except re2.error:
                pass
        return remod.compile(pat, flags)

    @propertycache
    def escape(self):
        '''Return the version of escape corresponding to self.compile.

        This is imperfect because whether re2 or re is used for a particular
        function depends on the flags, etc, but it's the best we can do.
        '''
        global _re2
        if _re2 is None:
            self._checkre2()
        if not _re2:
            return remod.escape
        return re2.escape

re = _re()
1312 1331
# per-directory cache: directory -> {normcase(entry): entry as stored on disk}
_fspathcache = {}
def fspath(name, root):
    '''Get name in the case stored in the filesystem

    The name should be relative to root, and be normcase-ed for efficiency.

    Note that this function is unnecessary, and should not be
    called, for case-sensitive filesystems (simply because it's expensive).

    The root should be normcase-ed, too.

    Components that cannot be found in their directory listing are
    returned unchanged; separators are preserved as given.
    '''
    def _makefspathcacheentry(dir):
        return dict((normcase(n), n) for n in os.listdir(dir))

    seps = pycompat.ossep
    if pycompat.osaltsep:
        seps = seps + pycompat.osaltsep
    # Protect backslashes. This gets silly very quickly.
    # str.replace returns a new string, so the result must be kept;
    # otherwise a backslash separator would escape the following character
    # inside the regexp character class instead of matching itself.
    seps = seps.replace('\\','\\\\')
    pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
    dir = os.path.normpath(root)
    result = []
    for part, sep in pattern.findall(name):
        if sep:
            result.append(sep)
            continue

        if dir not in _fspathcache:
            _fspathcache[dir] = _makefspathcacheentry(dir)
        contents = _fspathcache[dir]

        found = contents.get(part)
        if not found:
            # retry "once per directory" per "dirstate.walk" which
            # may take place for each patches of "hg qpush", for example
            _fspathcache[dir] = contents = _makefspathcacheentry(dir)
            found = contents.get(part)

        result.append(found or part)
        dir = os.path.join(dir, part)

    return ''.join(result)
1355 1374
def checknlink(testfile):
    '''Check whether hardlink count reporting works properly.

    Two scratch files named after testfile (".hgtmp1"/".hgtmp2") are
    created, hardlinked and removed again.  Returns True only if the link
    count reported for the linked file is greater than one.'''
    def discard(path):
        # best-effort removal of a scratch file
        try:
            os.unlink(path)
        except OSError:
            pass

    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    first = testfile + ".hgtmp1"
    if os.path.lexists(first):
        return False
    try:
        posixfile(first, 'w').close()
    except IOError:
        discard(first)
        return False

    second = testfile + ".hgtmp2"
    handle = None
    try:
        oslink(first, second)
        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        handle = posixfile(second)
        return nlinks(second) > 1
    except OSError:
        return False
    finally:
        if handle is not None:
            handle.close()
        discard(first)
        discard(second)
1391 1410
def endswithsep(path):
    '''Tell whether path ends with the OS separator or, if the platform
    has one, the alternative separator.'''
    if path.endswith(pycompat.ossep):
        return True
    altsep = pycompat.osaltsep
    return altsep and path.endswith(altsep)
1396 1415
def splitpath(path):
    '''Split path into its components on os.sep only.

    os.altsep is deliberately not taken into account, because this is
    meant as a plain replacement for "xxx.split(os.sep)".  Run
    os.path.normpath() on the path first if that matters.'''
    sep = pycompat.ossep
    return path.split(sep)
1404 1423
def gui():
    '''Are we running in a GUI?

    Outside of darwin this is true on Windows and otherwise mirrors the
    DISPLAY environment variable.'''
    if pycompat.sysplatform != 'darwin':
        return pycompat.osname == "nt" or encoding.environ.get("DISPLAY")
    if 'SSH_CONNECTION' in encoding.environ:
        # handle SSH access to a box where the user is logged in
        return False
    isgui = getattr(osutil, 'isgui', None)
    if isgui:
        # check if a CoreGraphics session is available
        return isgui()
    # pure build; use a safe default
    return True
1419 1438
def mktempcopy(name, emptyok=False, createmode=None):
    """Create a temporary file with the same contents from name

    The temporary file is created next to name, in the same directory.

    The permission bits are copied from the original file.

    If the temporary file is going to be truncated immediately, you
    can use emptyok=True as an optimization.

    A missing original file is not an error: the temporary file is then
    left empty.  On any other failure the temporary file is removed and
    the exception is re-raised.

    Returns the name of the temporary file.
    """
    d, fn = os.path.split(name)
    fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, dir=d)
    os.close(fd)
    # Temporary files are created with mode 0600, which is usually not
    # what we want.  If the original file already exists, just copy
    # its mode.  Otherwise, manually obey umask.
    copymode(name, temp, createmode)
    if emptyok:
        return temp
    try:
        try:
            ifp = posixfile(name, "rb")
        except IOError as inst:
            if inst.errno == errno.ENOENT:
                return temp
            if not getattr(inst, 'filename', None):
                inst.filename = name
            raise
        # Close both files even on failure, so no handle leaks and the
        # cleanup below can unlink a temporary file that is no longer open.
        try:
            ofp = posixfile(temp, "wb")
            try:
                for chunk in filechunkiter(ifp):
                    ofp.write(chunk)
            finally:
                ofp.close()
        finally:
            ifp.close()
    except: # re-raises
        try: os.unlink(temp)
        except OSError: pass
        raise
    return temp
1458 1477
class filestat(object):
    """help to exactly detect change of a file

    'stat' attribute is result of 'os.stat()' if specified 'path'
    exists. Otherwise, it is None. This can avoid preparative
    'exists()' examination on client side of this class.
    """
    def __init__(self, path):
        try:
            self.stat = os.stat(path)
        except OSError as inst:
            if inst.errno != errno.ENOENT:
                raise
            # a missing file is represented by stat = None
            self.stat = None

    __hash__ = object.__hash__

    def __eq__(self, old):
        try:
            # if ambiguity between stat of new and old file is
            # avoided, comparison of size, ctime and mtime is enough
            # to exactly detect change of a file regardless of platform
            mine, theirs = self.stat, old.stat
            if mine.st_size != theirs.st_size:
                return False
            if mine.st_ctime != theirs.st_ctime:
                return False
            return mine.st_mtime == theirs.st_mtime
        except AttributeError:
            # either side has no stat data (e.g. missing file)
            return False

    def isambig(self, old):
        """Examine whether new (= self) stat is ambiguous against old one

        "S[N]" below means stat of a file at N-th change:

        - S[n-1].ctime  < S[n].ctime: can detect change of a file
        - S[n-1].ctime == S[n].ctime
          - S[n-1].ctime  < S[n].mtime: means natural advancing (*1)
          - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
          - S[n-1].ctime  > S[n].mtime: never occurs naturally (don't care)
        - S[n-1].ctime  > S[n].ctime: never occurs naturally (don't care)

        Case (*2) above means that a file was changed twice or more at
        same time in sec (= S[n-1].ctime), and comparison of timestamp
        is ambiguous.

        Base idea to avoid such ambiguity is "advance mtime 1 sec, if
        timestamp is ambiguous".

        But advancing mtime only in case (*2) doesn't work as
        expected, because naturally advanced S[n].mtime in case (*1)
        might be equal to manually advanced S[n-1 or earlier].mtime.

        Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
        treated as ambiguous regardless of mtime, to avoid overlooking
        by confliction between such mtime.

        Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
        S[n].mtime", even if size of a file isn't changed.
        """
        try:
            newctime = self.stat.st_ctime
            return newctime == old.stat.st_ctime
        except AttributeError:
            return False

    def avoidambig(self, path, old):
        """Change file stat of specified path to avoid ambiguity

        'old' should be previous filestat of 'path'.

        This skips avoiding ambiguity, if a process doesn't have
        appropriate privileges for 'path'.
        """
        advanced = (old.stat.st_mtime + 1) & 0x7fffffff
        try:
            os.utime(path, (advanced, advanced))
        except OSError as inst:
            if inst.errno != errno.EPERM:
                raise
            # utime() on the file created by another user causes EPERM,
            # if a process doesn't have appropriate privileges

    def __ne__(self, other):
        return not self == other
1542 1561
class atomictempfile(object):
    '''writable file object that atomically updates a file

    Every write lands in a temporary copy of the original file.  close()
    renames that copy over the original name, which is what makes the
    changes visible.  discard(), or destroying the object without
    closing it, throws the writes away.  Used as a context manager, the
    file is closed on normal exit and discarded when an exception
    escapes the block.

    checkambig argument of constructor is used with filestat, and is
    useful only if target file is guarded by any lock (e.g. repo.lock
    or repo.wlock).
    '''
    def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
        self.__name = name      # permanent name
        self._tempname = mktempcopy(name, emptyok=('w' in mode),
                                    createmode=createmode)
        self._fp = posixfile(self._tempname, mode)
        self._checkambig = checkambig

        # delegated methods
        for method in ('read', 'write', 'seek', 'tell', 'fileno'):
            setattr(self, method, getattr(self._fp, method))

    def close(self):
        if self._fp.closed:
            return
        self._fp.close()
        filename = localpath(self.__name)
        # the old stat must be taken before the rename replaces the file
        oldstat = self._checkambig and filestat(filename)
        rename(self._tempname, filename)
        if oldstat and oldstat.stat:
            newstat = filestat(filename)
            if newstat.isambig(oldstat):
                # stat of changed file is ambiguous to original one
                advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
                os.utime(filename, (advanced, advanced))

    def discard(self):
        if self._fp.closed:
            return
        try:
            os.unlink(self._tempname)
        except OSError:
            pass
        self._fp.close()

    def __del__(self):
        if safehasattr(self, '_fp'): # constructor actually did something
            self.discard()

    def __enter__(self):
        return self

    def __exit__(self, exctype, excvalue, traceback):
        if exctype is None:
            self.close()
        else:
            self.discard()
1605 1624
def unlinkpath(f, ignoremissing=False):
    """Unlink f, then remove its parent directories that became empty.

    With ignoremissing, a file that does not exist is not an error.
    """
    remove = unlink
    if ignoremissing:
        remove = tryunlink
    remove(f)
    # try removing directories that might now be empty
    try:
        removedirs(os.path.dirname(f))
    except OSError:
        pass
1617 1636
def tryunlink(f):
    """Remove a file; a file that is already gone (ENOENT) is not an error."""
    try:
        unlink(f)
    except OSError as inst:
        if inst.errno == errno.ENOENT:
            return
        raise
1625 1644
def makedirs(name, mode=None, notindexed=False):
    """recursive directory creation with parent mode inheritance

    Missing parent directories are created first.  A directory that
    already exists is left untouched (its mode is not changed).

    Newly created directories are marked as "not to be indexed by
    the content indexing service", if ``notindexed`` is specified
    for "write" mode access.
    """
    try:
        makedir(name, notindexed)
    except OSError as inst:
        code = inst.errno
        if code == errno.EEXIST:
            return
        if code != errno.ENOENT or not name:
            raise
        # ENOENT: a parent is missing, create it and retry
        parent = os.path.dirname(os.path.abspath(name))
        if parent == name:
            raise
        makedirs(parent, mode, notindexed)
        try:
            makedir(name, notindexed)
        except OSError as retryinst:
            # Catch EEXIST to handle races
            if retryinst.errno == errno.EEXIST:
                return
            raise
    if mode is not None:
        os.chmod(name, mode)
1653 1672
def readfile(path):
    """Return the whole content of the file at path, read in binary mode."""
    with open(path, 'rb') as source:
        content = source.read()
    return content
1657 1676
def writefile(path, text):
    """Replace the content of the file at path with text (binary mode)."""
    with open(path, 'wb') as target:
        target.write(text)
1661 1680
def appendfile(path, text):
    """Append text to the file at path (binary mode), creating it if needed."""
    with open(path, 'ab') as target:
        target.write(text)
1665 1684
class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
        """in_iter is the iterator that's iterating over the input chunks.

        Input chunks larger than 1MB are re-split into 256kB pieces."""
        def splitbig(chunks):
            for chunk in chunks:
                if len(chunk) > 2**20:
                    pos = 0
                    while pos < len(chunk):
                        end = pos + 2 ** 18
                        yield chunk[pos:end]
                        pos = end
                else:
                    yield chunk
        self.iter = splitbig(in_iter)
        # chunks fetched from the iterator but not fully consumed yet
        self._queue = collections.deque()
        # number of bytes already consumed from the first queued chunk
        self._chunkoffset = 0

    def read(self, l=None):
        """Read L bytes of data from the iterator of chunks of data.
        Returns less than L bytes if the iterator runs dry.

        If size parameter is omitted, read everything that is left,
        including data buffered by earlier sized reads."""
        queue = self._queue
        if l is None:
            pending = []
            if queue:
                # hand out what earlier reads left behind first; only the
                # first queued chunk can be partially consumed
                pending.append(queue.popleft()[self._chunkoffset:])
                self._chunkoffset = 0
                pending.extend(queue)
                queue.clear()
            pending.extend(self.iter)
            return ''.join(pending)

        left = l
        buf = []
        while left > 0:
            # refill the queue
            if not queue:
                target = 2**18
                for chunk in self.iter:
                    queue.append(chunk)
                    target -= len(chunk)
                    if target <= 0:
                        break
                if not queue:
                    break

            # The easy way to do this would be to queue.popleft(), modify the
            # chunk (if necessary), then queue.appendleft(). However, for cases
            # where we read partial chunk content, this incurs 2 dequeue
            # mutations and creates a new str for the remaining chunk in the
            # queue. Our code below avoids this overhead.

            chunk = queue[0]
            chunkl = len(chunk)
            offset = self._chunkoffset

            # Use full chunk.
            if offset == 0 and left >= chunkl:
                left -= chunkl
                queue.popleft()
                buf.append(chunk)
                # self._chunkoffset remains at 0.
                continue

            chunkremaining = chunkl - offset

            # Use all of unconsumed part of chunk.
            if left >= chunkremaining:
                left -= chunkremaining
                queue.popleft()
                # offset == 0 is enabled by block above, so this won't merely
                # copy via ``chunk[0:]``.
                buf.append(chunk[offset:])
                self._chunkoffset = 0

            # Partial chunk needed.
            else:
                buf.append(chunk[offset:offset + left])
                self._chunkoffset += left
                left -= chunkremaining

        return ''.join(buf)
1746 1765
def filechunkiter(f, size=131072, limit=None):
    """Generate the data of file f in pieces of at most size bytes
    (default 131072), stopping after limit bytes in total when a limit
    is given (default is to read all data).

    A piece can be shorter than size when it is the last one, or when f
    is a socket or some other kind of file that sometimes returns less
    data than requested."""
    assert size >= 0
    assert limit is None or limit >= 0
    while True:
        nbytes = size
        if limit is not None and limit < size:
            nbytes = limit
        if not nbytes:
            # nothing (more) may be read
            break
        data = f.read(nbytes)
        if not data:
            break
        if limit:
            limit -= len(data)
        yield data
1767 1786
def makedate(timestamp=None):
    '''Return a (unixtime, offset) tuple for a unix timestamp (or for the
    current time), with the offset taken from the local timezone.

    Raises Abort for a negative timestamp.'''
    if timestamp is None:
        timestamp = time.time()
    if timestamp < 0:
        hint = _("check your clock")
        raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
    asutc = datetime.datetime.utcfromtimestamp(timestamp)
    aslocal = datetime.datetime.fromtimestamp(timestamp)
    # offset = UTC minus local time, in seconds
    skew = asutc - aslocal
    return timestamp, skew.days * 86400 + skew.seconds
1780 1799
def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
    """represent a (unixtime, offset) tuple as a localized time.
    unixtime is seconds since the epoch, and offset is the time zone's
    number of seconds away from UTC.

    In format, "%1" expands to the signed hours and "%2" to the minutes
    of the offset; "%z" is shorthand for "%1%2".  Without a date, the
    current time is used.

    >>> datestr((0, 0))
    'Thu Jan 01 00:00:00 1970 +0000'
    >>> datestr((42, 0))
    'Thu Jan 01 00:00:42 1970 +0000'
    >>> datestr((-42, 0))
    'Wed Dec 31 23:59:18 1969 +0000'
    >>> datestr((0x7fffffff, 0))
    'Tue Jan 19 03:14:07 2038 +0000'
    >>> datestr((-0x80000000, 0))
    'Fri Dec 13 20:45:52 1901 +0000'
    """
    t, tz = date or makedate()
    if "%1" in format or "%2" in format or "%z" in format:
        if tz > 0:
            sign = "-"
        else:
            sign = "+"
        hours, minutes = divmod(abs(tz) // 60, 60)
        format = format.replace("%z", "%1%2")
        format = format.replace("%1", "%c%02d" % (sign, hours))
        format = format.replace("%2", "%02d" % minutes)
    # clamp the local time to the signed 32-bit range
    d = max(min(t - tz, 0x7fffffff), -0x80000000)
    # Never use time.gmtime() and datetime.datetime.fromtimestamp()
    # because they use the gmtime() system call which is buggy on Windows
    # for negative values.
    when = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=d)
    return encoding.strtolocal(when.strftime(encoding.strfromlocal(format)))
1816 1835
def shortdate(date=None):
    """Format a (timestamp, tzoffset) tuple as an ISO 8601 date
    (YYYY-MM-DD)."""
    isoformat = '%Y-%m-%d'
    return datestr(date, format=isoformat)
1820 1839
def parsetimezone(s):
    """Look for a trailing timezone in string s.

    Returns an (offset, remainder) pair: the offset in seconds and the
    string with the timezone removed.  The offset is None, and the string
    is returned untouched, when no timezone is recognized."""
    def tzoffset(signchar, hours, minutes):
        # a zone written "+hhmm" yields a negative offset and vice versa
        seconds = (int(hours) * 60 + int(minutes)) * 60
        if signchar == "+":
            return -seconds
        return seconds

    if s.endswith(("GMT", "UTC")):
        return 0, s[:-3].rstrip()

    # Unix-style timezones [+-]hhmm
    if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
        return tzoffset(s[-5], s[-4:-2], s[-2:]), s[:-5].rstrip()

    # ISO8601 trailing Z
    if s.endswith("Z") and s[-2:-1].isdigit():
        return 0, s[:-1]

    # ISO8601-style [+-]hh:mm
    isostyle = (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
                s[-5:-3].isdigit() and s[-2:].isdigit())
    if isostyle:
        return tzoffset(s[-6], s[-5:-3], s[-2:]), s[:-6]

    return None, s
1848 1867
def strdate(string, format, defaults=None):
    """Parse a localized time string according to format and return a
    (unixtime, offset) tuple.

    Time elements that format does not cover are filled in from
    defaults, which maps each element group to a pair of values indexed
    by whether a more specific element was present in the format.

    ValueError is raised if the string cannot be parsed."""
    if defaults is None:
        defaults = {}

    # NOTE: unixtime = localunixtime + offset
    offset, date = parsetimezone(string)

    # add missing elements from defaults
    usenow = False # default to using biased defaults
    for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
        if any(("%" + p) in format for p in part):
            # We've found a specific time element, less specific time
            # elements are relative to today
            usenow = True
        else:
            date += "@" + defaults[part][usenow]
            format += "@%" + part[0]

    timetuple = time.strptime(date, format)
    localunixtime = int(calendar.timegm(timetuple))
    if offset is not None:
        return localunixtime + offset, offset
    # local timezone
    unixtime = int(time.mktime(timetuple))
    return unixtime, unixtime - localunixtime
1879 1898
def parsedate(date, formats=None, bias=None):
    """parse a localized date/time and return a (unixtime, offset) tuple.

    The date may be a "unixtime offset" string or in one of the specified
    formats. If the date already is a (unixtime, offset) tuple, it is returned.

    An empty date yields (0, 0). ``bias`` optionally maps a date element
    group to the value used when the date string leaves that element out.
    Abort is raised if no format matches or the result is out of range.

    >>> parsedate(' today ') == parsedate(\
                                  datetime.date.today().strftime('%b %d'))
    True
    >>> parsedate( 'yesterday ') == parsedate((datetime.date.today() -\
                                               datetime.timedelta(days=1)\
                                              ).strftime('%b %d'))
    True
    >>> now, tz = makedate()
    >>> strnow, strtz = parsedate('now')
    >>> (strnow - now) < 1
    True
    >>> tz == strtz
    True
    """
    if bias is None:
        bias = {}
    if not date:
        return 0, 0
    if isinstance(date, tuple) and len(date) == 2:
        return date
    if not formats:
        formats = defaultdateformats
    date = date.strip()

    # symbolic dates (both untranslated and translated spellings)
    if date == 'now' or date == _('now'):
        return makedate()
    if date == 'today' or date == _('today'):
        date = datetime.date.today().strftime('%b %d')
    elif date == 'yesterday' or date == _('yesterday'):
        oneday = datetime.timedelta(days=1)
        date = (datetime.date.today() - oneday).strftime('%b %d')

    try:
        # "unixtime offset" form
        when, offset = map(int, date.split(' '))
    except ValueError:
        # build the per-element defaults handed to strdate()
        now = makedate()
        defaults = {}
        for part in ("d", "mb", "yY", "HI", "M", "S"):
            # biased value: rounds the specific end of unknowns
            biased = bias.get(part)
            if biased is None:
                biased = "00" if part[0] in "HMS" else "0"

            # current value: matches the generic end to today's date
            current = datestr(now, "%" + part[0])

            defaults[part] = (biased, current)

        # first format that parses wins
        for fmt in formats:
            try:
                when, offset = strdate(date, fmt, defaults)
            except (ValueError, OverflowError):
                continue
            break
        else:
            raise Abort(_('invalid date: %r') % date)
    # validate explicit (probably user-specified) date and
    # time zone offset. values must fit in signed 32 bits for
    # current 32-bit linux runtimes. timezones go from UTC-12
    # to UTC+14
    if when < -0x80000000 or when > 0x7fffffff:
        raise Abort(_('date exceeds 32 bits: %d') % when)
    if offset < -50400 or offset > 43200:
        raise Abort(_('impossible time zone offset: %d') % offset)
    return when, offset
1956 1975
def matchdate(date):
    """Return a function that matches a given date match specifier

    Formats include:

    '{date}' match a given date to the accuracy provided

    '<{date}' on or before a given date

    '>{date}' on or after a given date

    '-{days}' within the given number of days before now

    '{date} to {date}' an inclusive range

    >>> p1 = parsedate("10:29:59")
    >>> p2 = parsedate("10:30:00")
    >>> p3 = parsedate("10:30:59")
    >>> p4 = parsedate("10:31:00")
    >>> p5 = parsedate("Sep 15 10:30:00 1999")
    >>> f = matchdate("10:30")
    >>> f(p1[0])
    False
    >>> f(p2[0])
    True
    >>> f(p3[0])
    True
    >>> f(p4[0])
    False
    >>> f(p5[0])
    False
    """

    def lower(spec):
        # earliest timestamp the spec can denote: missing month/day -> 1
        return parsedate(spec, extendeddateformats, {'mb': "1", 'd': "1"})[0]

    def upper(spec):
        # latest timestamp the spec can denote: missing elements take
        # their maximum; the day is tried from 31 downwards until the
        # resulting date is accepted by parsedate()
        biases = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
        for lastday in ("31", "30", "29"):
            biases["d"] = lastday
            try:
                return parsedate(spec, extendeddateformats, biases)[0]
            except Abort:
                continue
        biases["d"] = "28"
        return parsedate(spec, extendeddateformats, biases)[0]

    date = date.strip()
    if not date:
        raise Abort(_("dates cannot consist entirely of whitespace"))

    marker, rest = date[0], date[1:]

    if marker == "<":
        if not rest:
            raise Abort(_("invalid day spec, use '<DATE'"))
        when = upper(rest)
        return lambda x: x <= when

    if marker == ">":
        if not rest:
            raise Abort(_("invalid day spec, use '>DATE'"))
        when = lower(rest)
        return lambda x: x >= when

    if marker == "-":
        try:
            days = int(rest)
        except ValueError:
            raise Abort(_("invalid day spec: %s") % rest)
        if days < 0:
            raise Abort(_("%s must be nonnegative (see 'hg help dates')")
                % rest)
        when = makedate()[0] - days * 3600 * 24
        return lambda x: x >= when

    if " to " in date:
        a, b = date.split(" to ")
        start, stop = lower(a), upper(b)
    else:
        start, stop = lower(date), upper(date)
    return lambda x: x >= start and x <= stop
2032 2051
def stringmatcher(pattern, casesensitive=True):
    """
    accepts a string, possibly starting with 're:' or 'literal:' prefix.
    returns the matcher name, pattern, and matcher function.
    missing or unknown prefixes are treated as literal matches.

    an invalid regular expression raises error.ParseError.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test('abcdefg', 'abc', 'def', 'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test('re:a.+b', 'nomatch', 'fooadef', 'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test('literal:re:foobar', 'foobar', 're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test('foo:bar', 'foo', 'bar', 'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest('re:A.+b', 'nomatch', 'fooadef', 'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest('ABCDEFG', 'abc', 'def', 'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    if pattern.startswith('re:'):
        pattern = pattern[3:]
        flags = 0 if casesensitive else remod.I
        try:
            regex = remod.compile(pattern, flags)
        except remod.error as e:
            raise error.ParseError(_('invalid regular expression: %s')
                                   % e)
        return 're', pattern, regex.search

    # everything else is a literal; an explicit 'literal:' prefix is dropped
    if pattern.startswith('literal:'):
        pattern = pattern[8:]

    if casesensitive:
        return 'literal', pattern, pattern.__eq__

    # compare lower-cased forms for case-insensitive literal matching
    ipat = encoding.lower(pattern)
    return 'literal', pattern, lambda s: ipat == encoding.lower(s)
2091 2110
def shortuser(user):
    """Return a short representation of a user name or email address."""
    # drop everything from the first '@' on (the mail domain)
    user = user.partition('@')[0]
    # keep what follows the first '<', if there is one
    head, bracket, tail = user.partition('<')
    if bracket:
        user = tail
    # keep the first word, then only what precedes its first dot
    user = user.partition(' ')[0]
    return user.partition('.')[0]
2107 2126
def emailuser(user):
    """Return the user portion of an email address."""
    # strip the domain, then anything up to and including the first '<'
    local = user.partition('@')[0]
    head, bracket, tail = local.partition('<')
    if bracket:
        return tail
    return local
2117 2136
def email(author):
    '''get email of author.

    Returns the text between the first '<' and the first '>'; a missing
    '<' means "from the start", a missing '>' means "to the end".
    '''
    start = author.find('<') + 1
    end = author.find('>')
    if end == -1:
        return author[start:]
    return author[start:end]
2124 2143
def ellipsis(text, maxlength=400):
    """Trim string to at most maxlength (default: 400) columns in display.

    The trimming itself is delegated to encoding.trim(), with '...' passed
    as the ellipsis marker.
    """
    trimmed = encoding.trim(text, maxlength, ellipsis='...')
    return trimmed
2128 2147
def unitcountfn(*unittable):
    '''return a function that renders a readable count of some quantity

    Each entry of ``unittable`` is a (multiplier, divisor, format) tuple.
    The returned function picks the first entry whose threshold
    ``divisor * multiplier`` is reached by the magnitude of the count and
    renders ``count / divisor`` with that entry's format.  If no entry
    matches, the count is rendered unscaled with the last entry's format.

    The magnitude (absolute value) is compared so that negative counts are
    scaled to the same unit as the corresponding positive count instead of
    always falling through to the last entry.
    '''

    def go(count):
        for multiplier, divisor, fmt in unittable:
            if abs(count) >= divisor * multiplier:
                return fmt % (count / float(divisor))
        return unittable[-1][2] % count

    return go
2139 2158
# bytecount(n) renders a byte count using the largest unit (GB, MB, KB,
# bytes) whose threshold the count reaches.  Within a unit the number of
# decimals shrinks as the value grows: two decimals from 1, one from 10 and
# none from 100 units upwards.
bytecount = unitcountfn(
    (100, 1 << 30, _('%.0f GB')),
    (10, 1 << 30, _('%.1f GB')),
    (1, 1 << 30, _('%.2f GB')),
    (100, 1 << 20, _('%.0f MB')),
    (10, 1 << 20, _('%.1f MB')),
    (1, 1 << 20, _('%.2f MB')),
    (100, 1 << 10, _('%.0f KB')),
    (10, 1 << 10, _('%.1f KB')),
    (1, 1 << 10, _('%.2f KB')),
    (1, 1, _('%.0f bytes')),
    )
2152 2171
def escapestr(s):
    """Return s with backslash escapes applied (like 'string_escape')."""
    # call underlying function of s.encode('string_escape') directly for
    # Python 3 compatibility; it returns an (output, length consumed) pair
    escaped, consumed = codecs.escape_encode(s)
    return escaped
2157 2176
def unescapestr(s):
    """Return s with backslash escape sequences decoded."""
    # codecs.escape_decode returns an (output, length consumed) pair
    unescaped, consumed = codecs.escape_decode(s)
    return unescaped
2160 2179
def uirepr(s):
    """Return repr(s) with doubled backslashes collapsed to single ones."""
    # Avoid double backslash in Windows path repr()
    text = repr(s)
    return text.replace('\\\\', '\\')
2164 2183
# The width-aware TextWrapper subclass is only created on first use; the
# factory below then rebinds the module-level name to the class itself.
def MBTextWrapper(**kwargs):
    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each characters.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in East Asian area, but as narrow in other.

        This requires user decision to determine width of such characters.
        """
        def _cutdown(self, ucstr, space_left):
            # split ucstr just before the first character that would make
            # the accumulated column width exceed space_left
            used = 0
            colwidth = encoding.ucolwidth
            for pos, char in enumerate(ucstr):
                used += colwidth(char)
                if used > space_left:
                    return (ucstr[:pos], ucstr[pos:])
            return ucstr, ''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                # put as much of the word as fits on this line and leave
                # the remainder on the chunk stack
                head, tail = self._cutdown(reversed_chunks[-1], space_left)
                cur_line.append(head)
                reversed_chunks[-1] = tail
            elif not cur_line:
                # unbreakable word on an otherwise empty line: take it whole
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            colwidth = encoding.ucolwidth

            if self.width <= 0:
                raise ValueError("invalid width %r (must be > 0)" % self.width)

            lines = []

            # Arrange in reverse order so items can be efficiently popped
            # from a stack of chunks.
            chunks.reverse()

            while chunks:

                # Chunks collected for the current line, and their total
                # column width.
                cur_line = []
                cur_len = 0

                # The first line gets initial_indent, all others
                # subsequent_indent.
                indent = self.initial_indent
                if lines:
                    indent = self.subsequent_indent

                # Maximum width for this line.
                width = self.width - len(indent)

                # First chunk on line is whitespace -- drop it, unless this
                # is the very beginning of the text (i.e. no lines started yet).
                if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                    del chunks[-1]

                # Take chunks for as long as they fit on the current line.
                while chunks:
                    l = colwidth(chunks[-1])
                    if cur_len + l > width:
                        # this line is full
                        break
                    cur_line.append(chunks.pop())
                    cur_len += l

                # The current line is full, and the next chunk is too big to
                # fit on *any* line (not just this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # If the last chunk on this line is all whitespace, drop it.
                if (self.drop_whitespace and
                    cur_line and cur_line[-1].strip() == ''):
                    del cur_line[-1]

                # Convert current line back to a string and store it in list
                # of all lines (return value).
                if cur_line:
                    lines.append(indent + ''.join(cur_line))

            return lines

    global MBTextWrapper
    MBTextWrapper = tw
    return tw(**kwargs)
2268 2287
def wrap(line, width, initindent='', hangindent=''):
    """Wrap line to width columns and return the result in local encoding.

    initindent prefixes the first output line, hangindent all following
    ones.  All three strings are decoded with the local encoding before
    wrapping and the wrapped text is encoded again afterwards.
    """
    maxindent = max(len(hangindent), len(initindent))
    if width <= maxindent:
        # adjust for weird terminal size
        width = max(78, maxindent + 1)

    codec = pycompat.sysstr(encoding.encoding)
    errmode = pycompat.sysstr(encoding.encodingmode)
    line, initindent, hangindent = [
        text.decode(codec, errmode)
        for text in (line, initindent, hangindent)]

    wrapper = MBTextWrapper(width=width,
                            initial_indent=initindent,
                            subsequent_indent=hangindent)
    filled = wrapper.fill(line)
    return filled.encode(pycompat.sysstr(encoding.encoding))
2284 2303
if (pyplatform.python_implementation() == 'CPython' and
    sys.version_info < (3, 0)):
    # There is an issue in CPython that some IO methods do not handle EINTR
    # correctly. The following table shows what CPython version (and functions)
    # are affected (buggy: has the EINTR bug, okay: otherwise):
    #
    #                | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
    #   --------------------------------------------------
    #    fp.__iter__ | buggy   | buggy           | okay
    #    fp.read*    | buggy   | okay [1]        | okay
    #
    # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
    #
    # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
    # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
    #
    # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
    # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
    # CPython 2, because CPython 2 maintains an internal readahead buffer for
    # fp.__iter__ but not other fp.read* methods.
    #
    # On modern systems like Linux, the "read" syscall cannot be interrupted
    # when reading "fast" files like on-disk files. So the EINTR issue only
    # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
    # files approximately as "fast" files and use the fast (unsafe) code path,
    # to minimize the performance impact.
    if sys.version_info >= (2, 7, 4):
        # fp.readline deals with EINTR correctly, use it as a workaround.
        def _safeiterfile(fp):
            """Iterate over the lines of fp using fp.readline."""
            return iter(fp.readline, '')
    else:
        # fp.read* are broken too, manually deal with EINTR in a stupid way.
        # note: this may block longer than necessary because of bufsize.
        def _safeiterfile(fp, bufsize=4096):
            """Yield the lines of fp, retrying reads interrupted by EINTR.

            Data is read from the underlying file descriptor in chunks of
            at most bufsize bytes.  Lines are terminated by '\\n' only, so
            the output matches the readline-based implementation above.
            """
            fd = fp.fileno()
            line = ''
            while True:
                try:
                    buf = os.read(fd, bufsize)
                except OSError as ex:
                    # os.read only raises EINTR before any data is read
                    if ex.errno == errno.EINTR:
                        continue
                    else:
                        raise
                line += buf
                if '\n' in buf:
                    # Split on '\n' only.  str.splitlines() would also
                    # break at a bare '\r', and the text before it would
                    # then be overwritten and lost instead of yielded.
                    pieces = line.split('\n')
                    # the last piece is the still unterminated remainder
                    line = pieces.pop()
                    for piece in pieces:
                        yield piece + '\n'
                if not buf:
                    # EOF
                    break
            if line:
                yield line

    def iterfile(fp):
        """Return an iterable over the lines of fp that is safe from EINTR.

        Plain ``file`` objects backed by a regular file are returned as is
        (fast path); everything else goes through _safeiterfile().
        """
        fastpath = True
        if type(fp) is file:
            fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
        if fastpath:
            return fp
        else:
            return _safeiterfile(fp)
else:
    # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
    def iterfile(fp):
        """Return fp unchanged; iterating it directly is safe here."""
        return fp
2356 2375
def iterlines(iterator):
    """Yield every line of every chunk in iterator, without line endings."""
    for block in iterator:
        pieces = block.splitlines()
        for piece in pieces:
            yield piece
2361 2380
def expandpath(path):
    """Expand environment variables, then a leading '~', in path."""
    withvars = os.path.expandvars(path)
    return os.path.expanduser(withvars)
2364 2383
def hgcmd():
    """Return the command used to execute current hg

    This is different from hgexecutable() because on Windows we want
    to avoid things opening new shell windows like batch files, so we
    get either the python call or current executable.
    """
    if not mainfrozen():
        return gethgcmd()
    if getattr(sys, 'frozen', None) == 'macosx_app':
        # Env variable set by py2app
        return [encoding.environ['EXECUTABLEPATH']]
    return [pycompat.sysexecutable]
2379 2398
def rundetached(args, condfn):
    """Execute the argument list in a detached process.

    condfn is a callable which is called repeatedly and should return
    True once the child process is known to have started successfully.
    At this point, the child process PID is returned. If the child
    process fails to start or finishes before condfn() evaluates to
    True, return -1.
    """
    # Windows case is easier because the child process is either
    # successfully starting and validating the condition or exiting
    # on failure. We just poll on its PID. On Unix, if the child
    # process fails to start, it will be left in a zombie state until
    # the parent wait on it, which we cannot do since we expect a long
    # running process on success. Instead we listen for SIGCHLD telling
    # us our child process terminated.
    terminated = set()
    def handler(signum, frame):
        # os.wait() returns a (pid, exit status) tuple; record only the
        # pid so that the "pid in terminated" test below can match
        terminated.add(os.wait()[0])
    prevhandler = None
    SIGCHLD = getattr(signal, 'SIGCHLD', None)
    if SIGCHLD is not None:
        prevhandler = signal.signal(SIGCHLD, handler)
    try:
        pid = spawndetached(args)
        while not condfn():
            # condfn() is checked once more after the child is seen dead,
            # in case it succeeded just before exiting
            if ((pid in terminated or not testpid(pid))
                and not condfn()):
                return -1
            time.sleep(0.1)
        return pid
    finally:
        # restore whatever SIGCHLD handler was installed before
        if prevhandler is not None:
            signal.signal(SIGCHLD, prevhandler)
2414 2433
def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
    """Return the result of interpolating items in the mapping into string s.

    prefix is a single character string, or a two character string with
    a backslash as the first character if the prefix needs to be escaped in
    a regular expression.

    The keys of mapping are joined into a regular expression alternation,
    so they are interpreted as regular expression fragments.

    fn is an optional function that will be applied to the replacement text
    just before replacement.

    escape_prefix is an optional flag that allows using doubled prefix for
    its escaping.

    The mapping passed in is never modified.
    """
    fn = fn or (lambda s: s)
    patterns = '|'.join(mapping.keys())
    prefix_char = None
    if escape_prefix:
        patterns += '|' + prefix
        # strip the regular expression backslash to get the literal prefix
        if len(prefix) > 1:
            prefix_char = prefix[1:]
        else:
            prefix_char = prefix

    def replace(match):
        # drop the leading prefix character from the matched text
        key = match.group()[1:]
        if key == prefix_char:
            # doubled prefix stands for the prefix character itself; handled
            # here rather than by inserting a key into the caller's mapping
            return fn(prefix_char)
        return fn(mapping[key])

    r = remod.compile(r'%s(%s)' % (prefix, patterns))
    return r.sub(replace, s)
2439 2458
def getport(port):
    """Return the port for a given network service.

    If port is an integer, it's returned as is. If it's a string, it's
    looked up using socket.getservbyname(). If there's no matching
    service, error.Abort is raised.
    """
    try:
        number = int(port)
    except ValueError:
        # not a number: treat it as a service name
        try:
            number = socket.getservbyname(port)
        except socket.error:
            raise Abort(_("no port number associated with service '%s'")
                        % port)
    return number
2456 2475
# Accepted spellings of boolean values, keyed by their lower-case form.
# parsebool() lower-cases its argument and looks it up in this table.
_booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
             '0': False, 'no': False, 'false': False, 'off': False,
             'never': False}
2460 2479
def parsebool(s):
    """Parse s into a boolean.

    The comparison is case-insensitive. If s is not a valid boolean,
    returns None.
    """
    key = s.lower()
    return _booleans.get(key)
2467 2486
# Maps every two-character hexadecimal string (string.hexdigits contains
# both lower- and upper-case digits, so mixed case is covered) to the
# character with that code.
_hextochr = dict((a + b, chr(int(a + b, 16)))
                 for a in string.hexdigits for b in string.hexdigits)
2470 2489
2471 2490 class url(object):
2472 2491 r"""Reliable URL parser.
2473 2492
2474 2493 This parses URLs and provides attributes for the following
2475 2494 components:
2476 2495
2477 2496 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2478 2497
2479 2498 Missing components are set to None. The only exception is
2480 2499 fragment, which is set to '' if present but empty.
2481 2500
2482 2501 If parsefragment is False, fragment is included in query. If
2483 2502 parsequery is False, query is included in path. If both are
2484 2503 False, both fragment and query are included in path.
2485 2504
2486 2505 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2487 2506
2488 2507 Note that for backward compatibility reasons, bundle URLs do not
2489 2508 take host names. That means 'bundle://../' has a path of '../'.
2490 2509
2491 2510 Examples:
2492 2511
2493 2512 >>> url('http://www.ietf.org/rfc/rfc2396.txt')
2494 2513 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2495 2514 >>> url('ssh://[::1]:2200//home/joe/repo')
2496 2515 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2497 2516 >>> url('file:///home/joe/repo')
2498 2517 <url scheme: 'file', path: '/home/joe/repo'>
2499 2518 >>> url('file:///c:/temp/foo/')
2500 2519 <url scheme: 'file', path: 'c:/temp/foo/'>
2501 2520 >>> url('bundle:foo')
2502 2521 <url scheme: 'bundle', path: 'foo'>
2503 2522 >>> url('bundle://../foo')
2504 2523 <url scheme: 'bundle', path: '../foo'>
2505 2524 >>> url(r'c:\foo\bar')
2506 2525 <url path: 'c:\\foo\\bar'>
2507 2526 >>> url(r'\\blah\blah\blah')
2508 2527 <url path: '\\\\blah\\blah\\blah'>
2509 2528 >>> url(r'\\blah\blah\blah#baz')
2510 2529 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2511 2530 >>> url(r'file:///C:\users\me')
2512 2531 <url scheme: 'file', path: 'C:\\users\\me'>
2513 2532
2514 2533 Authentication credentials:
2515 2534
2516 2535 >>> url('ssh://joe:xyz@x/repo')
2517 2536 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2518 2537 >>> url('ssh://joe@x/repo')
2519 2538 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2520 2539
2521 2540 Query strings and fragments:
2522 2541
2523 2542 >>> url('http://host/a?b#c')
2524 2543 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2525 2544 >>> url('http://host/a?b#c', parsequery=False, parsefragment=False)
2526 2545 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2527 2546
2528 2547 Empty path:
2529 2548
2530 2549 >>> url('')
2531 2550 <url path: ''>
2532 2551 >>> url('#a')
2533 2552 <url path: '', fragment: 'a'>
2534 2553 >>> url('http://host/')
2535 2554 <url scheme: 'http', host: 'host', path: ''>
2536 2555 >>> url('http://host/#a')
2537 2556 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2538 2557
2539 2558 Only scheme:
2540 2559
2541 2560 >>> url('http:')
2542 2561 <url scheme: 'http'>
2543 2562 """
2544 2563
2545 2564 _safechars = "!~*'()+"
2546 2565 _safepchars = "/!~*'()+:\\"
2547 2566 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2548 2567
2549 2568 def __init__(self, path, parsequery=True, parsefragment=True):
2550 2569 # We slowly chomp away at path until we have only the path left
2551 2570 self.scheme = self.user = self.passwd = self.host = None
2552 2571 self.port = self.path = self.query = self.fragment = None
2553 2572 self._localpath = True
2554 2573 self._hostport = ''
2555 2574 self._origpath = path
2556 2575
2557 2576 if parsefragment and '#' in path:
2558 2577 path, self.fragment = path.split('#', 1)
2559 2578
2560 2579 # special case for Windows drive letters and UNC paths
2561 2580 if hasdriveletter(path) or path.startswith('\\\\'):
2562 2581 self.path = path
2563 2582 return
2564 2583
2565 2584 # For compatibility reasons, we can't handle bundle paths as
2566 2585 # normal URLS
2567 2586 if path.startswith('bundle:'):
2568 2587 self.scheme = 'bundle'
2569 2588 path = path[7:]
2570 2589 if path.startswith('//'):
2571 2590 path = path[2:]
2572 2591 self.path = path
2573 2592 return
2574 2593
2575 2594 if self._matchscheme(path):
2576 2595 parts = path.split(':', 1)
2577 2596 if parts[0]:
2578 2597 self.scheme, path = parts
2579 2598 self._localpath = False
2580 2599
2581 2600 if not path:
2582 2601 path = None
2583 2602 if self._localpath:
2584 2603 self.path = ''
2585 2604 return
2586 2605 else:
2587 2606 if self._localpath:
2588 2607 self.path = path
2589 2608 return
2590 2609
2591 2610 if parsequery and '?' in path:
2592 2611 path, self.query = path.split('?', 1)
2593 2612 if not path:
2594 2613 path = None
2595 2614 if not self.query:
2596 2615 self.query = None
2597 2616
2598 2617 # // is required to specify a host/authority
2599 2618 if path and path.startswith('//'):
2600 2619 parts = path[2:].split('/', 1)
2601 2620 if len(parts) > 1:
2602 2621 self.host, path = parts
2603 2622 else:
2604 2623 self.host = parts[0]
2605 2624 path = None
2606 2625 if not self.host:
2607 2626 self.host = None
2608 2627 # path of file:///d is /d
2609 2628 # path of file:///d:/ is d:/, not /d:/
2610 2629 if path and not hasdriveletter(path):
2611 2630 path = '/' + path
2612 2631
2613 2632 if self.host and '@' in self.host:
2614 2633 self.user, self.host = self.host.rsplit('@', 1)
2615 2634 if ':' in self.user:
2616 2635 self.user, self.passwd = self.user.split(':', 1)
2617 2636 if not self.host:
2618 2637 self.host = None
2619 2638
2620 2639 # Don't split on colons in IPv6 addresses without ports
2621 2640 if (self.host and ':' in self.host and
2622 2641 not (self.host.startswith('[') and self.host.endswith(']'))):
2623 2642 self._hostport = self.host
2624 2643 self.host, self.port = self.host.rsplit(':', 1)
2625 2644 if not self.host:
2626 2645 self.host = None
2627 2646
2628 2647 if (self.host and self.scheme == 'file' and
2629 2648 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2630 2649 raise Abort(_('file:// URLs can only refer to localhost'))
2631 2650
2632 2651 self.path = path
2633 2652
2634 2653 # leave the query string escaped
2635 2654 for a in ('user', 'passwd', 'host', 'port',
2636 2655 'path', 'fragment'):
2637 2656 v = getattr(self, a)
2638 2657 if v is not None:
2639 2658 setattr(self, a, urlreq.unquote(v))
2640 2659
2641 2660 def __repr__(self):
2642 2661 attrs = []
2643 2662 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2644 2663 'query', 'fragment'):
2645 2664 v = getattr(self, a)
2646 2665 if v is not None:
2647 2666 attrs.append('%s: %r' % (a, v))
2648 2667 return '<url %s>' % ', '.join(attrs)
2649 2668
2650 2669 def __str__(self):
2651 2670 r"""Join the URL's components back into a URL string.
2652 2671
2653 2672 Examples:
2654 2673
2655 2674 >>> str(url('http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2656 2675 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2657 2676 >>> str(url('http://user:pw@host:80/?foo=bar&baz=42'))
2658 2677 'http://user:pw@host:80/?foo=bar&baz=42'
2659 2678 >>> str(url('http://user:pw@host:80/?foo=bar%3dbaz'))
2660 2679 'http://user:pw@host:80/?foo=bar%3dbaz'
2661 2680 >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
2662 2681 'ssh://user:pw@[::1]:2200//home/joe#'
2663 2682 >>> str(url('http://localhost:80//'))
2664 2683 'http://localhost:80//'
2665 2684 >>> str(url('http://localhost:80/'))
2666 2685 'http://localhost:80/'
2667 2686 >>> str(url('http://localhost:80'))
2668 2687 'http://localhost:80/'
2669 2688 >>> str(url('bundle:foo'))
2670 2689 'bundle:foo'
2671 2690 >>> str(url('bundle://../foo'))
2672 2691 'bundle:../foo'
2673 2692 >>> str(url('path'))
2674 2693 'path'
2675 2694 >>> str(url('file:///tmp/foo/bar'))
2676 2695 'file:///tmp/foo/bar'
2677 2696 >>> str(url('file:///c:/tmp/foo/bar'))
2678 2697 'file:///c:/tmp/foo/bar'
2679 2698 >>> print url(r'bundle:foo\bar')
2680 2699 bundle:foo\bar
2681 2700 >>> print url(r'file:///D:\data\hg')
2682 2701 file:///D:\data\hg
2683 2702 """
2684 2703 return encoding.strfromlocal(self.__bytes__())
2685 2704
2686 2705 def __bytes__(self):
2687 2706 if self._localpath:
2688 2707 s = self.path
2689 2708 if self.scheme == 'bundle':
2690 2709 s = 'bundle:' + s
2691 2710 if self.fragment:
2692 2711 s += '#' + self.fragment
2693 2712 return s
2694 2713
2695 2714 s = self.scheme + ':'
2696 2715 if self.user or self.passwd or self.host:
2697 2716 s += '//'
2698 2717 elif self.scheme and (not self.path or self.path.startswith('/')
2699 2718 or hasdriveletter(self.path)):
2700 2719 s += '//'
2701 2720 if hasdriveletter(self.path):
2702 2721 s += '/'
2703 2722 if self.user:
2704 2723 s += urlreq.quote(self.user, safe=self._safechars)
2705 2724 if self.passwd:
2706 2725 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2707 2726 if self.user or self.passwd:
2708 2727 s += '@'
2709 2728 if self.host:
2710 2729 if not (self.host.startswith('[') and self.host.endswith(']')):
2711 2730 s += urlreq.quote(self.host)
2712 2731 else:
2713 2732 s += self.host
2714 2733 if self.port:
2715 2734 s += ':' + urlreq.quote(self.port)
2716 2735 if self.host:
2717 2736 s += '/'
2718 2737 if self.path:
2719 2738 # TODO: similar to the query string, we should not unescape the
2720 2739 # path when we store it, the path might contain '%2f' = '/',
2721 2740 # which we should *not* escape.
2722 2741 s += urlreq.quote(self.path, safe=self._safepchars)
2723 2742 if self.query:
2724 2743 # we store the query in escaped form.
2725 2744 s += '?' + self.query
2726 2745 if self.fragment is not None:
2727 2746 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2728 2747 return s
2729 2748
def authinfo(self):
    """Return ``(url, authdata)`` with credentials stripped from ``url``.

    ``authdata`` is None when the URL carries no user name; otherwise it
    is ``(None, (url, host), user, passwd)`` with a missing password
    replaced by the empty string.
    """
    saveduser = self.user
    savedpasswd = self.passwd
    try:
        # temporarily blank the credentials so str() renders a clean URL
        self.user = self.passwd = None
        bare = str(self)
    finally:
        self.user = saveduser
        self.passwd = savedpasswd
    if self.user:
        # authinfo[1] is passed to urllib2 password manager, and its
        # URIs must not contain credentials. The host is passed in the
        # URIs list because Python < 2.4.3 uses only that to search for
        # a password.
        creds = (None, (bare, self.host), self.user, self.passwd or '')
        return (bare, creds)
    return (bare, None)
2745 2764
def isabs(self):
    """Tell whether this URL is absolute (cannot be joined to a base)."""
    if self.scheme and self.scheme != 'file':
        return True # remote URL
    path = self.path
    if hasdriveletter(path):
        return True # absolute for our purposes - can't be joined()
    # Windows UNC path, or POSIX-style absolute path
    return path.startswith(r'\\') or path.startswith('/')
2756 2775
def localpath(self):
    """Return the filesystem path this URL refers to.

    For schemes other than ``file`` and ``bundle`` the original,
    unparsed path is returned.
    """
    if self.scheme != 'file' and self.scheme != 'bundle':
        return self._origpath

    result = self.path or '/'
    # For Windows, we need to promote hosts containing drive
    # letters to paths with drive letters.
    if hasdriveletter(self._hostport):
        result = self._hostport + '/' + self.path
    elif (self.host is not None and self.path
          and not hasdriveletter(result)):
        result = '/' + result
    return result
2769 2788
def islocal(self):
    '''whether localpath will return something that posixfile can open'''
    scheme = self.scheme
    if not scheme:
        return True
    return scheme == 'file' or scheme == 'bundle'
2774 2793
def hasscheme(path):
    '''Return True if ``path`` parses as a URL with a non-empty scheme.'''
    parsed = url(path)
    return bool(parsed.scheme)
2777 2796
def hasdriveletter(path):
    '''Return True if ``path`` starts with a drive letter such as ``c:``.

    Always returns a real boolean; an empty or None ``path`` yields False
    rather than the falsy input value itself.
    '''
    return bool(path) and path[1:2] == ':' and path[0:1].isalpha()
2780 2799
def urllocalpath(path):
    '''Return the local path for ``path``, parsed without splitting off
    a query string or fragment.'''
    parsed = url(path, parsequery=False, parsefragment=False)
    return parsed.localpath()
2783 2802
def hidepassword(u):
    '''hide user credential in a url string'''
    parsed = url(u)
    if parsed.passwd:
        # replace, rather than drop, so it is clear a password was given
        parsed.passwd = '***'
    return str(parsed)
2790 2809
def removeauth(u):
    '''remove all authentication information from a url string'''
    parsed = url(u)
    parsed.user = None
    parsed.passwd = None
    return str(parsed)
2796 2815
# Formatter for durations given in seconds, built by unitcountfn from the
# table below. The rows run from seconds down to nanoseconds with
# decreasing display precision.
# NOTE(review): each row looks like (multiplier, divisor, format) -
# confirm against unitcountfn.
timecount = unitcountfn(
    (1, 1e3, _('%.0f s')),
    (100, 1, _('%.1f s')),
    (10, 1, _('%.2f s')),
    (1, 1, _('%.3f s')),
    (100, 0.001, _('%.1f ms')),
    (10, 0.001, _('%.2f ms')),
    (1, 0.001, _('%.3f ms')),
    (100, 0.000001, _('%.1f us')),
    (10, 0.000001, _('%.2f us')),
    (1, 0.000001, _('%.3f us')),
    (100, 0.000000001, _('%.1f ns')),
    (10, 0.000000001, _('%.2f ns')),
    (1, 0.000000001, _('%.3f ns')),
    )
2812 2831
# Current indentation (in spaces) of timed() reports; a one-element list
# so nested wrappers can mutate it in place.
_timenesting = [0]

def timed(func):
    '''Report the execution time of a function call to stderr.

    During development, use as a decorator when you need to measure
    the cost of a function, e.g. as follows:

    @util.timed
    def foo(a, b, c):
        pass

    Reports of nested timed calls are indented by their nesting depth.
    The returned wrapper keeps the name and docstring of ``func``.
    '''
    import functools

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start = timer()
        indent = 2
        _timenesting[0] += indent
        try:
            return func(*args, **kwargs)
        finally:
            # report even when func raises, and always undo the indent
            elapsed = timer() - start
            _timenesting[0] -= indent
            stderr.write('%s%s: %s\n' %
                         (' ' * _timenesting[0], func.__name__,
                          timecount(elapsed)))
    return wrapper
2839 2858
# Recognized (suffix, multiplier) pairs, tried in this order against the
# lower-cased input.
_sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
              ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))

def sizetoint(s):
    '''Convert a space specifier to a byte count.

    Surrounding whitespace and letter case are ignored. A value without
    a recognized unit suffix must be a plain integer. Raises
    error.ParseError if the value cannot be parsed.

    >>> sizetoint('30')
    30
    >>> sizetoint('2.2kb')
    2252
    >>> sizetoint('6M')
    6291456
    '''
    spec = s.strip().lower()
    try:
        for suffix, factor in _sizeunits:
            if not spec.endswith(suffix):
                continue
            number = spec[:-len(suffix)]
            return int(float(number) * factor)
        return int(spec)
    except ValueError:
        raise error.ParseError(_("couldn't parse size: %s") % s)
2861 2880
class hooks(object):
    '''A collection of hook functions that can be used to extend a
    function's behavior. Hooks are called in lexicographic order,
    based on the names of their sources.'''

    def __init__(self):
        # list of (source, hook) pairs, sorted lazily at call time
        self._hooks = []

    def add(self, source, hook):
        '''Register ``hook`` under the name ``source``.'''
        entry = (source, hook)
        self._hooks.append(entry)

    def __call__(self, *args):
        '''Call every hook with ``args`` and return the list of results,
        ordered by source name.'''
        self._hooks.sort(key=lambda entry: entry[0])
        return [hook(*args) for source, hook in self._hooks]
2879 2898
def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%s', depth=0):
    '''Yields lines for a nicely formatted stacktrace.

    The 'skip' innermost entries are dropped (in addition to this
    function's own frame), then only the last 'depth' entries are kept
    (a depth of 0 keeps them all).
    Each file+linenumber is formatted according to fileline.
    Each line is formatted according to line.
    If line is None, it yields tuples of:
      length of longest filepath+line number,
      filepath+linenumber,
      function

    Not to be used in production code but very convenient while developing.
    '''
    # extract_stack() must be called from this frame so that the
    # "- 1" below removes exactly this function's own entry.
    frames = traceback.extract_stack()[:-skip - 1]
    entries = [(fileline % (fn, ln), func)
               for fn, ln, func, _text in frames]
    entries = entries[-depth:]
    if not entries:
        return
    width = max(len(location) for location, func in entries)
    for location, func in entries:
        if line is None:
            yield (width, location, func)
        else:
            yield line % (width, location, func)
2902 2921
def debugstacktrace(msg='stacktrace', skip=0,
                    f=stderr, otherf=stdout, depth=0):
    '''Writes a message to f (stderr) with a nicely formatted stacktrace.
    Skips the 'skip' entries closest to the call, then shows 'depth' entries.
    By default it will flush stdout first.
    It can be used everywhere and intentionally does not require a ui object.
    Not to be used in production code but very convenient while developing.
    '''
    # flush the other stream first so its output is not interleaved
    if otherf:
        otherf.flush()
    f.write('%s at:\n' % msg.rstrip())
    # skip + 1 hides this function's own frame from the report
    for line in getstackframes(skip + 1, depth=depth):
        f.write(line)
    f.flush()
2917 2936
class dirs(object):
    '''a multiset of directory names from a dirstate or manifest'''

    def __init__(self, map, skip=None):
        '''Count the ancestor directories of every path in ``map``.

        When ``map`` has an ``iteritems`` method and ``skip`` is given,
        entries whose value starts with ``skip`` are left out.
        '''
        self._dirs = {}
        add = self.addpath
        if safehasattr(map, 'iteritems') and skip is not None:
            for fname, state in map.iteritems():
                if state[0] == skip:
                    continue
                add(fname)
        else:
            for fname in map:
                add(fname)

    def addpath(self, path):
        '''Add one reference to each ancestor directory of ``path``.'''
        counts = self._dirs
        for base in finddirs(path):
            if base not in counts:
                counts[base] = 1
                continue
            # once a known directory is found and bumped, the remaining
            # (shallower) ancestors are left untouched
            counts[base] += 1
            return

    def delpath(self, path):
        '''Drop one reference from the ancestor directories of ``path``.'''
        counts = self._dirs
        for base in finddirs(path):
            if counts[base] <= 1:
                del counts[base]
                continue
            counts[base] -= 1
            return

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs

# Use the implementation from the parsers module when it provides one.
if safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs
2956 2975
def finddirs(path):
    '''Yield each ancestor directory of a '/'-separated ``path``, from
    the deepest to the shallowest.'''
    end = len(path)
    while True:
        end = path.rfind('/', 0, end)
        if end == -1:
            return
        yield path[:end]
2962 2981
class ctxmanager(object):
    '''A context manager for use in 'with' blocks to allow multiple
    contexts to be entered at once.  This is both safer and more
    flexible than contextlib.nested.

    Once Mercurial supports Python 2.7+, this will become mostly
    unnecessary.
    '''

    def __init__(self, *args):
        '''Accepts a list of no-argument functions that return context
        managers.  These will be invoked when enter() is called.'''
        self._pending = args
        self._atexit = []

    def __enter__(self):
        return self

    def enter(self):
        '''Create and enter context managers in the order in which they were
        passed to the constructor.  Returns the list of values produced
        by their __enter__ methods.'''
        values = []
        for func in self._pending:
            obj = func()
            values.append(obj.__enter__())
            self._atexit.append(obj.__exit__)
        del self._pending
        return values

    def atexit(self, func, *args, **kwargs):
        '''Add a function to call when this context manager exits.  The
        ordering of multiple atexit calls is unspecified, save that
        they will happen before any __exit__ functions.'''
        def wrapper(exc_type, exc_val, exc_tb):
            # the return value is discarded, so an atexit function can
            # never suppress an exception
            func(*args, **kwargs)
        self._atexit.append(wrapper)
        return func

    def __exit__(self, exc_type, exc_val, exc_tb):
        '''Context managers are exited in the reverse order from which
        they were created.

        An exception raised by an exit function replaces the one being
        handled and is passed to the remaining exit functions; it is
        re-raised at the end unless one of them suppresses it.  Returns
        true only if an exception was received and then suppressed.'''
        received = exc_type is not None
        suppressed = False
        pending = None
        self._atexit.reverse()
        for exitfunc in self._atexit:
            try:
                if exitfunc(exc_type, exc_val, exc_tb):
                    suppressed = True
                    # whatever was in flight (including an exception
                    # raised by an earlier exit function) is now handled
                    pending = None
                    exc_type = None
                    exc_val = None
                    exc_tb = None
            except BaseException:
                exc_type, exc_val, exc_tb = pending = sys.exc_info()
        del self._atexit
        if pending:
            raise exc_val
        return received and suppressed
3022 3041
3023 3042 # compression code
3024 3043
# Role names accepted by compressormanager.supportedwireengines().
SERVERROLE = 'server'
CLIENTROLE = 'client'

# Wire protocol support declaration: the format identifier plus the
# priorities used to order engines for the server and client roles.
compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
                                               (u'name', u'serverpriority',
                                                u'clientpriority'))
3031 3050
class compressormanager(object):
    """Holds registrations of various compression engines.

    This class essentially abstracts the differences between compression
    engines to allow new compression formats to be added easily, possibly from
    extensions.

    Compressors are registered against the global instance by calling its
    ``register()`` method.
    """
    def __init__(self):
        self._engines = {}
        # Bundle spec human name to engine name.
        self._bundlenames = {}
        # Internal bundle identifier to engine name.
        self._bundletypes = {}
        # Revlog header to engine name.
        self._revlogheaders = {}
        # Wire proto identifier to engine name.
        self._wiretypes = {}

    def __getitem__(self, key):
        return self._engines[key]

    def __contains__(self, key):
        return key in self._engines

    def __iter__(self):
        return iter(self._engines.keys())

    def register(self, engine):
        """Register a compression engine with the manager.

        The argument must be a ``compressionengine`` instance.

        Raises ValueError for any other kind of argument and aborts if the
        engine name or any identifier it declares is already taken. All
        checks run before anything is recorded, so a rejected engine
        leaves the manager unchanged.
        """
        if not isinstance(engine, compressionengine):
            raise ValueError(_('argument must be a compressionengine'))

        name = engine.name()

        if name in self._engines:
            raise error.Abort(_('compression engine %s already registered') %
                              name)

        # Phase 1: collect every identifier and check it for conflicts.
        bundlename = bundletype = None
        bundleinfo = engine.bundletype()
        if bundleinfo:
            bundlename, bundletype = bundleinfo

            if bundlename in self._bundlenames:
                raise error.Abort(_('bundle name %s already registered') %
                                  bundlename)
            if bundletype in self._bundletypes:
                raise error.Abort(_('bundle type %s already registered by %s') %
                                  (bundletype, self._bundletypes[bundletype]))

        wiretype = None
        wiresupport = engine.wireprotosupport()
        if wiresupport:
            wiretype = wiresupport.name
            if wiretype in self._wiretypes:
                raise error.Abort(_('wire protocol compression %s already '
                                    'registered by %s') %
                                  (wiretype, self._wiretypes[wiretype]))

        revlogheader = engine.revlogheader()
        if revlogheader and revlogheader in self._revlogheaders:
            raise error.Abort(_('revlog header %s already registered by %s') %
                              (revlogheader, self._revlogheaders[revlogheader]))

        # Phase 2: nothing conflicts, so record the engine everywhere.
        if bundleinfo:
            # No external facing name declared.
            if bundlename:
                self._bundlenames[bundlename] = name

            self._bundletypes[bundletype] = name

        if wiresupport:
            self._wiretypes[wiretype] = name

        if revlogheader:
            self._revlogheaders[revlogheader] = name

        self._engines[name] = engine

    @property
    def supportedbundlenames(self):
        """Set of user-facing bundle spec names that have an engine."""
        return set(self._bundlenames)

    @property
    def supportedbundletypes(self):
        """Set of internal bundle identifiers that have an engine."""
        return set(self._bundletypes)

    def forbundlename(self, bundlename):
        """Obtain a compression engine registered to a bundle name.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundlenames[bundlename]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundletypes[bundletype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'

        engines = [self._engines[e] for e in self._wiretypes.values()]
        if onlyavailable:
            engines = [e for e in engines if e.available()]

        def getkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            w = e.wireprotosupport()
            return -1 * getattr(w, attr), w.name

        return sorted(engines, key=getkey)

    def forwiretype(self, wiretype):
        """Obtain a compression engine registered to a wire proto identifier.

        Will raise KeyError if the identifier isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._wiretypes[wiretype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.
        """
        return self._engines[self._revlogheaders[header]]

# The global registry that engines register themselves with.
compengines = compressormanager()
3187 3206
class compressionengine(object):
    """Base class for compression engines.

    Every compression engine implements the interface declared here.
    Optional capabilities default to "not supported".
    """
    def name(self):
        """Return the name of the compression engine.

        The engine is registered under this key.

        Subclasses must implement this method.
        """
        raise NotImplementedError()

    def available(self):
        """Tell whether the compression engine can be used.

        This lets optional engines (for instance ones relying on a C
        extension that may not be installed) declare that they are not
        usable in the current installation.
        """
        return True

    def bundletype(self):
        """Describe the bundle identifiers for this engine.

        Returns None if the engine cannot be used for bundles.

        Otherwise returns a 2-tuple of strings: the user-facing "bundle
        spec" compression name and the internal identifier that denotes
        the compression format within bundles. Set the first element to
        ``None`` to keep the name out of external usage.

        An engine supporting bundle compression must also implement
        ``compressstream`` and ``decompressorreader``.
        """
        return None

    def wireprotosupport(self):
        """Declare support for this compression format on the wire protocol.

        Returns None if the engine cannot compress wire protocol
        payloads.

        Otherwise returns a ``compenginewireprotosupport`` with these
        fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        The priorities order the advertisement of format support by
        server and client: the highest integer is advertised first, and
        non-positive values aren't advertised.

        The priority values are somewhat arbitrary and only used for
        default ordering. The relative order can be changed via config
        options.

        An engine supporting wire protocol compression must also
        implement ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Return the header that marks revlog chunks from this engine.

        An engine usable for revlog compression returns the bytes that
        identify chunks it compressed. Any other engine returns ``None``
        to indicate it does not participate in revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        ``it`` is an iterator (ideally a generator) of chunks of bytes to
        compress. The return value is an iterator (ideally a generator)
        of chunks of bytes holding the compressed output.

        ``opts`` optionally describes how to perform the compression;
        each engine interprets it in its own way.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        ``fh`` is an object whose ``read(size)`` method returns
        compressed data. The return value is an object whose
        ``read(size)`` method returns uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data and returns the compressed binary data, or ``None`` when the
        data could not be compressed (too small, not compressible, etc).
        The returned data should start with a header uniquely identifying
        this compression format, so decompression can be routed to this
        engine. That header should be the ``revlogheader()`` return
        value.

        The object also has a ``decompress(data)`` method, which is only
        called when ``data`` begins with ``revlogheader()``. It should
        return the raw, uncompressed data or raise a ``RevlogError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()
3301 3320
class _zlibengine(compressionengine):
    """zlib engine: bundles ('gzip'/'GZ'), wire protocol and revlogs."""

    def name(self):
        return 'zlib'

    def bundletype(self):
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        """Yield zlib-compressed chunks for the chunks in ``it``.

        ``opts`` may carry a ``level`` entry; -1 is used otherwise.
        """
        opts = opts or {}

        compressor = zlib.compressobj(opts.get('level', -1))
        for chunk in it:
            out = compressor.compress(chunk)
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through generator.
            if not out:
                continue
            yield out

        yield compressor.flush()

    def decompressorreader(self, fh):
        """Return a reader producing the decompressed content of ``fh``."""
        def gen():
            decompressor = zlib.decompressobj()
            for chunk in filechunkiter(fh):
                while chunk:
                    # Limit output size to limit memory.
                    yield decompressor.decompress(chunk, 2 ** 18)
                    chunk = decompressor.unconsumed_tail

        return chunkbuffer(gen())

    class zlibrevlogcompressor(object):
        def compress(self, data):
            """Return ``data`` compressed, or None when compressing is
            skipped (short input) or does not make it smaller."""
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                return None

            if insize <= 1000000:
                compressed = zlib.compress(data)
                if len(compressed) >= insize:
                    return None
                return compressed

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            compressor = zlib.compressobj()
            step = 2**20
            parts = [compressor.compress(data[start:start + step])
                     for start in range(0, insize, step)]
            parts.append(compressor.flush())

            if sum(len(part) for part in parts) >= insize:
                return None
            return ''.join(parts)

        def decompress(self, data):
            """Return ``data`` decompressed; zlib errors are converted
            into error.RevlogError."""
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())
3382 3401
class _bz2engine(compressionengine):
    """bzip2 engine: bundles ('bzip2'/'BZ') and the wire protocol."""

    def name(self):
        return 'bz2'

    def bundletype(self):
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        """Yield bzip2-compressed chunks for the chunks in ``it``.

        ``opts`` may carry a ``level`` entry; 9 is used otherwise.
        """
        opts = opts or {}
        compressor = bz2.BZ2Compressor(opts.get('level', 9))
        for chunk in it:
            out = compressor.compress(chunk)
            # skip the calls that did not emit anything
            if not out:
                continue
            yield out

        yield compressor.flush()

    def decompressorreader(self, fh):
        """Return a reader producing the decompressed content of ``fh``."""
        def gen():
            decompressor = bz2.BZ2Decompressor()
            for chunk in filechunkiter(fh):
                yield decompressor.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_bz2engine())
3414 3433
class _truncatedbz2engine(compressionengine):
    """Decompression-only engine for bzip2 streams whose leading 'BZ'
    header is missing; it has no user-facing bundle name."""

    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled elsewhere.

    def decompressorreader(self, fh):
        """Return a reader producing the decompressed content of ``fh``."""
        def gen():
            decompressor = bz2.BZ2Decompressor()
            # The input stream doesn't have the 'BZ' header. So add it back.
            decompressor.decompress('BZ')
            for chunk in filechunkiter(fh):
                yield decompressor.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_truncatedbz2engine())
3435 3454
class _noopengine(compressionengine):
    """Pass-through engine: data is neither compressed nor decompressed."""

    def name(self):
        return 'none'

    def bundletype(self):
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't because
    # unless you are on a fast network, uncompressed payloads can easily
    # saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        # hand the input iterator back untouched
        return it

    def decompressorreader(self, fh):
        # the file object already yields uncompressed data
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            # always report "could not compress"
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())
3466 3485
class _zstdengine(compressionengine):
    """zstd engine: bundles ('zstd'/'ZS'), wire protocol and revlogs.

    The zstd module is optional, so it is imported on first use and
    ``available()`` reports whether that import worked.
    """

    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
            return zstd
        except ImportError:
            return None

    def available(self):
        return bool(self._module)

    def bundletype(self):
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        """Yield zstd-compressed chunks for the chunks in ``it``.

        ``opts`` may carry a ``level`` entry; 3 is used otherwise.
        """
        opts = opts or {}
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = opts.get('level', 3)

        zstd = self._module
        compressor = zstd.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            out = compressor.compress(chunk)
            # skip the calls that did not emit anything
            if not out:
                continue
            yield out

        yield compressor.flush()

    def decompressorreader(self, fh):
        """Return a reader producing the decompressed content of ``fh``."""
        zstd = self._module
        decompressor = zstd.ZstdDecompressor()
        return chunkbuffer(decompressor.read_from(fh))

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # Writing the content size adds a few bytes to the output. However,
            # it allows decompression to be more optimal since we can
            # pre-allocate a buffer to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level,
                                             write_content_size=True)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            """Return ``data`` compressed, or None when compressing is
            skipped (short input) or does not make it smaller."""
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                return None

            if insize <= 1000000:
                compressed = self._cctx.compress(data)
                if len(compressed) >= insize:
                    return None
                return compressed

            # large input: feed the compressor in recommended-size slices
            compressor = self._cctx.compressobj()
            chunks = []
            start = 0
            while start < insize:
                stop = start + self._compinsize
                out = compressor.compress(data[start:stop])
                if out:
                    chunks.append(out)
                start = stop
            chunks.append(compressor.flush())

            if sum(len(c) for c in chunks) >= insize:
                return None
            return ''.join(chunks)

        def decompress(self, data):
            """Return ``data`` decompressed; any failure is converted
            into error.RevlogError."""
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                decompressor = self._dctx.decompressobj()
                chunks = []
                start = 0
                while start < insize:
                    stop = start + self._decompinsize
                    out = decompressor.decompress(data[start:stop])
                    if out:
                        chunks.append(out)
                    start = stop
                # Frame should be exhausted, so no finish() API.

                return ''.join(chunks)
            except Exception as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))

compengines.register(_zstdengine())
3584 3603
# convenient shortcut: ``dst`` is a short alias for debugstacktrace
dst = debugstacktrace
General Comments 0
You need to be logged in to leave comments. Login now