##// END OF EJS Templates
py3: use iter() instead of iterkeys()
Rishabh Madan -
r31430:80738758 default
parent child Browse files
Show More
@@ -1,3560 +1,3560
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import
17 17
18 18 import bz2
19 19 import calendar
20 20 import collections
21 21 import datetime
22 22 import errno
23 23 import gc
24 24 import hashlib
25 25 import imp
26 26 import os
27 27 import platform as pyplatform
28 28 import re as remod
29 29 import shutil
30 30 import signal
31 31 import socket
32 32 import stat
33 33 import string
34 34 import subprocess
35 35 import sys
36 36 import tempfile
37 37 import textwrap
38 38 import time
39 39 import traceback
40 40 import zlib
41 41
42 42 from . import (
43 43 encoding,
44 44 error,
45 45 i18n,
46 46 osutil,
47 47 parsers,
48 48 pycompat,
49 49 )
50 50
51 51 empty = pycompat.empty
52 52 httplib = pycompat.httplib
53 53 httpserver = pycompat.httpserver
54 54 pickle = pycompat.pickle
55 55 queue = pycompat.queue
56 56 socketserver = pycompat.socketserver
57 57 stderr = pycompat.stderr
58 58 stdin = pycompat.stdin
59 59 stdout = pycompat.stdout
60 60 stringio = pycompat.stringio
61 61 urlerr = pycompat.urlerr
62 62 urlparse = pycompat.urlparse
63 63 urlreq = pycompat.urlreq
64 64 xmlrpclib = pycompat.xmlrpclib
65 65
def isatty(fp):
    """Return the result of ``fp.isatty()``, or False when that lookup or
    call raises AttributeError (e.g. ``fp`` has no ``isatty`` method)."""
    try:
        answer = fp.isatty()
    except AttributeError:
        answer = False
    return answer
71 71
# glibc determines buffering on first write to stdout - if we replace a TTY
# destined stdout with a pipe destined stdout (e.g. pager), we want line
# buffering
if isatty(stdout):
    # re-open the same file descriptor; the third argument (1) requests
    # line buffering
    stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)

# Bind ``platform`` to the OS-specific implementation module. The aliases
# defined further down are all taken from whichever module is chosen here.
if pycompat.osname == 'nt':
    from . import windows as platform
    # on Windows stdout is additionally wrapped by platform.winstdout
    stdout = platform.winstdout(stdout)
else:
    from . import posix as platform
83 83
# short alias for the i18n translation function
_ = i18n._

# Re-export the platform-specific helpers under module-level names so callers
# can write ``util.<name>`` regardless of which platform module was selected
# above.
bindunixsocket = platform.bindunixsocket
cachestat = platform.cachestat
checkexec = platform.checkexec
checklink = platform.checklink
copymode = platform.copymode
executablepath = platform.executablepath
expandglobs = platform.expandglobs
explainexit = platform.explainexit
findexe = platform.findexe
gethgcmd = platform.gethgcmd
getuser = platform.getuser
# getpid comes straight from the stdlib, not from the platform module
getpid = os.getpid
groupmembers = platform.groupmembers
groupname = platform.groupname
hidewindow = platform.hidewindow
isexec = platform.isexec
isowner = platform.isowner
localpath = platform.localpath
lookupreg = platform.lookupreg
makedir = platform.makedir
nlinks = platform.nlinks
normpath = platform.normpath
normcase = platform.normcase
normcasespec = platform.normcasespec
normcasefallback = platform.normcasefallback
openhardlinks = platform.openhardlinks
oslink = platform.oslink
parsepatchoutput = platform.parsepatchoutput
pconvert = platform.pconvert
poll = platform.poll
popen = platform.popen
posixfile = platform.posixfile
quotecommand = platform.quotecommand
readpipe = platform.readpipe
rename = platform.rename
removedirs = platform.removedirs
samedevice = platform.samedevice
samefile = platform.samefile
samestat = platform.samestat
setbinary = platform.setbinary
setflags = platform.setflags
setsignalhandler = platform.setsignalhandler
shellquote = platform.shellquote
spawndetached = platform.spawndetached
split = platform.split
sshargs = platform.sshargs
# use osutil.statfiles when osutil provides one, else the platform version
statfiles = getattr(osutil, 'statfiles', platform.statfiles)
statisexec = platform.statisexec
statislink = platform.statislink
testpid = platform.testpid
umask = platform.umask
unlink = platform.unlink
unlinkpath = platform.unlinkpath
username = platform.username
140 140
# Python compatibility

# Unique sentinel used to tell "no value" apart from a legitimate None.
_notset = object()

# disable Python's problematic floating point timestamps (issue4836)
# (Python hypocritically says you shouldn't change this behavior in
# libraries, and sure enough Mercurial is not a library.)
#
# os.stat_float_times() was removed in Python 3.7, so only call it on
# interpreters that still provide it; calling it unconditionally would make
# importing this module fail with AttributeError there.
if getattr(os, 'stat_float_times', None) is not None:
    os.stat_float_times(False)
149 149
def safehasattr(thing, attr):
    """Return True if ``getattr(thing, attr)`` yields a value.

    Implemented with a sentinel default rather than hasattr(), so only a
    missing attribute (getattr falling back to the default) reports False.
    """
    found = getattr(thing, attr, _notset)
    return found is not _notset
152 152
def bitsfrom(container):
    """Return the bitwise OR of every item in ``container`` (0 if empty)."""
    combined = 0
    for flag in container:
        combined |= flag
    return combined
158 158
# Supported digest names mapped to their hashlib constructors.
DIGESTS = {
    'md5': hashlib.md5,
    'sha1': hashlib.sha1,
    'sha512': hashlib.sha512,
}
# List of digest types from strongest to weakest
DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']

# sanity check: every ranked digest must be a supported one
for k in DIGESTS_BY_STRENGTH:
    assert k in DIGESTS

class digester(object):
    """helper to compute digests.

    This helper can be used to compute one or more digests given their name.

    >>> d = digester(['md5', 'sha1'])
    >>> d.update('foo')
    >>> [k for k in sorted(d)]
    ['md5', 'sha1']
    >>> d['md5']
    'acbd18db4cc2f85cedef654fccc4a4d8'
    >>> d['sha1']
    '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
    >>> digester.preferred(['md5', 'sha1'])
    'sha1'
    """

    def __init__(self, digests, s=''):
        """Create one hash object per name in ``digests``.

        Raises Abort for a name that is not in DIGESTS. If ``s`` is
        non-empty it is fed to every hash immediately.
        """
        self._hashes = {}
        for k in digests:
            if k not in DIGESTS:
                raise Abort(_('unknown digest type: %s') % k)
            self._hashes[k] = DIGESTS[k]()
        if s:
            self.update(s)

    def update(self, data):
        """Feed ``data`` to every hash being computed."""
        for h in self._hashes.values():
            h.update(data)

    def __getitem__(self, key):
        """Return the hex digest for the digest type ``key``.

        Raises Abort if ``key`` is not a known digest type.
        """
        if key not in DIGESTS:
            # Report the key that was actually requested. The previous code
            # formatted ``k``, which resolved to the module-level loop
            # variable above and therefore named the wrong digest.
            raise Abort(_('unknown digest type: %s') % key)
        return self._hashes[key].hexdigest()

    def __iter__(self):
        """Iterate over the names of the digests being computed."""
        return iter(self._hashes)

    @staticmethod
    def preferred(supported):
        """returns the strongest digest type in both supported and DIGESTS."""

        for k in DIGESTS_BY_STRENGTH:
            if k in supported:
                return k
        return None
216 216
class digestchecker(object):
    """file handle wrapper that additionally checks content against a given
    size and digests.

        d = digestchecker(fh, size, {'md5': '...'})

    When multiple digests are given, all of them are validated.
    """

    def __init__(self, fh, size, digests):
        self._fh = fh
        self._size = size
        # number of bytes handed out by read() so far
        self._got = 0
        self._digests = dict(digests)
        self._digester = digester(self._digests.keys())

    def read(self, length=-1):
        """Read from the wrapped handle, accounting for the data seen."""
        data = self._fh.read(length)
        self._digester.update(data)
        self._got += len(data)
        return data

    def validate(self):
        """Raise Abort if the size or any digest of the data read so far
        differs from what was expected."""
        if self._got != self._size:
            raise Abort(_('size mismatch: expected %d, got %d') %
                        (self._size, self._got))
        for name, expected in self._digests.items():
            actual = self._digester[name]
            if expected != actual:
                # i18n: first parameter is a digest name
                raise Abort(_('%s mismatch: expected %s, got %s') %
                            (name, expected, actual))
248 248
# Make sure a module-level ``buffer(sliceable, offset=0, length=None)``
# callable exists. When a builtin of that name is available it is used as
# is; the NameError branch only runs when there is none.
try:
    buffer = buffer
except NameError:
    if not pycompat.ispy3:
        # fallback based on ordinary slicing of the object itself
        def buffer(sliceable, offset=0, length=None):
            if length is not None:
                return sliceable[offset:offset + length]
            return sliceable[offset:]
    else:
        # Python 3: slice a memoryview of the object instead
        def buffer(sliceable, offset=0, length=None):
            if length is not None:
                return memoryview(sliceable)[offset:offset + length]
            return memoryview(sliceable)[offset:]
262 262
# value passed as ``close_fds`` to subprocess calls in this module; true only
# when pycompat.osname is 'posix'
closefds = pycompat.osname == 'posix'
264 264
# number of bytes requested from os.read() per buffer refill
_chunksize = 4096

class bufferedinputpipe(object):
    """a manually buffered input pipe

    Python will not let us use buffered IO and lazy reading with 'polling' at
    the same time. We cannot probe the buffer state and select will not detect
    that data are ready to read if they are already buffered.

    This class lets us work around that by implementing its own buffering
    (allowing efficient readline) while offering a way to know if the buffer is
    empty from the outside (allowing collaboration of the buffer with polling).

    This class lives in the 'util' module because it makes use of the 'os'
    module from the python stdlib.
    """

    def __init__(self, input):
        self._input = input
        # list of chunks read but not yet consumed
        self._buffer = []
        self._eof = False
        # total length of all chunks in self._buffer
        self._lenbuf = 0

    @property
    def hasbuffer(self):
        """True if any data is currently buffered

        This will be used externally as a pre-step for polling IO. If there
        is already data then no polling should be set in place."""
        return bool(self._buffer)

    @property
    def closed(self):
        return self._input.closed

    def fileno(self):
        return self._input.fileno()

    def close(self):
        return self._input.close()

    def read(self, size):
        """Return up to ``size`` data, refilling until enough is buffered
        or end of file is reached."""
        while not self._eof and self._lenbuf < size:
            self._fillbuffer()
        return self._frombuffer(size)

    def readline(self, *args, **kwargs):
        """Return data up to and including the next newline, or everything
        that is left when end of file is reached first."""
        if len(self._buffer) > 1:
            # this should not happen because both read and readline end with a
            # _frombuffer call that collapses it.
            joined = ''.join(self._buffer)
            self._buffer = [joined]
            self._lenbuf = len(joined)
        newline = -1
        if self._buffer:
            newline = self._buffer[-1].find('\n')
        while newline < 0 and not self._eof:
            self._fillbuffer()
            if self._buffer:
                # only the most recent chunk can hold a not-yet-seen newline
                newline = self._buffer[-1].find('\n')
        if newline < 0:
            # end of file: hand out everything that is left
            size = self._lenbuf
        else:
            size = newline + 1
            if len(self._buffer) > 1:
                # we need to take previous chunks into account
                size += self._lenbuf - len(self._buffer[-1])
        return self._frombuffer(size)

    def _frombuffer(self, size):
        """return at most 'size' data from the buffer

        The data are removed from the buffer."""
        if size == 0 or not self._buffer:
            return ''
        if len(self._buffer) > 1:
            pending = ''.join(self._buffer)
        else:
            pending = self._buffer[0]

        data = pending[:size]
        rest = pending[len(data):]
        if rest:
            self._buffer = [rest]
            self._lenbuf = len(rest)
        else:
            self._buffer = []
            self._lenbuf = 0
        return data

    def _fillbuffer(self):
        """read data to the buffer"""
        chunk = os.read(self._input.fileno(), _chunksize)
        if chunk:
            self._lenbuf += len(chunk)
            self._buffer.append(chunk)
        else:
            # an empty read means end of file
            self._eof = True
360 360
def popen2(cmd, env=None, newlines=False):
    """Run ``cmd`` through the shell and return its (stdin, stdout) pipes."""
    # Setting bufsize to -1 lets the system decide the buffer size.
    # The default for bufsize is 0, meaning unbuffered. This leads to
    # poor performance on Mac OS X: http://bugs.python.org/issue4194
    proc = subprocess.Popen(cmd,
                            shell=True,
                            bufsize=-1,
                            close_fds=closefds,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            universal_newlines=newlines,
                            env=env)
    return proc.stdin, proc.stdout
371 371
def popen3(cmd, env=None, newlines=False):
    """Like popen4(), but return only the (stdin, stdout, stderr) pipes."""
    # drop the trailing process object from popen4's 4-tuple
    return popen4(cmd, env, newlines)[:3]
375 375
def popen4(cmd, env=None, newlines=False, bufsize=-1):
    """Run ``cmd`` through the shell with all three standard streams piped.

    Returns a (stdin, stdout, stderr, process) tuple.
    """
    proc = subprocess.Popen(cmd,
                            shell=True,
                            bufsize=bufsize,
                            close_fds=closefds,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            universal_newlines=newlines,
                            env=env)
    return proc.stdin, proc.stdout, proc.stderr, proc
384 384
def version():
    """Return version information if available."""
    try:
        from . import __version__
        found = __version__.version
    except ImportError:
        # no __version__ module could be imported
        found = 'unknown'
    return found
392 392
def versiontuple(v=None, n=4):
    """Parses a Mercurial version string into an N-tuple.

    The version string to be parsed is specified with the ``v`` argument.
    If it isn't defined, the current Mercurial version string will be parsed.

    ``n`` can be 2, 3, or 4 (for any other value the function returns
    None). Here is how some version strings map to returned values:

    >>> v = '3.6.1+190-df9b73d2d444'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, 1)
    >>> versiontuple(v, 4)
    (3, 6, 1, '190-df9b73d2d444')

    >>> versiontuple('3.6.1+190-df9b73d2d444+20151118')
    (3, 6, 1, '190-df9b73d2d444+20151118')

    >>> v = '3.6'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, None)
    >>> versiontuple(v, 4)
    (3, 6, None, None)

    >>> v = '3.9-rc'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc')

    >>> v = '3.9-rc+2-02a8fea4289b'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc+2-02a8fea4289b')
    """
    if not v:
        v = version()
    # Split once at the first '+' or '-': numeric part, then "extra".
    # A raw string keeps ``\+`` from being read as an (invalid) string
    # escape sequence.
    parts = remod.split(r'[\+-]', v, 1)
    if len(parts) == 1:
        vparts, extra = parts[0], None
    else:
        vparts, extra = parts

    vints = []
    for i in vparts.split('.'):
        try:
            vints.append(int(i))
        except ValueError:
            # stop at the first non-numeric component
            break
    # (3, 6) -> (3, 6, None)
    while len(vints) < 3:
        vints.append(None)

    if n == 2:
        return (vints[0], vints[1])
    if n == 3:
        return (vints[0], vints[1], vints[2])
    if n == 4:
        return (vints[0], vints[1], vints[2], extra)
461 461
# used by parsedate
# strftime/strptime-style patterns
defaultdateformats = (
    '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
    '%Y-%m-%dT%H:%M', # without seconds
    '%Y-%m-%dT%H%M%S', # another awful but legal variant without :
    '%Y-%m-%dT%H%M', # without seconds
    '%Y-%m-%d %H:%M:%S', # our common legal variant
    '%Y-%m-%d %H:%M', # without seconds
    '%Y-%m-%d %H%M%S', # without :
    '%Y-%m-%d %H%M', # without seconds
    '%Y-%m-%d %I:%M:%S%p',
    # NOTE(review): the next entry repeats the 'without seconds' pattern
    # listed a few lines above
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %I:%M%p',
    '%Y-%m-%d',
    '%m-%d',
    '%m/%d',
    '%m/%d/%y',
    '%m/%d/%Y',
    '%a %b %d %H:%M:%S %Y',
    '%a %b %d %I:%M:%S%p %Y',
    '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
    '%b %d %H:%M:%S %Y',
    '%b %d %I:%M:%S%p %Y',
    '%b %d %H:%M:%S',
    '%b %d %I:%M:%S%p',
    '%b %d %H:%M',
    '%b %d %I:%M%p',
    '%b %d %Y',
    '%b %d',
    '%H:%M:%S',
    '%I:%M:%S%p',
    '%H:%M',
    '%I:%M%p',
    )

# the default formats plus coarser year / month-only patterns
extendeddateformats = defaultdateformats + (
    "%Y",
    "%Y-%m",
    "%b",
    "%b %Y",
    )
503 503
def cachefunc(func):
    '''cache the result of function calls

    The wrapper is chosen from the number of positional arguments ``func``
    declares: none, exactly one, or anything else. Cached results are kept
    for the lifetime of the wrapper.
    '''
    # XXX doesn't handle keywords args
    argcount = func.__code__.co_argcount
    if argcount == 0:
        # a list holding at most the one computed result
        results = []
        def f():
            if not results:
                results.append(func())
            return results[0]
        return f

    results = {}
    if argcount == 1:
        # we gain a small amount of time because
        # we don't need to pack/unpack the list
        def f(arg):
            if arg not in results:
                results[arg] = func(arg)
            return results[arg]
    else:
        def f(*args):
            if args not in results:
                results[args] = func(*args)
            return results[args]

    return f
529 529
class sortdict(dict):
    '''a simple sorted dictionary

    Keys are kept in the order in which they were last set: assigning to
    an existing key moves it to the end of the order.
    '''
    def __init__(self, data=None):
        # keys in iteration order
        self._list = []
        if data:
            self.update(data)
    def copy(self):
        return sortdict(self)
    def __setitem__(self, key, val):
        if key in self:
            # re-setting a key moves it to the end
            self._list.remove(key)
        self._list.append(key)
        dict.__setitem__(self, key, val)
    def __iter__(self):
        return self._list.__iter__()
    def update(self, src):
        '''set every key/value pair from ``src``, which is either a dict or
        an iterable of (key, value) pairs'''
        if isinstance(src, dict):
            # items() exists on both Python 2 and Python 3 dicts, unlike
            # iteritems()
            src = src.items()
        for k, v in src:
            self[k] = v
    def clear(self):
        dict.clear(self)
        self._list = []
    def items(self):
        return [(k, self[k]) for k in self._list]
    def __delitem__(self, key):
        dict.__delitem__(self, key)
        self._list.remove(key)
    def pop(self, key, *args, **kwargs):
        '''remove ``key`` and return its value, like dict.pop()

        A default may be supplied as for dict.pop(); without one a missing
        key raises KeyError.
        '''
        value = dict.pop(self, key, *args, **kwargs)
        try:
            self._list.remove(key)
        except ValueError:
            # key was absent and a default was returned
            pass
        return value
    def keys(self):
        return self._list[:]
    def iterkeys(self):
        return self._list.__iter__()
    def iteritems(self):
        for k in self._list:
            yield k, self[k]
    def insert(self, index, key, val):
        '''set ``key`` to ``val``, placing the key at position ``index`` of
        the order (no check is made for a key that is already present)'''
        self._list.insert(index, key)
        dict.__setitem__(self, key, val)
    def __repr__(self):
        if not self:
            return '%s()' % self.__class__.__name__
        return '%s(%r)' % (self.__class__.__name__, self.items())
578 578
class _lrucachenode(object):
    """A node in a doubly linked list.

    Holds a reference to nodes on either side as well as a key-value
    pair for the dictionary entry. A node whose key is the ``_notset``
    sentinel holds no entry.
    """
    __slots__ = (u'next', u'prev', u'key', u'value')

    def __init__(self):
        # a fresh node is unlinked and empty
        self.next = self.prev = None

        self.key = _notset
        self.value = None

    def markempty(self):
        """Mark the node as emptied."""
        self.key = _notset
597 597
class lrucachedict(object):
    """Dict that caches most recent accesses and sets.

    The dict consists of an actual backing dict - indexed by original
    key - and a doubly linked circular list defining the order of entries in
    the cache.

    The head node is the newest entry in the cache. If the cache is full,
    we recycle head.prev and make it the new head. Cache accesses result in
    the node being moved to before the existing head and being marked as the
    new head node.

    Only ``__getitem__`` and ``__setitem__`` refresh an entry; ``get()`` and
    ``in`` leave the ordering untouched.
    """
    def __init__(self, max):
        self._cache = {}

        # The ring starts out as a single empty node linked to itself.
        # Further nodes are allocated on demand by _addcapacity().
        self._head = head = _lrucachenode()
        head.prev = head
        head.next = head
        # number of nodes in the ring (filled or not)
        self._size = 1
        self._capacity = max

    def __len__(self):
        return len(self._cache)

    def __contains__(self, k):
        return k in self._cache

    def __iter__(self):
        # We don't have to iterate in cache order, but why not.
        n = self._head
        for i in range(len(self._cache)):
            yield n.key
            n = n.next

    def __getitem__(self, k):
        node = self._cache[k]
        self._movetohead(node)
        return node.value

    def __setitem__(self, k, v):
        node = self._cache.get(k)
        # Replace existing value and mark as newest.
        if node is not None:
            node.value = v
            self._movetohead(node)
            return

        if self._size < self._capacity:
            node = self._addcapacity()
        else:
            # Grab the last/oldest item.
            node = self._head.prev

        # At capacity. Kill the old entry.
        if node.key is not _notset:
            del self._cache[node.key]

        node.key = k
        node.value = v
        self._cache[k] = node
        # And mark it as newest entry. No need to adjust order since it
        # is already self._head.prev.
        self._head = node

    def __delitem__(self, k):
        node = self._cache.pop(k)
        node.markempty()

        # Temporarily mark as newest item before re-adjusting head to make
        # this node the oldest item.
        self._movetohead(node)
        self._head = node.next

    # Additional dict methods.

    def get(self, k, default=None):
        """Return the value for ``k`` or ``default``, without marking the
        entry as recently used."""
        try:
            return self._cache[k].value
        except KeyError:
            return default

    def clear(self):
        """Empty the cache, keeping the already allocated nodes."""
        n = self._head
        while n.key is not _notset:
            n.markempty()
            n = n.next

        self._cache.clear()

    def copy(self):
        """Return a new lrucachedict with the same capacity, entries and
        ordering."""
        result = lrucachedict(self._capacity)
        n = self._head.prev
        # Walk the whole ring in oldest-to-newest order so the copy has the
        # right ordering. Nodes that hold no entry (a cache that is not full
        # yet, or one that had entries deleted) must be skipped: copying
        # them would insert the _notset sentinel as a key and leave out the
        # newest real entries.
        for i in range(self._size):
            if n.key is not _notset:
                result[n.key] = n.value
            n = n.prev
        return result

    def _movetohead(self, node):
        """Mark a node as the newest, making it the new head.

        When a node is accessed, it becomes the freshest entry in the LRU
        list, which is denoted by self._head.

        Visually, let's make ``N`` the new head node (* denotes head):

            previous/oldest <-> head <-> next/next newest

            ----<->--- A* ---<->-----
            |                       |
            E <-> D <-> N <-> C <-> B

        To:

            ----<->--- N* ---<->-----
            |                       |
            E <-> D <-> C <-> B <-> A

        This requires the following moves:

           C.next = D  (node.prev.next = node.next)
           D.prev = C  (node.next.prev = node.prev)
           E.next = N  (head.prev.next = node)
           N.prev = E  (node.prev = head.prev)
           N.next = A  (node.next = head)
           A.prev = N  (head.prev = node)
        """
        head = self._head
        # C.next = D
        node.prev.next = node.next
        # D.prev = C
        node.next.prev = node.prev
        # N.prev = E
        node.prev = head.prev
        # N.next = A
        # It is tempting to do just "head" here, however if node is
        # adjacent to head, this will do bad things.
        node.next = head.prev.next
        # A.prev = N
        node.next.prev = node
        # E.next = N
        node.prev.next = node

        self._head = node

    def _addcapacity(self):
        """Add a node to the circular linked list.

        The new node is inserted before the head node.
        """
        head = self._head
        node = _lrucachenode()
        head.prev.next = node
        node.prev = head.prev
        node.next = head
        head.prev = node
        self._size += 1
        return node
756 756
def lrucachefunc(func):
    '''cache most recent results of function calls

    The oldest cached result is dropped before adding a new one once more
    than 20 results are held.
    '''
    results = {}
    # keys from least to most recently used
    recent = collections.deque()
    if func.__code__.co_argcount == 1:
        def f(arg):
            if arg in results:
                # will be re-appended below as the most recent key
                recent.remove(arg)
            else:
                if len(results) > 20:
                    del results[recent.popleft()]
                results[arg] = func(arg)
            recent.append(arg)
            return results[arg]
    else:
        def f(*args):
            if args in results:
                recent.remove(args)
            else:
                if len(results) > 20:
                    del results[recent.popleft()]
                results[args] = func(*args)
            recent.append(args)
            return results[args]

    return f
783 783
class propertycache(object):
    """Descriptor that computes a value with ``func`` and stores it in the
    instance ``__dict__`` under the function's name."""
    def __init__(self, func):
        self.func = func
        self.name = func.__name__

    def __get__(self, obj, type=None):
        value = self.func(obj)
        self.cachevalue(obj, value)
        return value

    def cachevalue(self, obj, value):
        # __dict__ assignment required to bypass __setattr__ (eg: repoview)
        obj.__dict__[self.name] = value
796 796
def pipefilter(s, cmd):
    '''filter string S through command CMD, returning its output'''
    proc = subprocess.Popen(cmd,
                            shell=True,
                            close_fds=closefds,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE)
    # communicate() returns (stdout, stderr); only stdout is wanted
    return proc.communicate(s)[0]
803 803
def tempfilter(s, cmd):
    '''filter string S through a pair of temporary files with CMD.
    CMD is used as a template to create the real command to be run,
    with the strings INFILE and OUTFILE replaced by the real names of
    the temporary files generated.

    Returns the content of the output file. Raises Abort if the command
    reports failure. Both temporary files are removed afterwards.'''
    inname, outname = None, None
    try:
        infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
        fp = os.fdopen(infd, pycompat.sysstr('wb'))
        try:
            fp.write(s)
        finally:
            # close the handle even when the write fails
            fp.close()
        outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
        # only the name is needed; the command writes the file itself
        os.close(outfd)
        cmd = cmd.replace('INFILE', inname)
        cmd = cmd.replace('OUTFILE', outname)
        code = os.system(cmd)
        if pycompat.sysplatform == 'OpenVMS' and code & 1:
            code = 0
        if code:
            raise Abort(_("command '%s' failed: %s") %
                        (cmd, explainexit(code)))
        return readfile(outname)
    finally:
        # best-effort cleanup of both temporary files
        try:
            if inname:
                os.unlink(inname)
        except OSError:
            pass
        try:
            if outname:
                os.unlink(outname)
        except OSError:
            pass
837 837
# command prefixes mapped to the filter implementation they select
filtertable = {
    'tempfile:': tempfilter,
    'pipe:': pipefilter,
    }

def filter(s, cmd):
    "filter a string through a command that transforms its input to its output"
    # items() works on both Python 2 and Python 3 dicts, unlike iteritems()
    for name, fn in filtertable.items():
        if cmd.startswith(name):
            # strip the prefix and any whitespace that follows it
            return fn(s, cmd[len(name):].lstrip())
    # no known prefix: default to piping
    return pipefilter(s, cmd)
849 849
def binary(s):
    """return true if a string is binary data"""
    if not s:
        return False
    # presence of a NUL character marks the string as binary
    return '\0' in s
853 853
def increasingchunks(source, min=1024, max=65536):
    '''return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max'''
    def log2(x):
        # position of the highest set bit of x; 0 when x is 0
        i = 0
        x >>= 1
        while x:
            x >>= 1
            i += 1
        return i

    pending = []
    pendinglen = 0
    for chunk in source:
        pending.append(chunk)
        pendinglen += len(chunk)
        if pendinglen < min:
            # not enough data gathered yet
            continue
        if min < max:
            min = min << 1
            nmin = 1 << log2(pendinglen)
            if nmin > min:
                min = nmin
            if min > max:
                min = max
        yield ''.join(pending)
        pendinglen = 0
        pending = []
    if pending:
        # whatever is left once the source is exhausted
        yield ''.join(pending)
884 884
# module-level alias so code in this file can raise Abort directly
Abort = error.Abort
886 886
def always(fn):
    """Return True regardless of ``fn``."""
    return True
889 889
def never(fn):
    """Return False regardless of ``fn``."""
    return False
892 892
def nogc(func):
    """disable garbage collector

    Python's garbage collector triggers a GC each time a certain number of
    container objects (the number being defined by gc.get_threshold()) are
    allocated even when marked not to be tracked by the collector. Tracking has
    no effect on when GCs are triggered, only on what objects the GC looks
    into. As a workaround, disable GC while building complex (huge)
    containers.

    This garbage collector issue has been fixed in 2.7, so on 2.7 and later
    ``func`` is returned unwrapped.
    """
    if sys.version_info < (2, 7):
        def wrapper(*args, **kwargs):
            # remember whether GC was on so it is only re-enabled if needed
            wasenabled = gc.isenabled()
            gc.disable()
            try:
                return func(*args, **kwargs)
            finally:
                if wasenabled:
                    gc.enable()
        return wrapper
    return func
916 916
def pathto(root, n1, n2):
    '''return the relative path from one place to another.
    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    '''
    if not n1:
        return localpath(n2)
    if os.path.isabs(n1):
        if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
            # different drives: no relative path exists
            return os.path.join(root, localpath(n2))
        n2 = '/'.join((pconvert(root), n2))
    src, dst = splitpath(n1), n2.split('/')
    # count the leading components both paths share
    common = 0
    while (common < len(src) and common < len(dst)
           and src[common] == dst[common]):
        common += 1
    # climb out of what is left of n1, then descend into what is left of n2
    climb = ['..'] * (len(src) - common)
    return pycompat.ossep.join(climb + dst[common:]) or '.'
942 942
def mainfrozen():
    """return True if we are a frozen executable.

    The code supports py2exe (most common, Windows only) and tools/freeze
    (portable, not much used).
    """
    if safehasattr(sys, "frozen"): # new py2exe
        return True
    if safehasattr(sys, "importers"): # old py2exe
        return True
    return imp.is_frozen(u"__main__") # tools/freeze
952 952
# the location of data files matching the source code
if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
    # executable version (py2exe) doesn't support __file__
    datapath = os.path.dirname(pycompat.sysexecutable)
else:
    # otherwise (including frozen 'macosx_app' builds) use the directory
    # holding this module
    datapath = os.path.dirname(pycompat.fsencode(__file__))

# hand the directory to the i18n module
i18n.setdatapath(datapath)
961 961
# cached result of hgexecutable(); written by _sethgexecutable()
_hgexecutable = None

def hgexecutable():
    """return location of the 'hg' executable.

    Defaults to $HG or 'hg' in the search path.
    """
    if _hgexecutable is not None:
        # already determined
        return _hgexecutable

    envhg = encoding.environ.get('HG')
    mainmod = sys.modules['__main__']
    if envhg:
        exe = envhg
    elif mainfrozen():
        if getattr(sys, 'frozen', None) == 'macosx_app':
            # Env variable set by py2app
            exe = encoding.environ['EXECUTABLEPATH']
        else:
            exe = pycompat.sysexecutable
    elif (os.path.basename(
            pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
        # the running main script is itself named 'hg'
        exe = pycompat.fsencode(mainmod.__file__)
    else:
        exe = findexe('hg') or os.path.basename(sys.argv[0])
    _sethgexecutable(exe)
    return _hgexecutable
987 987
def _sethgexecutable(path):
    """set location of the 'hg' executable"""
    # rebinding the module-level cache needs the global declaration
    global _hgexecutable
    _hgexecutable = path
992 992
def _isstdout(f):
    """Tell whether ``f`` has the same file descriptor as the process's
    original stdout. The result is falsy when ``f`` has no usable
    ``fileno`` attribute."""
    getfd = getattr(f, 'fileno', None)
    if not getfd:
        return getfd
    return getfd() == sys.__stdout__.fileno()
996 996
def shellenviron(environ=None):
    """return environ with optional override, useful for shelling out

    The result is a copy of encoding.environ, updated with the entries of
    ``environ`` (values converted by py2shell) and with 'HG' set to the
    result of hgexecutable().
    """
    def py2shell(val):
        'convert python object into string that is useful to shell'
        if val is None or val is False:
            return '0'
        if val is True:
            return '1'
        return str(val)
    env = dict(encoding.environ)
    if environ:
        # items() works on both Python 2 and Python 3 dicts, unlike
        # iteritems()
        env.update((k, py2shell(v)) for k, v in environ.items())
    env['HG'] = hgexecutable()
    return env
1011 1011
def system(cmd, environ=None, cwd=None, out=None):
    '''enhanced shell command execution.
    run with environment maybe modified, maybe in different dir.

    if out is specified, it is assumed to be a file-like object that has a
    write() method. stdout and stderr will be redirected to out.

    returns the exit code of the command.'''
    try:
        stdout.flush()
    except Exception:
        pass
    cmd = quotecommand(cmd)
    oldpython = sys.version_info[0] == 2 and sys.version_info[1] < 7
    if pycompat.sysplatform == 'plan9' and oldpython:
        # subprocess kludge to work around issues in half-baked Python
        # ports, notably bichued/python:
        if cwd is not None:
            os.chdir(cwd)
        rc = os.system(cmd)
    else:
        env = shellenviron(environ)
        if out is not None and not _isstdout(out):
            # capture stdout and stderr together and copy them to ``out``
            # one line at a time
            proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                                    env=env, cwd=cwd,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.STDOUT)
            for line in iter(proc.stdout.readline, ''):
                out.write(line)
            proc.wait()
            rc = proc.returncode
        else:
            # no redirection needed: let the child inherit our streams
            rc = subprocess.call(cmd, shell=True, close_fds=closefds,
                                 env=env, cwd=cwd)
        if pycompat.sysplatform == 'OpenVMS' and rc & 1:
            rc = 0
    return rc
1046 1046
def checksignature(func):
    '''Wrap ``func`` so that calling it with a bad signature is reported.

    A TypeError whose traceback has a single entry (i.e. raised by the
    call itself, not from inside ``func``) is turned into
    error.SignatureError; any other TypeError propagates unchanged.'''
    def check(*args, **kwargs):
        try:
            result = func(*args, **kwargs)
        except TypeError:
            frames = traceback.extract_tb(sys.exc_info()[2])
            if len(frames) == 1:
                raise error.SignatureError
            raise
        return result

    return check
1058 1058
# Hardlinks are problematic on CIFS, do not allow hardlinks
# until we find a way to work around it cleanly (issue4546).
# This is a variable so extensions can opt-in to using them.
allowhardlinks = False

def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
    '''copy a file, preserving mode and optionally other stat info like
    atime/mtime

    An existing dest is removed first.  If hardlink is requested (and
    allowhardlinks is enabled) a hardlink is tried before falling back
    to a normal copy.  A symlink src is recreated as a symlink.

    checkambig argument is used with filestat, and is useful only if
    destination file is guarded by any lock (e.g. repo.lock or
    repo.wlock).

    copystat and checkambig should be exclusive.

    Raises Abort if shutil reports a copy error.
    '''
    assert not (copystat and checkambig)
    oldstat = None
    if os.path.lexists(dest):
        if checkambig:
            oldstat = filestat(dest)
        unlink(dest)
    if allowhardlinks and hardlink:
        try:
            oslink(src, dest)
            return
        except (IOError, OSError):
            pass # fall back to normal copy
    if os.path.islink(src):
        os.symlink(os.readlink(src), dest)
        # copystat is ignored for symlinks, but in general copystat isn't
        # needed for them anyway
    else:
        try:
            shutil.copyfile(src, dest)
            if copystat:
                # copystat also copies mode
                shutil.copystat(src, dest)
            else:
                shutil.copymode(src, dest)
                if oldstat and oldstat.stat:
                    newstat = filestat(dest)
                    if newstat.isambig(oldstat):
                        # stat of copied file is ambiguous to original one.
                        # Use the integer ST_MTIME slot: st_mtime may be a
                        # float, which cannot be masked with '&'.
                        mtime = oldstat.stat[stat.ST_MTIME]
                        advanced = (mtime + 1) & 0x7fffffff
                        os.utime(dest, (advanced, advanced))
        except shutil.Error as inst:
            raise Abort(str(inst))
1106 1106
def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
    """Copy a file or directory tree, hardlinking when possible.

    When ``hardlink`` is None it is enabled if ``src`` and the parent of
    ``dst`` report the same st_dev.  ``progress`` is called with a topic
    and a running count, and with None once this level is done.

    Returns a ``(hardlink, num)`` pair: whether hardlinking is still in
    effect and how many files were processed.
    """
    num = 0

    if hardlink is None:
        srcdev = os.stat(src).st_dev
        dstdev = os.stat(os.path.dirname(dst)).st_dev
        hardlink = (srcdev == dstdev)
    topic = _('linking') if hardlink else _('copying')

    if not os.path.isdir(src):
        linked = False
        if hardlink:
            try:
                oslink(src, dst)
                linked = True
            except (IOError, OSError):
                # stop trying to link for the rest of the tree
                hardlink = False
        if not linked:
            shutil.copy(src, dst)
        num += 1
        progress(topic, num)
    else:
        os.mkdir(dst)

        def nprog(t, pos):
            # offset nested progress by the files already handled here
            if pos is not None:
                return progress(t, pos + num)

        for name, kind in osutil.listdir(src):
            srcname = os.path.join(src, name)
            dstname = os.path.join(dst, name)
            hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
            num += n
    progress(topic, None)

    return hardlink, num
1143 1143
_winreservednames = '''con prn aux nul
    com1 com2 com3 com4 com5 com6 com7 com8 com9
    lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
_winreservedchars = ':*?"<>|'
def checkwinfilename(path):
    r'''Check that the base-relative path is a valid filename on Windows.
    Returns None if the path is ok, or a UI string describing the problem.

    Each path component is checked for reserved characters, control
    characters, reserved device names and a trailing dot or space.

    >>> checkwinfilename("just/a/normal/path")
    >>> checkwinfilename("foo/bar/con.xml")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename("foo/con.xml/bar")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename("foo/bar/xml.con")
    >>> checkwinfilename("foo/bar/AUX/bla.txt")
    "filename contains 'AUX', which is reserved on Windows"
    >>> checkwinfilename("foo/bar/bla:.txt")
    "filename contains ':', which is reserved on Windows"
    >>> checkwinfilename("foo/bar/b\07la.txt")
    "filename contains '\\x07', which is invalid on Windows"
    >>> checkwinfilename("foo/bar/bla ")
    "filename ends with ' ', which is not allowed on Windows"
    >>> checkwinfilename("../bar")
    >>> checkwinfilename("foo\\")
    "filename ends with '\\', which is invalid on Windows"
    >>> checkwinfilename("foo\\/bar")
    "directory name ends with '\\', which is invalid on Windows"
    '''
    if path.endswith('\\'):
        return _("filename ends with '\\', which is invalid on Windows")
    if '\\/' in path:
        return _("directory name ends with '\\', which is invalid on Windows")
    components = path.replace('\\', '/').split('/')
    for component in filter(None, components):
        for char in component:
            if char in _winreservedchars:
                return _("filename contains '%s', which is reserved "
                         "on Windows") % char
            if ord(char) <= 31:
                return _("filename contains %r, which is invalid "
                         "on Windows") % char
        stem = component.partition('.')[0]
        if stem and stem.lower() in _winreservednames:
            return _("filename contains '%s', which is reserved "
                     "on Windows") % stem
        last = component[-1]
        # "not in '..'" is a substring test: it exempts '.' and '..'
        if last in '. ' and component not in '..':
            return _("filename ends with '%s', which is not allowed "
                     "on Windows") % last
1194 1194
# Pick the filename checker matching the running platform.
if pycompat.osname == 'nt':
    checkosfilename = checkwinfilename
else:
    checkosfilename = platform.checkosfilename

# Pick the timer.  time.perf_counter is preferred whenever it exists; it
# must be tested first because time.clock is no longer available on
# recent Python 3 releases and merely reading it would raise
# AttributeError on Windows.
if safehasattr(time, "perf_counter"):
    timer = time.perf_counter
elif pycompat.osname == 'nt':
    timer = time.clock
else:
    timer = time.time
1204 1204
def makelock(info, pathname):
    """Create a lock at ``pathname`` that carries ``info``.

    A symlink whose target is ``info`` is tried first.  If symlinks are
    unavailable or fail for a reason other than EEXIST, a regular file
    is created exclusively and ``info`` is written into it.

    Raises OSError (EEXIST) when the lock already exists.
    """
    try:
        return os.symlink(info, pathname)
    except OSError as why:
        if why.errno == errno.EEXIST:
            raise
    except AttributeError: # no symlink in os
        pass

    ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
    try:
        os.write(ld, info)
    finally:
        # never leak the descriptor, even if the write fails
        os.close(ld)
1217 1217
def readlock(pathname):
    """Return the info stored in the lock at ``pathname``.

    The symlink target is returned when the lock is a symlink.  When
    readlink fails with EINVAL or ENOSYS, or symlinks are unavailable,
    the lock is read as a regular file instead.  Other OSErrors
    propagate.
    """
    try:
        return os.readlink(pathname)
    except OSError as why:
        if why.errno not in (errno.EINVAL, errno.ENOSYS):
            raise
    except AttributeError: # no symlink in os
        pass
    fp = posixfile(pathname)
    try:
        return fp.read()
    finally:
        # close the handle even if reading fails
        fp.close()
1230 1230
def fstat(fp):
    '''Stat a file object, even one lacking a fileno method.

    Uses os.fstat on the descriptor and falls back to os.stat on
    ``fp.name`` when an AttributeError is raised.'''
    try:
        st = os.fstat(fp.fileno())
    except AttributeError:
        st = os.stat(fp.name)
    return st
1237 1237
1238 1238 # File system features
1239 1239
def fscasesensitive(path):
    """
    Return true if the given path is on a case-sensitive filesystem

    Requires a path (like /foo/.hg) ending with a foldable final
    directory component.  The final component is case-folded and
    lstat()ed: an identical stat result means the filesystem is case
    insensitive.  A component that cannot be folded, or a folded path
    that cannot be stat'ed, counts as case-sensitive.
    """
    origstat = os.lstat(path)
    parent, leaf = os.path.split(path)
    folded = leaf.upper()
    if folded == leaf:
        folded = leaf.lower()
        if folded == leaf:
            return True # no evidence against case sensitivity
    try:
        foldedstat = os.lstat(os.path.join(parent, folded))
    except OSError:
        return True
    return not (foldedstat == origstat)
1262 1262
# _re2 is None while re2 is importable but not yet probed, False when it
# is unavailable, and becomes a bool once _re._checkre2() has run.
try:
    import re2
    _re2 = None
except ImportError:
    _re2 = False

class _re(object):
    """Regular expression front end that prefers re2 when it is usable."""

    def _checkre2(self):
        """Probe re2 once and record the outcome in the global _re2."""
        global _re2
        try:
            # check if match works, see issue3964
            _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
        except ImportError:
            _re2 = False

    def compile(self, pat, flags=0):
        '''Compile a regular expression, using re2 if possible

        For best performance, use only re2-compatible regexp features. The
        only flags from the re module that are re2-compatible are
        IGNORECASE and MULTILINE.  If re2 rejects the pattern, the
        standard re module is used instead.'''
        if _re2 is None:
            self._checkre2()
        supported = remod.IGNORECASE | remod.MULTILINE
        if _re2 and not (flags & ~supported):
            # re2 takes flags inline, as a pattern prefix
            if flags & remod.IGNORECASE:
                pat = '(?i)' + pat
            if flags & remod.MULTILINE:
                pat = '(?m)' + pat
            try:
                return re2.compile(pat)
            except re2.error:
                pass
        return remod.compile(pat, flags)

    @propertycache
    def escape(self):
        '''Return the version of escape corresponding to self.compile.

        This is imperfect because whether re2 or re is used for a particular
        function depends on the flags, etc, but it's the best we can do.
        '''
        if _re2 is None:
            self._checkre2()
        return re2.escape if _re2 else remod.escape

re = _re()
1313 1313
_fspathcache = {}
def fspath(name, root):
    '''Get name in the case stored in the filesystem

    The name should be relative to root, and be normcase-ed for efficiency.

    Note that this function is unnecessary, and should not be
    called, for case-sensitive filesystems (simply because it's expensive).

    The root should be normcase-ed, too.

    Directory listings are memoized in _fspathcache; a directory is
    re-listed once when a component is not found in the cached listing.
    Components that still cannot be found are returned unchanged.
    '''
    def _makefspathcacheentry(dir):
        return dict((normcase(n), n) for n in os.listdir(dir))

    seps = pycompat.ossep
    if pycompat.osaltsep:
        seps = seps + pycompat.osaltsep
    # Protect backslashes. This gets silly very quickly.
    # str.replace() returns a new string, so the result has to be kept;
    # otherwise a backslash separator would escape the next character of
    # the character class instead of being matched itself.
    seps = seps.replace('\\', '\\\\')
    pattern = remod.compile(r'([^%s]+)|([%s]+)' % (seps, seps))
    dir = os.path.normpath(root)
    result = []
    for part, sep in pattern.findall(name):
        if sep:
            result.append(sep)
            continue

        if dir not in _fspathcache:
            _fspathcache[dir] = _makefspathcacheentry(dir)
        contents = _fspathcache[dir]

        found = contents.get(part)
        if not found:
            # retry "once per directory" per "dirstate.walk" which
            # may take place for each patches of "hg qpush", for example
            _fspathcache[dir] = contents = _makefspathcacheentry(dir)
            found = contents.get(part)

        result.append(found or part)
        dir = os.path.join(dir, part)

    return ''.join(result)
1356 1356
def checknlink(testfile):
    '''Check whether hardlink count reporting works properly.

    Two scratch files named after ``testfile`` are created, hardlinked
    and inspected with nlinks(); both are removed afterwards.  Returns
    False when the scratch file already exists or any step fails.'''
    def tryunlink(path):
        try:
            os.unlink(path)
        except OSError:
            pass

    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    probe = testfile + ".hgtmp1"
    if os.path.lexists(probe):
        return False
    try:
        posixfile(probe, 'w').close()
    except IOError:
        tryunlink(probe)
        return False

    link = testfile + ".hgtmp2"
    fd = None
    try:
        oslink(probe, link)
        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        fd = posixfile(link)
        return nlinks(link) > 1
    except OSError:
        return False
    finally:
        if fd is not None:
            fd.close()
        tryunlink(probe)
        tryunlink(link)
1392 1392
def endswithsep(path):
    '''Tell whether path ends with os.sep or, if defined, os.altsep.'''
    if path.endswith(pycompat.ossep):
        return True
    altsep = pycompat.osaltsep
    if not altsep:
        # mirror "a or b and c": the falsy altsep itself is the result
        return altsep
    return path.endswith(altsep)
1397 1397
def splitpath(path):
    '''Split path on os.sep and return the list of components.

    os.altsep is deliberately ignored: this is a drop-in alternative to
    a plain "xxx.split(os.sep)".  Run os.path.normpath() on the path
    first if that matters.'''
    separator = pycompat.ossep
    return path.split(separator)
1405 1405
def gui():
    '''Are we running in a GUI?

    Outside of darwin this is true on nt, or else whatever $DISPLAY
    holds.  On darwin an SSH session is never a GUI; otherwise
    osutil.isgui() decides when it exists, and True is assumed when it
    does not.'''
    if pycompat.sysplatform != 'darwin':
        return pycompat.osname == "nt" or encoding.environ.get("DISPLAY")
    if 'SSH_CONNECTION' in encoding.environ:
        # handle SSH access to a box where the user is logged in
        return False
    if getattr(osutil, 'isgui', None):
        # check if a CoreGraphics session is available
        return osutil.isgui()
    # pure build; use a safe default
    return True
1420 1420
def mktempcopy(name, emptyok=False, createmode=None):
    """Create a temporary file with the same contents from name

    The temporary file is created next to ``name``.  The permission
    bits are copied from the original file.

    If the temporary file is going to be truncated immediately, you
    can use emptyok=True as an optimization.

    If ``name`` does not exist, the (empty) temporary file is returned.
    On any other failure the temporary file is removed and the error
    re-raised.

    Returns the name of the temporary file.
    """
    d, fn = os.path.split(name)
    fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, dir=d)
    os.close(fd)
    # Temporary files are created with mode 0600, which is usually not
    # what we want.  If the original file already exists, just copy
    # its mode.  Otherwise, manually obey umask.
    copymode(name, temp, createmode)
    if emptyok:
        return temp
    try:
        try:
            ifp = posixfile(name, "rb")
        except IOError as inst:
            if inst.errno == errno.ENOENT:
                return temp
            if not getattr(inst, 'filename', None):
                inst.filename = name
            raise
        # Close both handles even when the copy fails, so that nothing
        # leaks and the cleanup below can remove the temporary file on
        # platforms that refuse to unlink open files.
        try:
            ofp = posixfile(temp, "wb")
            try:
                for chunk in filechunkiter(ifp):
                    ofp.write(chunk)
            finally:
                ofp.close()
        finally:
            ifp.close()
    except: # re-raises
        try: os.unlink(temp)
        except OSError: pass
        raise
    return temp
1459 1459
class filestat(object):
    """help to exactly detect change of a file

    'stat' attribute is result of 'os.stat()' if specified 'path'
    exists. Otherwise, it is None. This can avoid preparative
    'exists()' examination on client side of this class.
    """
    def __init__(self, path):
        try:
            self.stat = os.stat(path)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            self.stat = None

    __hash__ = object.__hash__

    def __eq__(self, old):
        try:
            # if ambiguity between stat of new and old file is
            # avoided, comparison of size, ctime and mtime is enough
            # to exactly detect change of a file regardless of platform
            return (self.stat.st_size == old.stat.st_size and
                    self.stat.st_ctime == old.stat.st_ctime and
                    self.stat.st_mtime == old.stat.st_mtime)
        except AttributeError:
            # either side has no stat (missing file) or is not a filestat
            return False

    def isambig(self, old):
        """Examine whether new (= self) stat is ambiguous against old one

        "S[N]" below means stat of a file at N-th change:

        - S[n-1].ctime  < S[n].ctime: can detect change of a file
        - S[n-1].ctime == S[n].ctime
          - S[n-1].ctime  < S[n].mtime: means natural advancing (*1)
          - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
          - S[n-1].ctime  > S[n].mtime: never occurs naturally (don't care)
        - S[n-1].ctime  > S[n].ctime: never occurs naturally (don't care)

        Case (*2) above means that a file was changed twice or more at
        same time in sec (= S[n-1].ctime), and comparison of timestamp
        is ambiguous.

        Base idea to avoid such ambiguity is "advance mtime 1 sec, if
        timestamp is ambiguous".

        But advancing mtime only in case (*2) doesn't work as
        expected, because naturally advanced S[n].mtime in case (*1)
        might be equal to manually advanced S[n-1 or earlier].mtime.

        Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
        treated as ambiguous regardless of mtime, to avoid overlooking
        by confliction between such mtime.

        Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
        S[n].mtime", even if size of a file isn't changed.
        """
        try:
            return (self.stat.st_ctime == old.stat.st_ctime)
        except AttributeError:
            return False

    def avoidambig(self, path, old):
        """Change file stat of specified path to avoid ambiguity

        'old' should be previous filestat of 'path'.

        This skips avoiding ambiguity, if a process doesn't have
        appropriate privileges for 'path'.
        """
        # Read mtime through the integer ST_MTIME slot: the st_mtime
        # attribute may be a float, and "float & int" raises TypeError.
        advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff
        try:
            os.utime(path, (advanced, advanced))
        except OSError as inst:
            if inst.errno == errno.EPERM:
                # utime() on the file created by another user causes EPERM,
                # if a process doesn't have appropriate privileges
                return
            raise

    def __ne__(self, other):
        return not self == other
1543 1543
class atomictempfile(object):
    '''writable file object that atomically updates a file

    All writes will go to a temporary copy of the original file. Call
    close() when you are done writing, and atomictempfile will rename
    the temporary copy to the original name, making the changes
    visible. If the object is destroyed without being closed, all your
    writes are discarded.

    checkambig argument of constructor is used with filestat, and is
    useful only if target file is guarded by any lock (e.g. repo.lock
    or repo.wlock).

    Used as a context manager, the file is closed on a clean exit and
    discarded when the block raises.
    '''
    def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
        self.__name = name      # permanent name
        self._tempname = mktempcopy(name, emptyok=('w' in mode),
                                    createmode=createmode)
        self._fp = posixfile(self._tempname, mode)
        self._checkambig = checkambig

        # delegated methods
        self.read = self._fp.read
        self.write = self._fp.write
        self.seek = self._fp.seek
        self.tell = self._fp.tell
        self.fileno = self._fp.fileno

    def close(self):
        """Finish writing and rename the temporary copy into place."""
        if not self._fp.closed:
            self._fp.close()
            filename = localpath(self.__name)
            oldstat = self._checkambig and filestat(filename)
            if oldstat and oldstat.stat:
                rename(self._tempname, filename)
                newstat = filestat(filename)
                if newstat.isambig(oldstat):
                    # stat of changed file is ambiguous to original one.
                    # Use the integer ST_MTIME slot: st_mtime may be a
                    # float, which cannot be masked with '&'.
                    mtime = oldstat.stat[stat.ST_MTIME]
                    advanced = (mtime + 1) & 0x7fffffff
                    os.utime(filename, (advanced, advanced))
            else:
                rename(self._tempname, filename)

    def discard(self):
        """Drop the temporary copy without touching the original file."""
        if not self._fp.closed:
            try:
                os.unlink(self._tempname)
            except OSError:
                pass
            self._fp.close()

    def __del__(self):
        if safehasattr(self, '_fp'): # constructor actually did something
            self.discard()

    def __enter__(self):
        return self

    def __exit__(self, exctype, excvalue, traceback):
        if exctype is not None:
            self.discard()
        else:
            self.close()
1606 1606
def makedirs(name, mode=None, notindexed=False):
    """recursive directory creation with parent mode inheritance

    Missing parents are created first.  A directory that already exists
    (including one created concurrently) is left alone.  When ``mode``
    is given, it is applied with os.chmod() to each directory created
    by this call.

    Newly created directories are marked as "not to be indexed by
    the content indexing service", if ``notindexed`` is specified
    for "write" mode access.
    """
    try:
        makedir(name, notindexed)
    except OSError as firsterr:
        code = firsterr.errno
        if code == errno.EEXIST:
            return
        if code != errno.ENOENT or not name:
            raise
        parent = os.path.dirname(os.path.abspath(name))
        if parent == name:
            raise
        makedirs(parent, mode, notindexed)
        try:
            makedir(name, notindexed)
        except OSError as raceerr:
            # Catch EEXIST to handle races
            if raceerr.errno == errno.EEXIST:
                return
            raise
    if mode is not None:
        os.chmod(name, mode)
1634 1634
def readfile(path):
    """Return the whole content of ``path``, read in binary mode."""
    fp = open(path, 'rb')
    try:
        return fp.read()
    finally:
        fp.close()
1638 1638
def writefile(path, text):
    """Replace the content of ``path`` with ``text`` (binary mode)."""
    fp = open(path, 'wb')
    try:
        fp.write(text)
    finally:
        fp.close()
1642 1642
def appendfile(path, text):
    """Append ``text`` to ``path`` (binary mode), creating it if needed."""
    fp = open(path, 'ab')
    try:
        fp.write(text)
    finally:
        fp.close()
1646 1646
class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
        """in_iter is the iterator that's iterating over the input chunks.

        Input chunks longer than 2**20 are re-sliced into pieces of at
        most 2**18 before being buffered."""
        def splitbig(chunks):
            for chunk in chunks:
                if len(chunk) > 2**20:
                    pos = 0
                    while pos < len(chunk):
                        end = pos + 2 ** 18
                        yield chunk[pos:end]
                        pos = end
                else:
                    yield chunk
        self.iter = splitbig(in_iter)
        # chunks pulled from self.iter but not fully handed out yet
        self._queue = collections.deque()
        # number of leading items of self._queue[0] already consumed
        self._chunkoffset = 0

    def read(self, l=None):
        """Read L bytes of data from the iterator of chunks of data.
        Returns less than L bytes if the iterator runs dry.

        If size parameter is omitted, read everything"""
        if l is None:
            queue = self._queue
            if not queue:
                return ''.join(self.iter)
            # Earlier sized reads may have left chunks (and a partly
            # consumed head chunk) in the queue; hand those out first so
            # that no data is lost, then drain the iterator.
            pending = list(queue)
            pending[0] = pending[0][self._chunkoffset:]
            queue.clear()
            self._chunkoffset = 0
            pending.extend(self.iter)
            return ''.join(pending)

        left = l
        buf = []
        queue = self._queue
        while left > 0:
            # refill the queue
            if not queue:
                target = 2**18
                for chunk in self.iter:
                    queue.append(chunk)
                    target -= len(chunk)
                    if target <= 0:
                        break
                if not queue:
                    break

            # The easy way to do this would be to queue.popleft(), modify the
            # chunk (if necessary), then queue.appendleft(). However, for cases
            # where we read partial chunk content, this incurs 2 dequeue
            # mutations and creates a new str for the remaining chunk in the
            # queue. Our code below avoids this overhead.

            chunk = queue[0]
            chunkl = len(chunk)
            offset = self._chunkoffset

            # Use full chunk.
            if offset == 0 and left >= chunkl:
                left -= chunkl
                queue.popleft()
                buf.append(chunk)
                # self._chunkoffset remains at 0.
                continue

            chunkremaining = chunkl - offset

            # Use all of unconsumed part of chunk.
            if left >= chunkremaining:
                left -= chunkremaining
                queue.popleft()
                # offset == 0 is enabled by block above, so this won't merely
                # copy via ``chunk[0:]``.
                buf.append(chunk[offset:])
                self._chunkoffset = 0

            # Partial chunk needed.
            else:
                buf.append(chunk[offset:offset + left])
                self._chunkoffset += left
                # drives left to <= 0, which ends the loop
                left -= chunkremaining

        return ''.join(buf)
1727 1727
def filechunkiter(f, size=131072, limit=None):
    """Generate the data of file ``f`` in pieces of at most ``size``
    bytes (default 131072), stopping after ``limit`` bytes when a limit
    is given (default is to read all data).  A piece may be shorter
    than ``size`` when it is the last one, or when the file is a socket
    or some other type of file that sometimes reads less data than is
    requested."""
    assert size >= 0
    assert limit is None or limit >= 0
    while True:
        want = size if limit is None else min(limit, size)
        if not want:
            # nothing (more) may be read
            break
        data = f.read(want)
        if not data:
            break
        if limit:
            limit -= len(data)
        yield data
1748 1748
def makedate(timestamp=None):
    '''Return ``timestamp`` (default: the current time) as a
    (unixtime, offset) tuple, where offset is the difference in seconds
    between UTC and the local timezone at that moment.

    Raises Abort for a negative timestamp.'''
    if timestamp is None:
        timestamp = time.time()
    if timestamp < 0:
        hint = _("check your clock")
        raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
    utc = datetime.datetime.utcfromtimestamp(timestamp)
    local = datetime.datetime.fromtimestamp(timestamp)
    delta = utc - local
    return timestamp, delta.days * 86400 + delta.seconds
1761 1761
def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
    """Render a (unixtime, offset) tuple as a time string.

    unixtime is seconds since the epoch, and offset is the time zone's
    number of seconds away from UTC.  Besides strftime directives,
    ``format`` may use %1 (signed hours of the offset), %2 (its minutes)
    and %z (both).  The shifted time is clamped to the signed 32-bit
    range.

    >>> datestr((0, 0))
    'Thu Jan 01 00:00:00 1970 +0000'
    >>> datestr((42, 0))
    'Thu Jan 01 00:00:42 1970 +0000'
    >>> datestr((-42, 0))
    'Wed Dec 31 23:59:18 1969 +0000'
    >>> datestr((0x7fffffff, 0))
    'Tue Jan 19 03:14:07 2038 +0000'
    >>> datestr((-0x80000000, 0))
    'Fri Dec 13 20:45:52 1901 +0000'
    """
    t, tz = date or makedate()
    if "%1" in format or "%2" in format or "%z" in format:
        sign = "-" if tz > 0 else "+"
        hours, mins = divmod(abs(tz) // 60, 60)
        format = (format.replace("%z", "%1%2")
                  .replace("%1", "%c%02d" % (sign, hours))
                  .replace("%2", "%02d" % mins))
    d = min(max(t - tz, -0x80000000), 0x7fffffff)
    # Never use time.gmtime() and datetime.datetime.fromtimestamp()
    # because they use the gmtime() system call which is buggy on Windows
    # for negative values.
    epoch = datetime.datetime(1970, 1, 1)
    return (epoch + datetime.timedelta(seconds=d)).strftime(format)
1797 1797
def shortdate(date=None):
    """Turn a (timestamp, tzoff) tuple into an ISO 8601 date (YYYY-MM-DD)."""
    isoformat = '%Y-%m-%d'
    return datestr(date, format=isoformat)
1801 1801
def parsetimezone(s):
    """Find a trailing timezone, if any, in string ``s``.

    Returns an (offset, remainder) pair: offset is in seconds (positive
    west of UTC) and remainder is ``s`` without the timezone.  When no
    timezone is recognized the result is (None, s)."""

    if s.endswith(("GMT", "UTC")):
        return 0, s[:-3].rstrip()

    # Unix-style timezones [+-]hhmm
    if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
        seconds = (int(s[-4:-2]) * 60 + int(s[-2:])) * 60
        if s[-5] == "+":
            seconds = -seconds
        return seconds, s[:-5].rstrip()

    # ISO8601 trailing Z
    if s.endswith("Z") and s[-2:-1].isdigit():
        return 0, s[:-1]

    # ISO8601-style [+-]hh:mm
    if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
        s[-5:-3].isdigit() and s[-2:].isdigit()):
        seconds = (int(s[-5:-3]) * 60 + int(s[-2:])) * 60
        if s[-6] == "+":
            seconds = -seconds
        return seconds, s[:-6]

    return None, s
1829 1829
def strdate(string, format, defaults=None):
    """Parse a time string against ``format`` and return a
    (unixtime, offset) tuple.

    Date/time elements missing from ``format`` are filled in from
    ``defaults``, which maps each element group to a (biased, current)
    pair.  If the string cannot be parsed, ValueError is raised."""
    defaults = defaults or {}

    # NOTE: unixtime = localunixtime + offset
    offset, date = parsetimezone(string)

    # add missing elements from defaults
    usenow = False # default to using biased defaults
    for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
        if any(("%" + p) in format for p in part):
            # We've found a specific time element, less specific time
            # elements are relative to today
            usenow = True
        else:
            date += "@" + defaults[part][usenow]
            format += "@%" + part[0]

    timetuple = time.strptime(date, format)
    localunixtime = int(calendar.timegm(timetuple))
    if offset is not None:
        return localunixtime + offset, offset
    # local timezone
    unixtime = int(time.mktime(timetuple))
    return unixtime, unixtime - localunixtime
1859 1859
def parsedate(date, formats=None, bias=None):
    """Parse a date/time and return a (unixtime, offset) tuple.

    The date may be a "unixtime offset" string, one of the words 'now',
    'today' or 'yesterday', or text in one of the given formats
    (defaultdateformats when none are given).  A (unixtime, offset)
    tuple is returned as is, and an empty date yields (0, 0).  ``bias``
    supplies the values used for elements missing from the date.

    Raises Abort when no format matches, or when the result is out of
    range.

    >>> parsedate(' today ') == parsedate(\
                                  datetime.date.today().strftime('%b %d'))
    True
    >>> parsedate( 'yesterday ') == parsedate((datetime.date.today() -\
                                               datetime.timedelta(days=1)\
                                              ).strftime('%b %d'))
    True
    >>> now, tz = makedate()
    >>> strnow, strtz = parsedate('now')
    >>> (strnow - now) < 1
    True
    >>> tz == strtz
    True
    """
    if bias is None:
        bias = {}
    if not date:
        return 0, 0
    if isinstance(date, tuple) and len(date) == 2:
        return date
    if not formats:
        formats = defaultdateformats
    date = date.strip()

    if date == 'now' or date == _('now'):
        return makedate()
    if date == 'today' or date == _('today'):
        date = datetime.date.today().strftime('%b %d')
    elif date == 'yesterday' or date == _('yesterday'):
        yesterday = datetime.date.today() - datetime.timedelta(days=1)
        date = yesterday.strftime('%b %d')

    try:
        when, offset = map(int, date.split(' '))
    except ValueError:
        # fill out defaults
        now = makedate()
        defaults = {}
        for part in ("d", "mb", "yY", "HI", "M", "S"):
            # this piece is for rounding the specific end of unknowns
            biased = bias.get(part)
            if biased is None:
                biased = "00" if part[0] in "HMS" else "0"

            # this piece is for matching the generic end to today's date
            current = datestr(now, "%" + part[0])

            defaults[part] = (biased, current)

        for fmt in formats:
            try:
                when, offset = strdate(date, fmt, defaults)
            except (ValueError, OverflowError):
                continue
            break
        else:
            raise Abort(_('invalid date: %r') % date)
    # validate explicit (probably user-specified) date and
    # time zone offset. values must fit in signed 32 bits for
    # current 32-bit linux runtimes. timezones go from UTC-12
    # to UTC+14
    if not (-0x80000000 <= when <= 0x7fffffff):
        raise Abort(_('date exceeds 32 bits: %d') % when)
    if not (-50400 <= offset <= 43200):
        raise Abort(_('impossible time zone offset: %d') % offset)
    return when, offset
1936 1936
def matchdate(date):
    """Return a function that matches a given date match specifier

    The returned function takes a unixtime and returns a boolean.

    Formats include:

    '{date}' match a given date to the accuracy provided

    '<{date}' on or before a given date

    '>{date}' on or after a given date

    '-{days}' within the given number of days of now

    '{date} to {date}' an inclusive range

    Raises Abort for an empty specifier, a malformed day count, or a date
    that parsedate() cannot parse.

    >>> p1 = parsedate("10:29:59")
    >>> p2 = parsedate("10:30:00")
    >>> p3 = parsedate("10:30:59")
    >>> p4 = parsedate("10:31:00")
    >>> p5 = parsedate("Sep 15 10:30:00 1999")
    >>> f = matchdate("10:30")
    >>> f(p1[0])
    False
    >>> f(p2[0])
    True
    >>> f(p3[0])
    True
    >>> f(p4[0])
    False
    >>> f(p5[0])
    False
    """

    def lower(date):
        # earliest instant covered by a partially specified date
        d = {'mb': "1", 'd': "1"}
        return parsedate(date, extendeddateformats, d)[0]

    def upper(date):
        # latest instant covered by a partially specified date; the last
        # day of the month is found by trying 31, 30, 29 and finally 28
        d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
        for days in ("31", "30", "29"):
            try:
                d["d"] = days
                return parsedate(date, extendeddateformats, d)[0]
            except Abort:
                pass
        d["d"] = "28"
        return parsedate(date, extendeddateformats, d)[0]

    date = date.strip()

    if not date:
        raise Abort(_("dates cannot consist entirely of whitespace"))
    elif date[0] == "<":
        if not date[1:]:
            raise Abort(_("invalid day spec, use '<DATE'"))
        when = upper(date[1:])
        return lambda x: x <= when
    elif date[0] == ">":
        if not date[1:]:
            raise Abort(_("invalid day spec, use '>DATE'"))
        when = lower(date[1:])
        return lambda x: x >= when
    elif date[0] == "-":
        try:
            days = int(date[1:])
        except ValueError:
            raise Abort(_("invalid day spec: %s") % date[1:])
        if days < 0:
            raise Abort(_("%s must be nonnegative (see 'hg help dates')")
                % date[1:])
        when = makedate()[0] - days * 3600 * 24
        return lambda x: x >= when
    elif " to " in date:
        # split only once: a spec with several " to " separators must be
        # rejected by parsedate() as an invalid date (Abort) rather than
        # fail here with an unpacking ValueError
        a, b = date.split(" to ", 1)
        start, stop = lower(a), upper(b)
        return lambda x: x >= start and x <= stop
    else:
        start, stop = lower(date), upper(date)
        return lambda x: x >= start and x <= stop
2012 2012
def stringmatcher(pattern, casesensitive=True):
    """
    build a matcher from a string with an optional 're:' or 'literal:' prefix.

    returns a (kind, pattern, matcher) tuple where kind is 're' or
    'literal', pattern is the input with the recognized prefix removed,
    and matcher is a callable taking the string to test.
    a missing or unknown prefix means a literal match.
    raises error.ParseError if a 're:' pattern does not compile.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test('abcdefg', 'abc', 'def', 'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test('re:a.+b', 'nomatch', 'fooadef', 'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test('literal:re:foobar', 'foobar', 're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test('foo:bar', 'foo', 'bar', 'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest('re:A.+b', 'nomatch', 'fooadef', 'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest('ABCDEFG', 'abc', 'def', 'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    if pattern.startswith('re:'):
        regexsrc = pattern[3:]
        flags = 0 if casesensitive else remod.I
        try:
            compiled = remod.compile(regexsrc, flags)
        except remod.error as e:
            raise error.ParseError(_('invalid regular expression: %s')
                                   % e)
        return 're', regexsrc, compiled.search

    if pattern.startswith('literal:'):
        pattern = pattern[8:]

    if casesensitive:
        return 'literal', pattern, pattern.__eq__

    # compare the case-folded forms of both sides
    ipat = encoding.lower(pattern)
    return 'literal', pattern, lambda s: ipat == encoding.lower(s)
2071 2071
def shortuser(user):
    """Return a short representation of a user name or email address.

    The text from the first '@' onwards is dropped, then everything up to
    and including the first '<', then the text from the first space
    onwards, and finally the text from the first '.' onwards.
    """
    user = user.split('@', 1)[0]
    if '<' in user:
        user = user.split('<', 1)[1]
    user = user.split(' ', 1)[0]
    return user.split('.', 1)[0]
2087 2087
def emailuser(user):
    """Return the user portion of an email address.

    The text from the first '@' onwards is dropped, then everything up to
    and including the first '<' of what remains.
    """
    user = user.split('@', 1)[0]
    if '<' in user:
        user = user.split('<', 1)[1]
    return user
2097 2097
def email(author):
    '''get email of author.

    Returns the text between the first '<' and the first '>'. A missing
    '<' means "from the start", a missing '>' means "to the end".
    '''
    start = author.find('<') + 1
    end = author.find('>')
    if end == -1:
        return author[start:]
    return author[start:end]
2104 2104
def ellipsis(text, maxlength=400):
    """Trim string to at most maxlength (default: 400) columns in display.

    The work is delegated to encoding.trim() with '...' as the ellipsis
    marker.
    """
    trimmed = encoding.trim(text, maxlength, ellipsis='...')
    return trimmed
2108 2108
def unitcountfn(*unittable):
    '''return a function that renders a readable count of some quantity

    Each entry of unittable is a (multiplier, divisor, format) tuple. The
    returned function scans the table in order and uses the first entry
    for which the magnitude of count is at least divisor * multiplier,
    rendering count / divisor with that entry's format. If no entry
    qualifies, the format of the last entry is applied to count itself.

    The magnitude (absolute value) is compared so that negative quantities
    get the same unit as their positive counterparts instead of always
    falling through to the last entry.
    '''

    def go(count):
        for multiplier, divisor, format in unittable:
            if abs(count) >= divisor * multiplier:
                return format % (count / float(divisor))
        return unittable[-1][2] % count

    return go
2119 2119
# Render a byte count with a GB/MB/KB/bytes unit. Rows are
# (multiplier, divisor, format) as consumed by unitcountfn(); within a
# unit, larger values are shown with fewer decimals.
bytecount = unitcountfn(
    (100, 1 << 30, _('%.0f GB')),
    (10, 1 << 30, _('%.1f GB')),
    (1, 1 << 30, _('%.2f GB')),
    (100, 1 << 20, _('%.0f MB')),
    (10, 1 << 20, _('%.1f MB')),
    (1, 1 << 20, _('%.2f MB')),
    (100, 1 << 10, _('%.0f KB')),
    (10, 1 << 10, _('%.1f KB')),
    (1, 1 << 10, _('%.2f KB')),
    (1, 1, _('%.0f bytes')),
    )
2132 2132
def uirepr(s):
    """Return repr(s) with every doubled backslash collapsed to one.

    This avoids the double backslashes repr() produces for Windows paths.
    """
    quoted = repr(s)
    return quoted.replace('\\\\', '\\')
2136 2136
# The width-aware TextWrapper subclass is built lazily: the first call
# defines it, rebinds the module-level name MBTextWrapper to the class, and
# returns an instance, so later calls instantiate the class directly.
def MBTextWrapper(**kwargs):
    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each characters.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in East Asian area, but as narrow in other.

        This requires user decision to determine width of such characters.
        """
        def _cutdown(self, ucstr, space_left):
            """Split ucstr into (head, tail) so that head is the longest
            prefix whose summed encoding.ucolwidth() does not exceed
            space_left."""
            l = 0
            colwidth = encoding.ucolwidth
            for i in xrange(len(ucstr)):
                l += colwidth(ucstr[i])
                if space_left < l:
                    return (ucstr[:i], ucstr[i:])
            return ucstr, ''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            """Deal with a chunk that is wider than a whole line, cutting it
            by display columns instead of by character count."""
            # always leave room for at least one column
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                cut, res = self._cutdown(reversed_chunks[-1], space_left)
                cur_line.append(cut)
                reversed_chunks[-1] = res
            elif not cur_line:
                # not allowed to break the word: give it a line of its own
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            """Group chunks into lines, measuring each chunk with
            encoding.ucolwidth(), and return the list of lines.

            Raises ValueError if self.width is not positive. The chunks
            list is consumed (reversed in place and popped from).
            """
            colwidth = encoding.ucolwidth

            lines = []
            if self.width <= 0:
                raise ValueError("invalid width %r (must be > 0)" % self.width)

            # Arrange in reverse order so items can be efficiently popped
            # from a stack of chunks.
            chunks.reverse()

            while chunks:

                # Start the list of chunks that will make up the current line.
                # cur_len is just the length of all the chunks in cur_line.
                cur_line = []
                cur_len = 0

                # Figure out which static string will prefix this line.
                if lines:
                    indent = self.subsequent_indent
                else:
                    indent = self.initial_indent

                # Maximum width for this line.
                width = self.width - len(indent)

                # First chunk on line is whitespace -- drop it, unless this
                # is the very beginning of the text (i.e. no lines started yet).
                if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                    del chunks[-1]

                while chunks:
                    l = colwidth(chunks[-1])

                    # Can at least squeeze this chunk onto the current line.
                    if cur_len + l <= width:
                        cur_line.append(chunks.pop())
                        cur_len += l

                    # Nope, this line is full.
                    else:
                        break

                # The current line is full, and the next chunk is too big to
                # fit on *any* line (not just this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # If the last chunk on this line is all whitespace, drop it.
                if (self.drop_whitespace and
                    cur_line and cur_line[-1].strip() == ''):
                    del cur_line[-1]

                # Convert current line back to a string and store it in list
                # of all lines (return value).
                if cur_line:
                    lines.append(indent + ''.join(cur_line))

            return lines

    # replace this factory function with the class itself
    global MBTextWrapper
    MBTextWrapper = tw
    return tw(**kwargs)
2240 2240
def wrap(line, width, initindent='', hangindent=''):
    """Wrap line to width display columns and return the encoded result.

    initindent prefixes the first output line and hangindent the following
    ones. All three strings are decoded with encoding.encoding (error mode
    encoding.encodingmode), filled with MBTextWrapper, and the result is
    encoded back with encoding.encoding.
    """
    widest = max(len(hangindent), len(initindent))
    if width <= widest:
        # adjust for weird terminal size
        width = max(78, widest + 1)

    codec = pycompat.sysstr(encoding.encoding)
    errmode = pycompat.sysstr(encoding.encodingmode)
    uline = line.decode(codec, errmode)
    uinit = initindent.decode(codec, errmode)
    uhang = hangindent.decode(codec, errmode)

    wrapper = MBTextWrapper(width=width,
                            initial_indent=uinit,
                            subsequent_indent=uhang)
    return wrapper.fill(uline).encode(codec)
2256 2256
# iterfile(fp) returns an iterable over the lines of fp. Which definition
# is installed depends on the running interpreter, see below.
if (pyplatform.python_implementation() == 'CPython' and
    sys.version_info < (3, 0)):
    # There is an issue in CPython that some IO methods do not handle EINTR
    # correctly. The following table shows what CPython version (and functions)
    # are affected (buggy: has the EINTR bug, okay: otherwise):
    #
    #                | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
    #   --------------------------------------------------
    #    fp.__iter__ | buggy   | buggy           | okay
    #    fp.read*    | buggy   | okay [1]        | okay
    #
    # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
    #
    # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
    # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
    #
    # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
    # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
    # CPython 2, because CPython 2 maintains an internal readahead buffer for
    # fp.__iter__ but not other fp.read* methods.
    #
    # On modern systems like Linux, the "read" syscall cannot be interrupted
    # when reading "fast" files like on-disk files. So the EINTR issue only
    # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
    # files approximately as "fast" files and use the fast (unsafe) code path,
    # to minimize the performance impact.
    if sys.version_info >= (2, 7, 4):
        # fp.readline deals with EINTR correctly, use it as a workaround.
        def _safeiterfile(fp):
            """Iterate over fp by calling fp.readline() until it returns
            the empty string."""
            return iter(fp.readline, '')
    else:
        # fp.read* are broken too, manually deal with EINTR in a stupid way.
        # note: this may block longer than necessary because of bufsize.
        def _safeiterfile(fp, bufsize=4096):
            """Yield the lines of fp by reading its file descriptor with
            os.read() in bufsize pieces, retrying reads that fail with
            EINTR."""
            fd = fp.fileno()
            # text read so far that is not yet terminated by a newline
            line = ''
            while True:
                try:
                    buf = os.read(fd, bufsize)
                except OSError as ex:
                    # os.read only raises EINTR before any data is read
                    if ex.errno == errno.EINTR:
                        continue
                    else:
                        raise
                line += buf
                if '\n' in buf:
                    # emit every complete line, keep the trailing partial one
                    splitted = line.splitlines(True)
                    line = ''
                    for l in splitted:
                        if l[-1] == '\n':
                            yield l
                        else:
                            line = l
                if not buf:
                    # an empty read marks end of file
                    break
            if line:
                # last line without a trailing newline
                yield line

    def iterfile(fp):
        """Return fp itself unless it is a built-in file object that is not
        a regular file, in which case return the EINTR-safe iterator."""
        fastpath = True
        if type(fp) is file:
            fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
        if fastpath:
            return fp
        else:
            return _safeiterfile(fp)
else:
    # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
    def iterfile(fp):
        """Return fp unchanged."""
        return fp
2328 2328
def iterlines(iterator):
    """Yield, in order, each line of each chunk produced by iterator.

    Chunks are split with splitlines(), so the yielded lines carry no line
    terminator.
    """
    for block in iterator:
        for text in block.splitlines():
            yield text
2333 2333
def expandpath(path):
    """Expand environment variables, then a leading '~', in path."""
    withvars = os.path.expandvars(path)
    return os.path.expanduser(withvars)
2336 2336
def hgcmd():
    """Return the command used to execute current hg, as a list

    This is different from hgexecutable() because on Windows we want
    to avoid things opening new shell windows like batch files, so we
    get either the python call or current executable.
    """
    if not mainfrozen():
        return gethgcmd()
    if getattr(sys, 'frozen', None) == 'macosx_app':
        # Env variable set by py2app
        return [encoding.environ['EXECUTABLEPATH']]
    return [pycompat.sysexecutable]
2351 2351
def rundetached(args, condfn):
    """Execute the argument list in a detached process.

    condfn is a callable which is called repeatedly and should return
    True once the child process is known to have started successfully.
    At this point, the child process PID is returned. If the child
    process fails to start or finishes before condfn() evaluates to
    True, return -1.
    """
    # Windows case is easier because the child process is either
    # successfully starting and validating the condition or exiting
    # on failure. We just poll on its PID. On Unix, if the child
    # process fails to start, it will be left in a zombie state until
    # the parent wait on it, which we cannot do since we expect a long
    # running process on success. Instead we listen for SIGCHLD telling
    # us our child process terminated.
    terminated = set()
    def handler(signum, frame):
        # os.wait() returns a (pid, status) pair; keep only the pid so
        # that the "pid in terminated" test below can actually match
        terminated.add(os.wait()[0])
    prevhandler = None
    SIGCHLD = getattr(signal, 'SIGCHLD', None)
    if SIGCHLD is not None:
        prevhandler = signal.signal(SIGCHLD, handler)
    try:
        pid = spawndetached(args)
        while not condfn():
            # condfn() is checked again to avoid reporting a failure for
            # a child that satisfied the condition just before exiting
            if ((pid in terminated or not testpid(pid))
                and not condfn()):
                return -1
            time.sleep(0.1)
        return pid
    finally:
        # put the previous SIGCHLD handler back, if we replaced one
        if prevhandler is not None:
            signal.signal(SIGCHLD, prevhandler)
2386 2386
def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
    """Return the result of interpolating items in the mapping into string s.

    prefix is a single character string, or a two character string with
    a backslash as the first character if the prefix needs to be escaped in
    a regular expression.

    mapping maps the names that may follow the prefix to their replacement
    text. Its keys are inserted into a regular expression as-is. The
    mapping passed in is never modified.

    fn is an optional function that will be applied to the replacement text
    just before replacement.

    escape_prefix is an optional flag that allows using doubled prefix for
    its escaping.
    """
    fn = fn or (lambda s: s)
    patterns = '|'.join(mapping.keys())
    if escape_prefix:
        patterns += '|' + prefix
        if len(prefix) > 1:
            prefix_char = prefix[1:]
        else:
            prefix_char = prefix
        # a doubled prefix stands for the prefix character itself; register
        # it in a private copy so the caller's mapping stays untouched
        mapping = dict(mapping)
        mapping[prefix_char] = prefix_char
    r = remod.compile(r'%s(%s)' % (prefix, patterns))
    # x.group() is the prefix character followed by the matched key
    return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2411 2411
def getport(port):
    """Return the port for a given network service.

    Anything int() accepts is returned as that integer. Otherwise the
    value is looked up as a service name with socket.getservbyname(), and
    Abort is raised if the lookup fails.
    """
    try:
        number = int(port)
    except ValueError:
        # not numeric: fall through to the service name lookup
        pass
    else:
        return number

    try:
        return socket.getservbyname(port)
    except socket.error:
        raise Abort(_("no port number associated with service '%s'") % port)
2428 2428
# lower-case spellings recognized by parsebool()
_booleans = {
    '1': True,
    'yes': True,
    'true': True,
    'on': True,
    'always': True,
    '0': False,
    'no': False,
    'false': False,
    'off': False,
    'never': False,
}

def parsebool(s):
    """Parse s into a boolean.

    The comparison ignores case. If s is not a valid boolean, returns None.
    """
    return _booleans.get(s.lower())
2439 2439
# Maps every two-character hex string (each character taken from
# string.hexdigits, so both upper and lower case) to the character with
# that code.
_hextochr = dict((a + b, chr(int(a + b, 16)))
                 for a in string.hexdigits for b in string.hexdigits)
2442 2442
class url(object):
    r"""Reliable URL parser.

    This parses URLs and provides attributes for the following
    components:

    <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>

    Missing components are set to None. The only exception is
    fragment, which is set to '' if present but empty.

    If parsefragment is False, fragment is included in query. If
    parsequery is False, query is included in path. If both are
    False, both fragment and query are included in path.

    See http://www.ietf.org/rfc/rfc2396.txt for more information.

    Note that for backward compatibility reasons, bundle URLs do not
    take host names. That means 'bundle://../' has a path of '../'.

    Examples:

    >>> url('http://www.ietf.org/rfc/rfc2396.txt')
    <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
    >>> url('ssh://[::1]:2200//home/joe/repo')
    <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
    >>> url('file:///home/joe/repo')
    <url scheme: 'file', path: '/home/joe/repo'>
    >>> url('file:///c:/temp/foo/')
    <url scheme: 'file', path: 'c:/temp/foo/'>
    >>> url('bundle:foo')
    <url scheme: 'bundle', path: 'foo'>
    >>> url('bundle://../foo')
    <url scheme: 'bundle', path: '../foo'>
    >>> url(r'c:\foo\bar')
    <url path: 'c:\\foo\\bar'>
    >>> url(r'\\blah\blah\blah')
    <url path: '\\\\blah\\blah\\blah'>
    >>> url(r'\\blah\blah\blah#baz')
    <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
    >>> url(r'file:///C:\users\me')
    <url scheme: 'file', path: 'C:\\users\\me'>

    Authentication credentials:

    >>> url('ssh://joe:xyz@x/repo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
    >>> url('ssh://joe@x/repo')
    <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>

    Query strings and fragments:

    >>> url('http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url('http://host/a?b#c', parsequery=False, parsefragment=False)
    <url scheme: 'http', host: 'host', path: 'a?b#c'>

    Empty path:

    >>> url('')
    <url path: ''>
    >>> url('#a')
    <url path: '', fragment: 'a'>
    >>> url('http://host/')
    <url scheme: 'http', host: 'host', path: ''>
    >>> url('http://host/#a')
    <url scheme: 'http', host: 'host', path: '', fragment: 'a'>

    Only scheme:

    >>> url('http:')
    <url scheme: 'http'>
    """

    # characters left unquoted in user and passwd when rebuilding the URL
    _safechars = "!~*'()+"
    # characters left unquoted in path and fragment when rebuilding the URL
    _safepchars = "/!~*'()+:\\"
    # matches a leading "<scheme>:"
    _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match

    def __init__(self, path, parsequery=True, parsefragment=True):
        """Parse path into its components.

        parsequery and parsefragment control whether '?' and '#' are
        treated as component separators. Raises Abort for a file:// URL
        whose host is not localhost, 127.0.0.1 or [::1].
        """
        # We slowly chomp away at path until we have only the path left
        self.scheme = self.user = self.passwd = self.host = None
        self.port = self.path = self.query = self.fragment = None
        # stays True for inputs handled as plain paths (and bundle: paths);
        # set to False once a scheme has been split off
        self._localpath = True
        # the "host:port" text before it is split; used by localpath()
        self._hostport = ''
        # the string as given by the caller, returned by localpath() for
        # schemes other than file and bundle
        self._origpath = path

        if parsefragment and '#' in path:
            path, self.fragment = path.split('#', 1)

        # special case for Windows drive letters and UNC paths
        if hasdriveletter(path) or path.startswith('\\\\'):
            self.path = path
            return

        # For compatibility reasons, we can't handle bundle paths as
        # normal URLS
        if path.startswith('bundle:'):
            self.scheme = 'bundle'
            path = path[7:]
            if path.startswith('//'):
                path = path[2:]
            self.path = path
            return

        if self._matchscheme(path):
            parts = path.split(':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = ''
                return
        else:
            if self._localpath:
                self.path = path
                return

            if parsequery and '?' in path:
                path, self.query = path.split('?', 1)
                if not path:
                    path = None
                if not self.query:
                    self.query = None

            # // is required to specify a host/authority
            if path and path.startswith('//'):
                parts = path[2:].split('/', 1)
                if len(parts) > 1:
                    self.host, path = parts
                else:
                    self.host = parts[0]
                    path = None
                if not self.host:
                    self.host = None
                    # path of file:///d is /d
                    # path of file:///d:/ is d:/, not /d:/
                    if path and not hasdriveletter(path):
                        path = '/' + path

            if self.host and '@' in self.host:
                # split on the last '@'; the part before it holds the
                # credentials
                self.user, self.host = self.host.rsplit('@', 1)
                if ':' in self.user:
                    self.user, self.passwd = self.user.split(':', 1)
                if not self.host:
                    self.host = None

            # Don't split on colons in IPv6 addresses without ports
            if (self.host and ':' in self.host and
                not (self.host.startswith('[') and self.host.endswith(']'))):
                self._hostport = self.host
                self.host, self.port = self.host.rsplit(':', 1)
                if not self.host:
                    self.host = None

            if (self.host and self.scheme == 'file' and
                self.host not in ('localhost', '127.0.0.1', '[::1]')):
                raise Abort(_('file:// URLs can only refer to localhost'))

        self.path = path

        # leave the query string escaped
        for a in ('user', 'passwd', 'host', 'port',
                  'path', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, pycompat.urlunquote(v))

    def __repr__(self):
        """Return '<url ...>' listing every component that is not None."""
        attrs = []
        for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
                  'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                attrs.append('%s: %r' % (a, v))
        return '<url %s>' % ', '.join(attrs)

    def __str__(self):
        r"""Join the URL's components back into a URL string.

        Examples:

        >>> str(url('http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
        'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
        >>> str(url('http://user:pw@host:80/?foo=bar&baz=42'))
        'http://user:pw@host:80/?foo=bar&baz=42'
        >>> str(url('http://user:pw@host:80/?foo=bar%3dbaz'))
        'http://user:pw@host:80/?foo=bar%3dbaz'
        >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> str(url('http://localhost:80//'))
        'http://localhost:80//'
        >>> str(url('http://localhost:80/'))
        'http://localhost:80/'
        >>> str(url('http://localhost:80'))
        'http://localhost:80/'
        >>> str(url('bundle:foo'))
        'bundle:foo'
        >>> str(url('bundle://../foo'))
        'bundle:../foo'
        >>> str(url('path'))
        'path'
        >>> str(url('file:///tmp/foo/bar'))
        'file:///tmp/foo/bar'
        >>> str(url('file:///c:/tmp/foo/bar'))
        'file:///c:/tmp/foo/bar'
        >>> print url(r'bundle:foo\bar')
        bundle:foo\bar
        >>> print url(r'file:///D:\data\hg')
        file:///D:\data\hg
        """
        # on Python 3 the result of __bytes__() is passed through
        # encoding.fromlocal() and decoded as UTF-8; otherwise it is
        # returned as is
        if pycompat.ispy3:
            return encoding.fromlocal(self.__bytes__()).decode('utf-8')
        return self.__bytes__()

    def __bytes__(self):
        """Rebuild the URL string from the stored components."""
        if self._localpath:
            # plain and bundle: paths are emitted without any quoting
            s = self.path
            if self.scheme == 'bundle':
                s = 'bundle:' + s
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if self.user or self.passwd or self.host:
            s += '//'
        elif self.scheme and (not self.path or self.path.startswith('/')
                              or hasdriveletter(self.path)):
            s += '//'
            # a drive-letter path gets a third slash (file:///c:/...)
            if hasdriveletter(self.path):
                s += '/'
        if self.user:
            s += urlreq.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            # bracketed (IPv6 literal) hosts are emitted unquoted
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urlreq.quote(self.host)
            else:
                s += self.host
        if self.port:
            s += ':' + urlreq.quote(self.port)
        if self.host:
            s += '/'
        if self.path:
            # TODO: similar to the query string, we should not unescape the
            # path when we store it, the path might contain '%2f' = '/',
            # which we should *not* escape.
            s += urlreq.quote(self.path, safe=self._safepchars)
        if self.query:
            # we store the query in escaped form.
            s += '?' + self.query
        if self.fragment is not None:
            s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
        return s

    def authinfo(self):
        """Return (url string without credentials, auth tuple or None).

        The auth tuple is (None, (url, host), user, passwd or ''); None is
        returned in its place when the URL carries no user name.
        """
        user, passwd = self.user, self.passwd
        try:
            # temporarily blank the credentials to render the bare URL
            self.user, self.passwd = None, None
            s = str(self)
        finally:
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        # authinfo[1] is passed to urllib2 password manager, and its
        # URIs must not contain credentials. The host is passed in the
        # URIs list because Python < 2.4.3 uses only that to search for
        # a password.
        return (s, (None, (s, self.host),
                    self.user, self.passwd or ''))

    def isabs(self):
        """Return True for a remote URL or an absolute local path."""
        if self.scheme and self.scheme != 'file':
            return True # remote URL
        if hasdriveletter(self.path):
            return True # absolute for our purposes - can't be joined()
        if self.path.startswith(r'\\'):
            return True # Windows UNC path
        if self.path.startswith('/'):
            return True # POSIX-style
        return False

    def localpath(self):
        """Return the filesystem path for file: and bundle: URLs, and the
        original string for anything else."""
        if self.scheme == 'file' or self.scheme == 'bundle':
            path = self.path or '/'
            # For Windows, we need to promote hosts containing drive
            # letters to paths with drive letters.
            if hasdriveletter(self._hostport):
                path = self._hostport + '/' + self.path
            elif (self.host is not None and self.path
                  and not hasdriveletter(path)):
                path = '/' + path
            return path
        return self._origpath

    def islocal(self):
        '''whether localpath will return something that posixfile can open'''
        return (not self.scheme or self.scheme == 'file'
                or self.scheme == 'bundle')
2748 2748
def hasscheme(path):
    '''Report whether ``path`` parses as a URL that carries a scheme.'''
    parsed = url(path)
    return bool(parsed.scheme)
2751 2751
def hasdriveletter(path):
    '''Report whether ``path`` starts with a drive letter such as ``c:``.

    Always returns a real boolean. Empty and None inputs yield False
    rather than being passed through as the (falsy) input value.
    '''
    return bool(path) and path[1:2] == ':' and path[0:1].isalpha()
2754 2754
def urllocalpath(path):
    '''Return the local path for ``path``, parsed as a URL without query
    or fragment handling.'''
    parsed = url(path, parsequery=False, parsefragment=False)
    return parsed.localpath()
2757 2757
def hidepassword(u):
    '''hide user credential in a url string'''
    parsed = url(u)
    if parsed.passwd:
        # Mask the password; everything else is rendered unchanged.
        parsed.passwd = '***'
    return str(parsed)
2764 2764
def removeauth(u):
    '''remove all authentication information from a url string'''
    parsed = url(u)
    parsed.user = None
    parsed.passwd = None
    return str(parsed)
2770 2770
# Formatter used by timed() to render an elapsed time with a unit of
# s, ms, us or ns. Each row is (multiplier, divisor, format string).
# NOTE(review): how a row is selected is decided by unitcountfn(), which is
# defined elsewhere in this file - confirm the exact rule there.
timecount = unitcountfn(
    (1, 1e3, _('%.0f s')),
    (100, 1, _('%.1f s')),
    (10, 1, _('%.2f s')),
    (1, 1, _('%.3f s')),
    (100, 0.001, _('%.1f ms')),
    (10, 0.001, _('%.2f ms')),
    (1, 0.001, _('%.3f ms')),
    (100, 0.000001, _('%.1f us')),
    (10, 0.000001, _('%.2f us')),
    (1, 0.000001, _('%.3f us')),
    (100, 0.000000001, _('%.1f ns')),
    (10, 0.000000001, _('%.2f ns')),
    (1, 0.000000001, _('%.3f ns')),
    )
2786 2786
# Current indentation, in spaces, of timed() reports. Kept in a list so the
# nested wrapper can update it in place.
_timenesting = [0]

def timed(func):
    '''Report the execution time of a function call to stderr.

    During development, use as a decorator when you need to measure
    the cost of a function, e.g. as follows:

    @util.timed
    def foo(a, b, c):
        pass
    '''

    def wrapper(*args, **kwargs):
        began = timer()
        step = 2
        # Nested timed calls are reported with a deeper indent.
        _timenesting[0] += step
        try:
            return func(*args, **kwargs)
        finally:
            elapsed = timer() - began
            _timenesting[0] -= step
            prefix = ' ' * _timenesting[0]
            report = '%s%s: %s\n' % (prefix, func.__name__,
                                     timecount(elapsed))
            stderr.write(report)
    return wrapper
2813 2813
# Recognized size suffixes and their byte multipliers. They are tried in
# order, so the bare 'b' suffix must come after 'kb', 'mb' and 'gb'.
_sizeunits = (
    ('m', 2**20),
    ('k', 2**10),
    ('g', 2**30),
    ('kb', 2**10),
    ('mb', 2**20),
    ('gb', 2**30),
    ('b', 1),
)

def sizetoint(s):
    '''Convert a space specifier to a byte count.

    Raises error.ParseError when the specifier cannot be parsed.

    >>> sizetoint('30')
    30
    >>> sizetoint('2.2kb')
    2252
    >>> sizetoint('6M')
    6291456
    '''
    spec = s.strip().lower()
    try:
        for suffix, factor in _sizeunits:
            if not spec.endswith(suffix):
                continue
            number = spec[:-len(suffix)]
            return int(float(number) * factor)
        # No unit suffix: the whole specifier must be a plain integer.
        return int(spec)
    except ValueError:
        raise error.ParseError(_("couldn't parse size: %s") % s)
2835 2835
class hooks(object):
    '''A collection of hook functions that can be used to extend a
    function's behavior. Hooks are called in lexicographic order,
    based on the names of their sources.'''

    def __init__(self):
        # List of (source, hook) pairs in registration order.
        self._hooks = []

    def add(self, source, hook):
        '''Register ``hook`` under the name ``source``.'''
        entry = (source, hook)
        self._hooks.append(entry)

    def __call__(self, *args):
        '''Call every hook with ``args`` and return the list of results,
        ordered by source name.'''
        self._hooks.sort(key=lambda entry: entry[0])
        return [entry[1](*args) for entry in self._hooks]
2853 2853
def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%s', depth=0):
    '''Yields lines for a nicely formatted stacktrace.
    Skips the 'skip' last entries, then return the last 'depth' entries.
    Each file+linenumber is formatted according to fileline.
    Each line is formatted according to line.
    If line is None, it yields:
      length of longest filepath+line number,
      filepath+linenumber,
      function

    Not to be used in production code but very convenient while developing.
    '''
    # Drop this function's own frame plus the 'skip' innermost callers.
    stack = traceback.extract_stack()[:-skip - 1]
    entries = []
    for fn, ln, func, _text in stack:
        entries.append((fileline % (fn, ln), func))
    # A depth of 0 selects every remaining entry ([-0:] is the whole list).
    entries = entries[-depth:]
    if not entries:
        return
    fnmax = max(len(fnln) for fnln, _func in entries)
    for fnln, func in entries:
        if line is None:
            yield (fnmax, fnln, func)
        else:
            yield line % (fnmax, fnln, func)
2876 2876
def debugstacktrace(msg='stacktrace', skip=0,
                    f=stderr, otherf=stdout, depth=0):
    '''Writes a message to f (stderr) with a nicely formatted stacktrace.
    Skips the 'skip' entries closest to the call, then show 'depth' entries.
    By default it will flush stdout first.
    It can be used everywhere and intentionally does not require an ui object.
    Not to be used in production code but very convenient while developing.
    '''
    if otherf:
        # Flush the other stream first so its pending output is emitted
        # ahead of the trace.
        otherf.flush()
    header = '%s at:\n' % msg.rstrip()
    f.write(header)
    # skip + 1 hides this function's own frame from the report.
    frames = getstackframes(skip + 1, depth=depth)
    for frameline in frames:
        f.write(frameline)
    f.flush()
2891 2891
class dirs(object):
    '''a multiset of directory names from a dirstate or manifest'''

    def __init__(self, map, skip=None):
        '''Count the ancestor directories of every path in ``map``.

        When ``skip`` is given and ``map`` has an ``iteritems`` method,
        entries whose value starts with ``skip`` (``value[0] == skip``)
        are left out. Otherwise every path yielded by iterating ``map``
        is added.
        '''
        # Maps directory name -> number of added paths below it.
        self._dirs = {}
        addpath = self.addpath
        if safehasattr(map, 'iteritems') and skip is not None:
            for f, s in map.iteritems():
                if s[0] != skip:
                    addpath(f)
        else:
            for f in map:
                addpath(f)

    def addpath(self, path):
        '''Add the ancestor directories of ``path`` to the multiset.'''
        dirs = self._dirs
        for base in finddirs(path):
            if base in dirs:
                # This directory is already present, so its own ancestors
                # are too: bump its count and stop.
                dirs[base] += 1
                return
            dirs[base] = 1

    def delpath(self, path):
        '''Remove the ancestor directories of ``path`` from the multiset.

        Raises KeyError if an ancestor directory is not present.
        '''
        dirs = self._dirs
        for base in finddirs(path):
            if dirs[base] > 1:
                # Other paths still live below this directory; keep it
                # and all of its ancestors.
                dirs[base] -= 1
                return
            del dirs[base]

    def __iter__(self):
        # iter() over the dict works on both Python 2 and Python 3,
        # unlike the Python 2-only iterkeys().
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs
2927 2927
# Replace the pure Python dirs class above with the implementation from the
# parsers module when that module provides one.
if safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs
2930 2930
def finddirs(path):
    '''Yield every ancestor directory of a '/'-separated path, starting
    with the deepest one. Nothing is yielded for a path without '/'.'''
    cut = path.rfind('/')
    while cut >= 0:
        yield path[:cut]
        # Continue with the next separator to the left.
        cut = path.rfind('/', 0, cut)
2936 2936
class ctxmanager(object):
    '''A context manager for use in 'with' blocks to allow multiple
    contexts to be entered at once.  This is both safer and more
    flexible than contextlib.nested.

    Once Mercurial supports Python 2.7+, this will become mostly
    unnecessary.
    '''

    def __init__(self, *args):
        '''Accepts a list of no-argument functions that return context
        managers.  These will be invoked when enter() is called.'''
        self._pending = args
        # Exit callbacks in registration order; run in reverse on exit.
        self._atexit = []

    def __enter__(self):
        return self

    def enter(self):
        '''Create and enter context managers in the order in which they were
        passed to the constructor.

        Returns the list of values produced by their __enter__ methods.'''
        values = []
        for func in self._pending:
            obj = func()
            values.append(obj.__enter__())
            self._atexit.append(obj.__exit__)
        del self._pending
        return values

    def atexit(self, func, *args, **kwargs):
        '''Add a function to call when this context manager exits.

        Exit callbacks run in reverse registration order, so a function
        added after enter() runs before the __exit__ functions of the
        managed contexts. Returns func.'''
        def wrapper(exc_type, exc_val, exc_tb):
            # The exception details are deliberately not forwarded, and the
            # result is dropped so func can never suppress an exception.
            func(*args, **kwargs)
        self._atexit.append(wrapper)
        return func

    def __exit__(self, exc_type, exc_val, exc_tb):
        '''Context managers are exited in the reverse order from which
        they were created.'''
        received = exc_type is not None
        suppressed = False
        pending = None
        self._atexit.reverse()
        for exitfunc in self._atexit:
            try:
                if exitfunc(exc_type, exc_val, exc_tb):
                    # This callback swallowed the exception; the remaining
                    # callbacks see a clean exit.
                    suppressed = True
                    exc_type = None
                    exc_val = None
                    exc_tb = None
            except BaseException:
                # A failing callback replaces the exception passed to the
                # remaining callbacks; the last failure is re-raised below.
                exc_type, exc_val, exc_tb = pending = sys.exc_info()
        del self._atexit
        if pending:
            raise exc_val
        return received and suppressed
2996 2996
2997 2997 # compression code
2998 2998
# Role names accepted by compressormanager.supportedwireengines().
SERVERROLE = 'server'
CLIENTROLE = 'client'

# Value returned by compressionengine.wireprotosupport(): the wire format
# identifier plus the priorities used to order engines for the server and
# for the client.
compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
                                               (u'name', u'serverpriority',
                                                u'clientpriority'))
3005 3005
class compressormanager(object):
    """Holds registrations of various compression engines.

    This class essentially abstracts the differences between compression
    engines to allow new compression formats to be added easily, possibly from
    extensions.

    Compressors are registered against the global instance by calling its
    ``register()`` method.
    """
    def __init__(self):
        # Engine name to engine instance.
        self._engines = {}
        # Bundle spec human name to engine name.
        self._bundlenames = {}
        # Internal bundle identifier to engine name.
        self._bundletypes = {}
        # Revlog header to engine name.
        self._revlogheaders = {}
        # Wire proto identifier to engine name.
        self._wiretypes = {}

    def __getitem__(self, key):
        return self._engines[key]

    def __contains__(self, key):
        return key in self._engines

    def __iter__(self):
        names = self._engines.keys()
        return iter(names)

    def register(self, engine):
        """Register a compression engine with the manager.

        The argument must be a ``compressionengine`` instance.

        Raises ValueError for any other argument, and aborts when the
        engine name or one of its identifiers is already registered.
        """
        if not isinstance(engine, compressionengine):
            raise ValueError(_('argument must be a compressionengine'))

        name = engine.name()
        if name in self._engines:
            raise error.Abort(_('compression engine %s already registered') %
                              name)

        bundleinfo = engine.bundletype()
        if bundleinfo:
            bundlename, bundletype = bundleinfo

            if bundlename in self._bundlenames:
                raise error.Abort(_('bundle name %s already registered') %
                                  bundlename)
            if bundletype in self._bundletypes:
                owner = self._bundletypes[bundletype]
                raise error.Abort(_('bundle type %s already registered by %s') %
                                  (bundletype, owner))

            # Engines without an external facing name only get their
            # internal bundle type recorded.
            if bundlename:
                self._bundlenames[bundlename] = name

            self._bundletypes[bundletype] = name

        wiresupport = engine.wireprotosupport()
        if wiresupport:
            wiretype = wiresupport.name
            if wiretype in self._wiretypes:
                owner = self._wiretypes[wiretype]
                raise error.Abort(_('wire protocol compression %s already '
                                    'registered by %s') %
                                  (wiretype, owner))

            self._wiretypes[wiretype] = name

        header = engine.revlogheader()
        if header:
            if header in self._revlogheaders:
                owner = self._revlogheaders[header]
                raise error.Abort(_('revlog header %s already registered by %s') %
                                  (header, owner))
            self._revlogheaders[header] = name

        self._engines[name] = engine

    @property
    def supportedbundlenames(self):
        return set(self._bundlenames)

    @property
    def supportedbundletypes(self):
        return set(self._bundletypes)

    def _requireavailable(self, engine):
        """Return ``engine``, aborting if it reports itself unavailable."""
        if engine.available():
            return engine
        raise error.Abort(_('compression engine %s could not be loaded') %
                          engine.name())

    def forbundlename(self, bundlename):
        """Obtain a compression engine registered to a bundle name.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        enginename = self._bundlenames[bundlename]
        return self._requireavailable(self._engines[enginename])

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        enginename = self._bundletypes[bundletype]
        return self._requireavailable(self._engines[enginename])

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        if role == SERVERROLE:
            attr = 'serverpriority'
        else:
            attr = 'clientpriority'

        candidates = [self._engines[n] for n in self._wiretypes.values()]
        if onlyavailable:
            candidates = [e for e in candidates if e.available()]

        def sortkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            support = e.wireprotosupport()
            return -1 * getattr(support, attr), support.name

        return sorted(candidates, key=sortkey)

    def forwiretype(self, wiretype):
        """Obtain a compression engine registered to a wire proto identifier.

        Will raise KeyError if the identifier isn't registered.

        Will abort if the engine is known but not available.
        """
        enginename = self._wiretypes[wiretype]
        return self._requireavailable(self._engines[enginename])

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.
        """
        enginename = self._revlogheaders[header]
        return self._engines[enginename]

# The global manager instance that engines are registered against.
compengines = compressormanager()
3161 3161
class compressionengine(object):
    """Base class for compression engines.

    Compression engines must implement the interface defined by this class.
    """
    def name(self):
        """Returns the name of the compression engine.

        This is the key the engine is registered under.

        This method must be implemented.
        """
        raise NotImplementedError()

    def available(self):
        """Whether the compression engine is available.

        The intent of this method is to allow optional compression engines
        that may not be available in all installations (such as engines relying
        on C extensions that may not be present).
        """
        return True

    def bundletype(self):
        """Describes bundle identifiers for this engine.

        If this compression engine isn't supported for bundles, returns None.

        If this engine can be used for bundles, returns a 2-tuple of strings of
        the user-facing "bundle spec" compression name and an internal
        identifier used to denote the compression format within bundles. To
        exclude the name from external usage, set the first element to ``None``.

        If bundle compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.
        """
        return None

    def wireprotosupport(self):
        """Declare support for this compression format on the wire protocol.

        If this compression engine isn't supported for compressing wire
        protocol payloads, returns None.

        Otherwise, returns ``compenginewireprotosupport`` with the following
        fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        The integer priorities are used to order the advertisement of format
        support by server and client. The highest integer is advertised
        first. Integers with non-positive values aren't advertised.

        The priority values are somewhat arbitrary and only used for default
        ordering. The relative order can be changed via config options.

        If wire protocol compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Header added to revlog chunks that identifies this engine.

        If this engine can be used to compress revlogs, this method should
        return the bytes used to identify chunks compressed with this engine.
        Else, the method should return ``None`` to indicate it does not
        participate in revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        The method receives an iterator (ideally a generator) of chunks of
        bytes to be compressed. It returns an iterator (ideally a generator)
        of bytes of chunks representing the compressed output.

        Optionally accepts an argument defining how to perform compression.
        Each engine treats this argument differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        Argument is an object with a ``read(size)`` method that returns
        compressed data. Return value is an object with a ``read(size)`` that
        returns uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data. This method returns compressed binary data or ``None`` if
        the data could not be compressed (too small, not compressible, etc).
        The returned data should have a header uniquely identifying this
        compression format so decompression can be routed to this engine.
        This header should be identified by the ``revlogheader()`` return
        value.

        The object has a ``decompress(data)`` method that decompresses
        data. The method will only be called if ``data`` begins with
        ``revlogheader()``. The method should return the raw, uncompressed
        data or raise a ``RevlogError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()
3275 3275
class _zlibengine(compressionengine):
    """Compression engine backed by the standard ``zlib`` module."""

    def name(self):
        return 'zlib'

    def bundletype(self):
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        """Yield zlib-compressed chunks for the chunks in ``it``.

        ``opts`` may carry a ``level`` key; -1 is used when it is absent.
        """
        options = opts or {}
        compressor = zlib.compressobj(options.get('level', -1))
        for chunk in it:
            out = compressor.compress(chunk)
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through generator.
            if out:
                yield out

        yield compressor.flush()

    def decompressorreader(self, fh):
        """Return a chunkbuffer producing the decompressed data of ``fh``."""
        def gen():
            decompressor = zlib.decompressobj()
            for chunk in filechunkiter(fh):
                remaining = chunk
                while remaining:
                    # Limit output size to limit memory.
                    yield decompressor.decompress(remaining, 2 ** 18)
                    remaining = decompressor.unconsumed_tail

        return chunkbuffer(gen())

    class zlibrevlogcompressor(object):
        def compress(self, data):
            """Return zlib-compressed ``data``, or None when the input is
            shorter than 44 bytes or compression does not shrink it."""
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                return None

            if insize <= 1000000:
                compressed = zlib.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            compressor = zlib.compressobj()
            parts = []
            for start in range(0, insize, 2**20):
                parts.append(compressor.compress(data[start:start + 2**20]))
            parts.append(compressor.flush())

            if sum(map(len, parts)) < insize:
                return ''.join(parts)
            return None

        def decompress(self, data):
            """Return the decompressed ``data``; zlib errors are raised as
            ``RevlogError``."""
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())
3356 3356
class _bz2engine(compressionengine):
    """Compression engine backed by the standard ``bz2`` module."""

    def name(self):
        return 'bz2'

    def bundletype(self):
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        """Yield bzip2-compressed chunks for the chunks in ``it``.

        ``opts`` may carry a ``level`` key; 9 is used when it is absent.
        """
        options = opts or {}
        compressor = bz2.BZ2Compressor(options.get('level', 9))
        for chunk in it:
            out = compressor.compress(chunk)
            # Skip the empty results produced while the compressor buffers.
            if out:
                yield out

        yield compressor.flush()

    def decompressorreader(self, fh):
        """Return a chunkbuffer producing the decompressed data of ``fh``."""
        def gen():
            decompressor = bz2.BZ2Decompressor()
            for chunk in filechunkiter(fh):
                yield decompressor.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_bz2engine())
3388 3388
class _truncatedbz2engine(compressionengine):
    """Decompression-only engine for bzip2 streams stored without their
    leading 'BZ' header."""

    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        # No user-facing bundle spec name, only an internal identifier.
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled elsewhere.

    def decompressorreader(self, fh):
        """Return a chunkbuffer producing the decompressed data of ``fh``."""
        def gen():
            decompressor = bz2.BZ2Decompressor()
            # The input stream doesn't have the 'BZ' header. So add it back.
            decompressor.decompress('BZ')
            for chunk in filechunkiter(fh):
                yield decompressor.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_truncatedbz2engine())
3409 3409
class _noopengine(compressionengine):
    """Engine that passes data through without compressing it."""

    def name(self):
        return 'none'

    def bundletype(self):
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't because
    # unless you are on a fast network, uncompressed payloads can easily
    # saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        # Nothing to compress: hand the input iterator straight back.
        return it

    def decompressorreader(self, fh):
        # Nothing to decompress: the file object is already the reader.
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            # Always report the data as not compressed.
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())
3440 3440
class _zstdengine(compressionengine):
    """Compression engine backed by the optional ``zstd`` module."""

    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
            return zstd
        except ImportError:
            return None

    def available(self):
        return bool(self._module)

    def bundletype(self):
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        """Yield zstd-compressed chunks for the chunks in ``it``.

        ``opts`` may carry a ``level`` key; 3 is used when it is absent.
        """
        options = opts or {}
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = options.get('level', 3)

        zstd = self._module
        compressor = zstd.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            out = compressor.compress(chunk)
            if out:
                yield out

        yield compressor.flush()

    def decompressorreader(self, fh):
        """Return a chunkbuffer producing the decompressed data of ``fh``."""
        zstd = self._module
        dctx = zstd.ZstdDecompressor()
        return chunkbuffer(dctx.read_from(fh))

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # Writing the content size adds a few bytes to the output. However,
            # it allows decompression to be more optimal since we can
            # pre-allocate a buffer to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level,
                                             write_content_size=True)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            """Return zstd-compressed ``data``, or None when the input is
            shorter than 50 bytes or compression does not shrink it."""
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                return None

            if insize <= 1000000:
                compressed = self._cctx.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None

            # Large input: feed it to a streaming compressor in slices of
            # the recommended input size.
            compressor = self._cctx.compressobj()
            chunks = []
            start = 0
            while start < insize:
                end = start + self._compinsize
                out = compressor.compress(data[start:end])
                if out:
                    chunks.append(out)
                start = end
            chunks.append(compressor.flush())

            if sum(map(len, chunks)) < insize:
                return ''.join(chunks)
            return None

        def decompress(self, data):
            """Return the decompressed ``data``; any failure is raised as
            ``RevlogError``."""
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                chunks = []
                start = 0
                while start < insize:
                    end = start + self._decompinsize
                    out = dobj.decompress(data[start:end])
                    if out:
                        chunks.append(out)
                    start = end
                # Frame should be exhausted, so no finish() API.

                return ''.join(chunks)
            except Exception as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        options = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=options.get('level', 3))

compengines.register(_zstdengine())
3558 3558
# convenient shortcut: a short alias for debugstacktrace, so a stack trace
# can be requested with little typing while debugging
dst = debugstacktrace
General Comments 0
You need to be logged in to leave comments. Login now