util: make sortdict.keys() return a copy...
Martin von Zweigbergk
r30854:0126e422 stable
@@ -1,3545 +1,3545 b''
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import
17 17
18 18 import bz2
19 19 import calendar
20 20 import collections
21 21 import datetime
22 22 import errno
23 23 import gc
24 24 import hashlib
25 25 import imp
26 26 import os
27 27 import platform as pyplatform
28 28 import re as remod
29 29 import shutil
30 30 import signal
31 31 import socket
32 32 import stat
33 33 import string
34 34 import subprocess
35 35 import sys
36 36 import tempfile
37 37 import textwrap
38 38 import time
39 39 import traceback
40 40 import zlib
41 41
42 42 from . import (
43 43 encoding,
44 44 error,
45 45 i18n,
46 46 osutil,
47 47 parsers,
48 48 pycompat,
49 49 )
50 50
51 51 empty = pycompat.empty
52 52 httplib = pycompat.httplib
53 53 httpserver = pycompat.httpserver
54 54 pickle = pycompat.pickle
55 55 queue = pycompat.queue
56 56 socketserver = pycompat.socketserver
57 57 stderr = pycompat.stderr
58 58 stdin = pycompat.stdin
59 59 stdout = pycompat.stdout
60 60 stringio = pycompat.stringio
61 61 urlerr = pycompat.urlerr
62 62 urlparse = pycompat.urlparse
63 63 urlreq = pycompat.urlreq
64 64 xmlrpclib = pycompat.xmlrpclib
65 65
66 66 if pycompat.osname == 'nt':
67 67 from . import windows as platform
68 68 stdout = platform.winstdout(pycompat.stdout)
69 69 else:
70 70 from . import posix as platform
71 71
72 72 _ = i18n._
73 73
74 74 bindunixsocket = platform.bindunixsocket
75 75 cachestat = platform.cachestat
76 76 checkexec = platform.checkexec
77 77 checklink = platform.checklink
78 78 copymode = platform.copymode
79 79 executablepath = platform.executablepath
80 80 expandglobs = platform.expandglobs
81 81 explainexit = platform.explainexit
82 82 findexe = platform.findexe
83 83 gethgcmd = platform.gethgcmd
84 84 getuser = platform.getuser
85 85 getpid = os.getpid
86 86 groupmembers = platform.groupmembers
87 87 groupname = platform.groupname
88 88 hidewindow = platform.hidewindow
89 89 isexec = platform.isexec
90 90 isowner = platform.isowner
91 91 localpath = platform.localpath
92 92 lookupreg = platform.lookupreg
93 93 makedir = platform.makedir
94 94 nlinks = platform.nlinks
95 95 normpath = platform.normpath
96 96 normcase = platform.normcase
97 97 normcasespec = platform.normcasespec
98 98 normcasefallback = platform.normcasefallback
99 99 openhardlinks = platform.openhardlinks
100 100 oslink = platform.oslink
101 101 parsepatchoutput = platform.parsepatchoutput
102 102 pconvert = platform.pconvert
103 103 poll = platform.poll
104 104 popen = platform.popen
105 105 posixfile = platform.posixfile
106 106 quotecommand = platform.quotecommand
107 107 readpipe = platform.readpipe
108 108 rename = platform.rename
109 109 removedirs = platform.removedirs
110 110 samedevice = platform.samedevice
111 111 samefile = platform.samefile
112 112 samestat = platform.samestat
113 113 setbinary = platform.setbinary
114 114 setflags = platform.setflags
115 115 setsignalhandler = platform.setsignalhandler
116 116 shellquote = platform.shellquote
117 117 spawndetached = platform.spawndetached
118 118 split = platform.split
119 119 sshargs = platform.sshargs
120 120 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
121 121 statisexec = platform.statisexec
122 122 statislink = platform.statislink
123 123 testpid = platform.testpid
124 124 umask = platform.umask
125 125 unlink = platform.unlink
126 126 unlinkpath = platform.unlinkpath
127 127 username = platform.username
128 128
129 129 # Python compatibility
130 130
131 131 _notset = object()
132 132
133 133 # disable Python's problematic floating point timestamps (issue4836)
134 134 # (Python hypocritically says you shouldn't change this behavior in
135 135 # libraries, and sure enough Mercurial is not a library.)
136 136 os.stat_float_times(False)
137 137
138 138 def safehasattr(thing, attr):
139 139 return getattr(thing, attr, _notset) is not _notset
140 140
141 141 def bitsfrom(container):
142 142 bits = 0
143 143 for bit in container:
144 144 bits |= bit
145 145 return bits
146 146
147 147 DIGESTS = {
148 148 'md5': hashlib.md5,
149 149 'sha1': hashlib.sha1,
150 150 'sha512': hashlib.sha512,
151 151 }
152 152 # List of digest types from strongest to weakest
153 153 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
154 154
155 155 for k in DIGESTS_BY_STRENGTH:
156 156 assert k in DIGESTS
157 157
158 158 class digester(object):
159 159 """helper to compute digests.
160 160
161 161 This helper can be used to compute one or more digests given their name.
162 162
163 163 >>> d = digester(['md5', 'sha1'])
164 164 >>> d.update('foo')
165 165 >>> [k for k in sorted(d)]
166 166 ['md5', 'sha1']
167 167 >>> d['md5']
168 168 'acbd18db4cc2f85cedef654fccc4a4d8'
169 169 >>> d['sha1']
170 170 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
171 171 >>> digester.preferred(['md5', 'sha1'])
172 172 'sha1'
173 173 """
174 174
175 175 def __init__(self, digests, s=''):
176 176 self._hashes = {}
177 177 for k in digests:
178 178 if k not in DIGESTS:
179 179 raise Abort(_('unknown digest type: %s') % k)
180 180 self._hashes[k] = DIGESTS[k]()
181 181 if s:
182 182 self.update(s)
183 183
184 184 def update(self, data):
185 185 for h in self._hashes.values():
186 186 h.update(data)
187 187
188 188 def __getitem__(self, key):
189 189 if key not in DIGESTS:
190 190 raise Abort(_('unknown digest type: %s') % key)
191 191 return self._hashes[key].hexdigest()
192 192
193 193 def __iter__(self):
194 194 return iter(self._hashes)
195 195
196 196 @staticmethod
197 197 def preferred(supported):
198 198 """returns the strongest digest type in both supported and DIGESTS."""
199 199
200 200 for k in DIGESTS_BY_STRENGTH:
201 201 if k in supported:
202 202 return k
203 203 return None
204 204
205 205 class digestchecker(object):
206 206 """file handle wrapper that additionally checks content against a given
207 207 size and digests.
208 208
209 209 d = digestchecker(fh, size, {'md5': '...'})
210 210
211 211 When multiple digests are given, all of them are validated.
212 212 """
213 213
214 214 def __init__(self, fh, size, digests):
215 215 self._fh = fh
216 216 self._size = size
217 217 self._got = 0
218 218 self._digests = dict(digests)
219 219 self._digester = digester(self._digests.keys())
220 220
221 221 def read(self, length=-1):
222 222 content = self._fh.read(length)
223 223 self._digester.update(content)
224 224 self._got += len(content)
225 225 return content
226 226
227 227 def validate(self):
228 228 if self._size != self._got:
229 229 raise Abort(_('size mismatch: expected %d, got %d') %
230 230 (self._size, self._got))
231 231 for k, v in self._digests.items():
232 232 if v != self._digester[k]:
233 233 # i18n: first parameter is a digest name
234 234 raise Abort(_('%s mismatch: expected %s, got %s') %
235 235 (k, v, self._digester[k]))
236 236
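# Hedged usage sketch (editor's illustration, not part of util.py): wrap a
# file handle so that reading it also tallies size and digests, then verify
# both once the content has been consumed. 'fh', 'size' and 'sha1hex' are
# hypothetical caller-supplied values.
def _digestcheckerexample(fh, size, sha1hex):
    checked = digestchecker(fh, size, {'sha1': sha1hex})
    while checked.read(4096):
        pass                # drain the wrapped handle
    checked.validate()      # raises Abort on a size or digest mismatch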
237 237 try:
238 238 buffer = buffer
239 239 except NameError:
240 240 if not pycompat.ispy3:
241 241 def buffer(sliceable, offset=0, length=None):
242 242 if length is not None:
243 243 return sliceable[offset:offset + length]
244 244 return sliceable[offset:]
245 245 else:
246 246 def buffer(sliceable, offset=0, length=None):
247 247 if length is not None:
248 248 return memoryview(sliceable)[offset:offset + length]
249 249 return memoryview(sliceable)[offset:]
250 250
251 251 closefds = pycompat.osname == 'posix'
252 252
253 253 _chunksize = 4096
254 254
255 255 class bufferedinputpipe(object):
256 256 """a manually buffered input pipe
257 257
258 258 Python will not let us use buffered IO and lazy reading with 'polling' at
259 259 the same time. We cannot probe the buffer state and select will not detect
260 260 that data are ready to read if they are already buffered.
261 261
262 262 This class lets us work around that by implementing its own buffering
263 263 (allowing efficient readline) while offering a way to know if the buffer is
264 264 empty from the output (allowing collaboration of the buffer with polling).
265 265
266 266 This class lives in the 'util' module because it makes use of the 'os'
267 267 module from the python stdlib.
268 268 """
269 269
270 270 def __init__(self, input):
271 271 self._input = input
272 272 self._buffer = []
273 273 self._eof = False
274 274 self._lenbuf = 0
275 275
276 276 @property
277 277 def hasbuffer(self):
278 278 """True if any data is currently buffered
279 279
280 280 This will be used externally as a pre-step for polling IO. If there
281 281 is already data buffered, then no polling should be set in place."""
282 282 return bool(self._buffer)
283 283
284 284 @property
285 285 def closed(self):
286 286 return self._input.closed
287 287
288 288 def fileno(self):
289 289 return self._input.fileno()
290 290
291 291 def close(self):
292 292 return self._input.close()
293 293
294 294 def read(self, size):
295 295 while (not self._eof) and (self._lenbuf < size):
296 296 self._fillbuffer()
297 297 return self._frombuffer(size)
298 298
299 299 def readline(self, *args, **kwargs):
300 300 if 1 < len(self._buffer):
301 301 # this should not happen because both read and readline end with a
302 302 # _frombuffer call that collapses it.
303 303 self._buffer = [''.join(self._buffer)]
304 304 self._lenbuf = len(self._buffer[0])
305 305 lfi = -1
306 306 if self._buffer:
307 307 lfi = self._buffer[-1].find('\n')
308 308 while (not self._eof) and lfi < 0:
309 309 self._fillbuffer()
310 310 if self._buffer:
311 311 lfi = self._buffer[-1].find('\n')
312 312 size = lfi + 1
313 313 if lfi < 0: # end of file
314 314 size = self._lenbuf
315 315 elif 1 < len(self._buffer):
316 316 # we need to take previous chunks into account
317 317 size += self._lenbuf - len(self._buffer[-1])
318 318 return self._frombuffer(size)
319 319
320 320 def _frombuffer(self, size):
321 321 """return at most 'size' data from the buffer
322 322
323 323 The data are removed from the buffer."""
324 324 if size == 0 or not self._buffer:
325 325 return ''
326 326 buf = self._buffer[0]
327 327 if 1 < len(self._buffer):
328 328 buf = ''.join(self._buffer)
329 329
330 330 data = buf[:size]
331 331 buf = buf[len(data):]
332 332 if buf:
333 333 self._buffer = [buf]
334 334 self._lenbuf = len(buf)
335 335 else:
336 336 self._buffer = []
337 337 self._lenbuf = 0
338 338 return data
339 339
340 340 def _fillbuffer(self):
341 341 """read data to the buffer"""
342 342 data = os.read(self._input.fileno(), _chunksize)
343 343 if not data:
344 344 self._eof = True
345 345 else:
346 346 self._lenbuf += len(data)
347 347 self._buffer.append(data)
348 348
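# Illustrative sketch (editor's addition, not part of util.py): consult
# hasbuffer before polling, as the class docstring prescribes. 'proc' is a
# hypothetical subprocess.Popen with a piped stdout, and this assumes the
# platform poll() accepts a list of file descriptors as the posix module does.
def _bufferedpipeexample(proc):
    pipe = bufferedinputpipe(proc.stdout)
    if not pipe.hasbuffer:
        poll([pipe.fileno()])   # only poll when nothing is buffered
    return pipe.readline()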
349 349 def popen2(cmd, env=None, newlines=False):
350 350 # Setting bufsize to -1 lets the system decide the buffer size.
351 351 # The default for bufsize is 0, meaning unbuffered. This leads to
352 352 # poor performance on Mac OS X: http://bugs.python.org/issue4194
353 353 p = subprocess.Popen(cmd, shell=True, bufsize=-1,
354 354 close_fds=closefds,
355 355 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
356 356 universal_newlines=newlines,
357 357 env=env)
358 358 return p.stdin, p.stdout
359 359
360 360 def popen3(cmd, env=None, newlines=False):
361 361 stdin, stdout, stderr, p = popen4(cmd, env, newlines)
362 362 return stdin, stdout, stderr
363 363
364 364 def popen4(cmd, env=None, newlines=False, bufsize=-1):
365 365 p = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
366 366 close_fds=closefds,
367 367 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
368 368 stderr=subprocess.PIPE,
369 369 universal_newlines=newlines,
370 370 env=env)
371 371 return p.stdin, p.stdout, p.stderr, p
372 372
373 373 def version():
374 374 """Return version information if available."""
375 375 try:
376 376 from . import __version__
377 377 return __version__.version
378 378 except ImportError:
379 379 return 'unknown'
380 380
381 381 def versiontuple(v=None, n=4):
382 382 """Parses a Mercurial version string into an N-tuple.
383 383
384 384 The version string to be parsed is specified with the ``v`` argument.
385 385 If it isn't defined, the current Mercurial version string will be parsed.
386 386
387 387 ``n`` can be 2, 3, or 4. Here is how some version strings map to
388 388 returned values:
389 389
390 390 >>> v = '3.6.1+190-df9b73d2d444'
391 391 >>> versiontuple(v, 2)
392 392 (3, 6)
393 393 >>> versiontuple(v, 3)
394 394 (3, 6, 1)
395 395 >>> versiontuple(v, 4)
396 396 (3, 6, 1, '190-df9b73d2d444')
397 397
398 398 >>> versiontuple('3.6.1+190-df9b73d2d444+20151118')
399 399 (3, 6, 1, '190-df9b73d2d444+20151118')
400 400
401 401 >>> v = '3.6'
402 402 >>> versiontuple(v, 2)
403 403 (3, 6)
404 404 >>> versiontuple(v, 3)
405 405 (3, 6, None)
406 406 >>> versiontuple(v, 4)
407 407 (3, 6, None, None)
408 408
409 409 >>> v = '3.9-rc'
410 410 >>> versiontuple(v, 2)
411 411 (3, 9)
412 412 >>> versiontuple(v, 3)
413 413 (3, 9, None)
414 414 >>> versiontuple(v, 4)
415 415 (3, 9, None, 'rc')
416 416
417 417 >>> v = '3.9-rc+2-02a8fea4289b'
418 418 >>> versiontuple(v, 2)
419 419 (3, 9)
420 420 >>> versiontuple(v, 3)
421 421 (3, 9, None)
422 422 >>> versiontuple(v, 4)
423 423 (3, 9, None, 'rc+2-02a8fea4289b')
424 424 """
425 425 if not v:
426 426 v = version()
427 427 parts = remod.split(r'[\+-]', v, 1)
428 428 if len(parts) == 1:
429 429 vparts, extra = parts[0], None
430 430 else:
431 431 vparts, extra = parts
432 432
433 433 vints = []
434 434 for i in vparts.split('.'):
435 435 try:
436 436 vints.append(int(i))
437 437 except ValueError:
438 438 break
439 439 # (3, 6) -> (3, 6, None)
440 440 while len(vints) < 3:
441 441 vints.append(None)
442 442
443 443 if n == 2:
444 444 return (vints[0], vints[1])
445 445 if n == 3:
446 446 return (vints[0], vints[1], vints[2])
447 447 if n == 4:
448 448 return (vints[0], vints[1], vints[2], extra)
449 449
450 450 # used by parsedate
451 451 defaultdateformats = (
452 452 '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
453 453 '%Y-%m-%dT%H:%M', # without seconds
454 454 '%Y-%m-%dT%H%M%S', # another awful but legal variant without :
455 455 '%Y-%m-%dT%H%M', # without seconds
456 456 '%Y-%m-%d %H:%M:%S', # our common legal variant
457 457 '%Y-%m-%d %H:%M', # without seconds
458 458 '%Y-%m-%d %H%M%S', # without :
459 459 '%Y-%m-%d %H%M', # without seconds
460 460 '%Y-%m-%d %I:%M:%S%p',
461 461 '%Y-%m-%d %H:%M',
462 462 '%Y-%m-%d %I:%M%p',
463 463 '%Y-%m-%d',
464 464 '%m-%d',
465 465 '%m/%d',
466 466 '%m/%d/%y',
467 467 '%m/%d/%Y',
468 468 '%a %b %d %H:%M:%S %Y',
469 469 '%a %b %d %I:%M:%S%p %Y',
470 470 '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
471 471 '%b %d %H:%M:%S %Y',
472 472 '%b %d %I:%M:%S%p %Y',
473 473 '%b %d %H:%M:%S',
474 474 '%b %d %I:%M:%S%p',
475 475 '%b %d %H:%M',
476 476 '%b %d %I:%M%p',
477 477 '%b %d %Y',
478 478 '%b %d',
479 479 '%H:%M:%S',
480 480 '%I:%M:%S%p',
481 481 '%H:%M',
482 482 '%I:%M%p',
483 483 )
484 484
485 485 extendeddateformats = defaultdateformats + (
486 486 "%Y",
487 487 "%Y-%m",
488 488 "%b",
489 489 "%b %Y",
490 490 )
491 491
492 492 def cachefunc(func):
493 493 '''cache the result of function calls'''
494 494 # XXX doesn't handle keywords args
495 495 if func.__code__.co_argcount == 0:
496 496 cache = []
497 497 def f():
498 498 if len(cache) == 0:
499 499 cache.append(func())
500 500 return cache[0]
501 501 return f
502 502 cache = {}
503 503 if func.__code__.co_argcount == 1:
504 504 # we gain a small amount of time because
505 505 # we don't need to pack/unpack the list
506 506 def f(arg):
507 507 if arg not in cache:
508 508 cache[arg] = func(arg)
509 509 return cache[arg]
510 510 else:
511 511 def f(*args):
512 512 if args not in cache:
513 513 cache[args] = func(*args)
514 514 return cache[args]
515 515
516 516 return f
517 517
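# Sketch (editor's addition, not part of util.py): cachefunc memoizes by
# positional arguments, so repeated calls with the same argument are served
# from the cache and the wrapped function body runs once per distinct value.
def _cachefuncexample():
    calls = []
    def square(x):
        calls.append(x)
        return x * x
    square = cachefunc(square)
    square(3)
    square(3)           # cache hit; square's body does not run again
    return len(calls)   # 1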
518 518 class sortdict(dict):
519 519 '''a simple sorted dictionary'''
520 520 def __init__(self, data=None):
521 521 self._list = []
522 522 if data:
523 523 self.update(data)
524 524 def copy(self):
525 525 return sortdict(self)
526 526 def __setitem__(self, key, val):
527 527 if key in self:
528 528 self._list.remove(key)
529 529 self._list.append(key)
530 530 dict.__setitem__(self, key, val)
531 531 def __iter__(self):
532 532 return self._list.__iter__()
533 533 def update(self, src):
534 534 if isinstance(src, dict):
535 535 src = src.iteritems()
536 536 for k, v in src:
537 537 self[k] = v
538 538 def clear(self):
539 539 dict.clear(self)
540 540 self._list = []
541 541 def items(self):
542 542 return [(k, self[k]) for k in self._list]
543 543 def __delitem__(self, key):
544 544 dict.__delitem__(self, key)
545 545 self._list.remove(key)
546 546 def pop(self, key, *args, **kwargs):
547 547 try:
548 548 self._list.remove(key)
549 549 except ValueError:
550 550 pass
551 551 return dict.pop(self, key, *args, **kwargs)
552 552 def keys(self):
553 return self._list
553 return self._list[:]
554 554 def iterkeys(self):
555 555 return self._list.__iter__()
556 556 def iteritems(self):
557 557 for k in self._list:
558 558 yield k, self[k]
559 559 def insert(self, index, key, val):
560 560 self._list.insert(index, key)
561 561 dict.__setitem__(self, key, val)
562 562 def __repr__(self):
563 563 if not self:
564 564 return '%s()' % self.__class__.__name__
565 565 return '%s(%r)' % (self.__class__.__name__, self.items())
566 566
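# Sketch (editor's addition, not part of util.py): the change in this commit
# makes keys() return a copy of the internal list, so a caller may mutate the
# sortdict while iterating over the result of keys().
def _sortdictkeysexample():
    d = sortdict([('a', 1), ('b', 2), ('c', 3)])
    for k in d.keys():      # a snapshot; unaffected by the deletions below
        if k != 'b':
            del d[k]
    return d.items()        # [('b', 2)]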
567 567 class _lrucachenode(object):
568 568 """A node in a doubly linked list.
569 569
570 570 Holds a reference to nodes on either side as well as a key-value
571 571 pair for the dictionary entry.
572 572 """
573 573 __slots__ = (u'next', u'prev', u'key', u'value')
574 574
575 575 def __init__(self):
576 576 self.next = None
577 577 self.prev = None
578 578
579 579 self.key = _notset
580 580 self.value = None
581 581
582 582 def markempty(self):
583 583 """Mark the node as emptied."""
584 584 self.key = _notset
585 585
586 586 class lrucachedict(object):
587 587 """Dict that caches most recent accesses and sets.
588 588
589 589 The dict consists of an actual backing dict - indexed by original
590 590 key - and a doubly linked circular list defining the order of entries in
591 591 the cache.
592 592
593 593 The head node is the newest entry in the cache. If the cache is full,
594 594 we recycle head.prev and make it the new head. Cache accesses result in
595 595 the node being moved to before the existing head and being marked as the
596 596 new head node.
597 597 """
598 598 def __init__(self, max):
599 599 self._cache = {}
600 600
601 601 self._head = head = _lrucachenode()
602 602 head.prev = head
603 603 head.next = head
604 604 self._size = 1
605 605 self._capacity = max
606 606
607 607 def __len__(self):
608 608 return len(self._cache)
609 609
610 610 def __contains__(self, k):
611 611 return k in self._cache
612 612
613 613 def __iter__(self):
614 614 # We don't have to iterate in cache order, but why not.
615 615 n = self._head
616 616 for i in range(len(self._cache)):
617 617 yield n.key
618 618 n = n.next
619 619
620 620 def __getitem__(self, k):
621 621 node = self._cache[k]
622 622 self._movetohead(node)
623 623 return node.value
624 624
625 625 def __setitem__(self, k, v):
626 626 node = self._cache.get(k)
627 627 # Replace existing value and mark as newest.
628 628 if node is not None:
629 629 node.value = v
630 630 self._movetohead(node)
631 631 return
632 632
633 633 if self._size < self._capacity:
634 634 node = self._addcapacity()
635 635 else:
636 636 # Grab the last/oldest item.
637 637 node = self._head.prev
638 638
639 639 # At capacity. Kill the old entry.
640 640 if node.key is not _notset:
641 641 del self._cache[node.key]
642 642
643 643 node.key = k
644 644 node.value = v
645 645 self._cache[k] = node
646 646 # And mark it as newest entry. No need to adjust order since it
647 647 # is already self._head.prev.
648 648 self._head = node
649 649
650 650 def __delitem__(self, k):
651 651 node = self._cache.pop(k)
652 652 node.markempty()
653 653
654 654 # Temporarily mark as newest item before re-adjusting head to make
655 655 # this node the oldest item.
656 656 self._movetohead(node)
657 657 self._head = node.next
658 658
659 659 # Additional dict methods.
660 660
661 661 def get(self, k, default=None):
662 662 try:
663 663 return self._cache[k].value
664 664 except KeyError:
665 665 return default
666 666
667 667 def clear(self):
668 668 n = self._head
669 669 while n.key is not _notset:
670 670 n.markempty()
671 671 n = n.next
672 672
673 673 self._cache.clear()
674 674
675 675 def copy(self):
676 676 result = lrucachedict(self._capacity)
677 677 n = self._head.prev
678 678 # Iterate in oldest-to-newest order, so the copy has the right ordering
679 679 for i in range(len(self._cache)):
680 680 result[n.key] = n.value
681 681 n = n.prev
682 682 return result
683 683
684 684 def _movetohead(self, node):
685 685 """Mark a node as the newest, making it the new head.
686 686
687 687 When a node is accessed, it becomes the freshest entry in the LRU
688 688 list, which is denoted by self._head.
689 689
690 690 Visually, let's make ``N`` the new head node (* denotes head):
691 691
692 692 previous/oldest <-> head <-> next/next newest
693 693
694 694 ----<->--- A* ---<->-----
695 695 | |
696 696 E <-> D <-> N <-> C <-> B
697 697
698 698 To:
699 699
700 700 ----<->--- N* ---<->-----
701 701 | |
702 702 E <-> D <-> C <-> B <-> A
703 703
704 704 This requires the following moves:
705 705
706 706 C.next = D (node.prev.next = node.next)
707 707 D.prev = C (node.next.prev = node.prev)
708 708 E.next = N (head.prev.next = node)
709 709 N.prev = E (node.prev = head.prev)
710 710 N.next = A (node.next = head)
711 711 A.prev = N (head.prev = node)
712 712 """
713 713 head = self._head
714 714 # C.next = D
715 715 node.prev.next = node.next
716 716 # D.prev = C
717 717 node.next.prev = node.prev
718 718 # N.prev = E
719 719 node.prev = head.prev
720 720 # N.next = A
721 721 # It is tempting to do just "head" here, however if node is
722 722 # adjacent to head, this will do bad things.
723 723 node.next = head.prev.next
724 724 # E.next = N
725 725 node.next.prev = node
726 726 # A.prev = N
727 727 node.prev.next = node
728 728
729 729 self._head = node
730 730
731 731 def _addcapacity(self):
732 732 """Add a node to the circular linked list.
733 733
734 734 The new node is inserted before the head node.
735 735 """
736 736 head = self._head
737 737 node = _lrucachenode()
738 738 head.prev.next = node
739 739 node.prev = head.prev
740 740 node.next = head
741 741 head.prev = node
742 742 self._size += 1
743 743 return node
744 744
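# Sketch (editor's addition, not part of util.py): once the dict holds 'max'
# entries, inserting a new key recycles the least recently used node, and a
# lookup counts as a use that protects its entry from eviction.
def _lrucachedictexample():
    d = lrucachedict(2)
    d['a'] = 1
    d['b'] = 2
    d['a']              # touch 'a' so that 'b' becomes the oldest entry
    d['c'] = 3          # at capacity: evicts 'b'
    return 'b' in d     # False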
745 745 def lrucachefunc(func):
746 746 '''cache most recent results of function calls'''
747 747 cache = {}
748 748 order = collections.deque()
749 749 if func.__code__.co_argcount == 1:
750 750 def f(arg):
751 751 if arg not in cache:
752 752 if len(cache) > 20:
753 753 del cache[order.popleft()]
754 754 cache[arg] = func(arg)
755 755 else:
756 756 order.remove(arg)
757 757 order.append(arg)
758 758 return cache[arg]
759 759 else:
760 760 def f(*args):
761 761 if args not in cache:
762 762 if len(cache) > 20:
763 763 del cache[order.popleft()]
764 764 cache[args] = func(*args)
765 765 else:
766 766 order.remove(args)
767 767 order.append(args)
768 768 return cache[args]
769 769
770 770 return f
771 771
772 772 class propertycache(object):
773 773 def __init__(self, func):
774 774 self.func = func
775 775 self.name = func.__name__
776 776 def __get__(self, obj, type=None):
777 777 result = self.func(obj)
778 778 self.cachevalue(obj, result)
779 779 return result
780 780
781 781 def cachevalue(self, obj, value):
782 782 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
783 783 obj.__dict__[self.name] = value
784 784
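# Sketch (editor's addition, not part of util.py): propertycache is a
# non-data descriptor, so the wrapped function runs on first access and its
# result, stored in the instance __dict__, shadows the descriptor afterwards.
class _propertycacheexample(object):
    @propertycache
    def expensive(self):
        return 42   # computed once; later reads hit __dict__ directly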
785 785 def pipefilter(s, cmd):
786 786 '''filter string S through command CMD, returning its output'''
787 787 p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
788 788 stdin=subprocess.PIPE, stdout=subprocess.PIPE)
789 789 pout, perr = p.communicate(s)
790 790 return pout
791 791
792 792 def tempfilter(s, cmd):
793 793 '''filter string S through a pair of temporary files with CMD.
794 794 CMD is used as a template to create the real command to be run,
795 795 with the strings INFILE and OUTFILE replaced by the real names of
796 796 the temporary files generated.'''
797 797 inname, outname = None, None
798 798 try:
799 799 infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
800 800 fp = os.fdopen(infd, 'wb')
801 801 fp.write(s)
802 802 fp.close()
803 803 outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
804 804 os.close(outfd)
805 805 cmd = cmd.replace('INFILE', inname)
806 806 cmd = cmd.replace('OUTFILE', outname)
807 807 code = os.system(cmd)
808 808 if pycompat.sysplatform == 'OpenVMS' and code & 1:
809 809 code = 0
810 810 if code:
811 811 raise Abort(_("command '%s' failed: %s") %
812 812 (cmd, explainexit(code)))
813 813 return readfile(outname)
814 814 finally:
815 815 try:
816 816 if inname:
817 817 os.unlink(inname)
818 818 except OSError:
819 819 pass
820 820 try:
821 821 if outname:
822 822 os.unlink(outname)
823 823 except OSError:
824 824 pass
825 825
826 826 filtertable = {
827 827 'tempfile:': tempfilter,
828 828 'pipe:': pipefilter,
829 829 }
830 830
831 831 def filter(s, cmd):
832 832 "filter a string through a command that transforms its input to its output"
833 833 for name, fn in filtertable.iteritems():
834 834 if cmd.startswith(name):
835 835 return fn(s, cmd[len(name):].lstrip())
836 836 return pipefilter(s, cmd)
837 837
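# Sketch (editor's addition, not part of util.py): the prefixes registered in
# filtertable select the transport; anything else falls through to a plain
# pipe. The shell commands here are hypothetical examples.
def _filterexample(s):
    upper = filter(s, 'pipe: tr a-z A-Z')                   # via stdin/stdout
    sorted_ = filter(s, 'tempfile: sort INFILE -o OUTFILE') # via temp files
    return upper, sorted_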
838 838 def binary(s):
839 839 """return true if a string is binary data"""
840 840 return bool(s and '\0' in s)
841 841
842 842 def increasingchunks(source, min=1024, max=65536):
843 843 '''return no less than min bytes per chunk while data remains,
844 844 doubling min after each chunk until it reaches max'''
845 845 def log2(x):
846 846 if not x:
847 847 return 0
848 848 i = 0
849 849 while x:
850 850 x >>= 1
851 851 i += 1
852 852 return i - 1
853 853
854 854 buf = []
855 855 blen = 0
856 856 for chunk in source:
857 857 buf.append(chunk)
858 858 blen += len(chunk)
859 859 if blen >= min:
860 860 if min < max:
861 861 min = min << 1
862 862 nmin = 1 << log2(blen)
863 863 if nmin > min:
864 864 min = nmin
865 865 if min > max:
866 866 min = max
867 867 yield ''.join(buf)
868 868 blen = 0
869 869 buf = []
870 870 if buf:
871 871 yield ''.join(buf)
872 872
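# Sketch (editor's addition, not part of util.py): the generator starts
# emitting once roughly 'min' bytes are buffered and doubles that threshold
# after each chunk, so output is responsive early and cheap later on.
def _increasingchunksexample():
    source = ('x' * 500 for _ in xrange(64))
    return [len(c) for c in increasingchunks(source, min=1024, max=4096)]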
873 873 Abort = error.Abort
874 874
875 875 def always(fn):
876 876 return True
877 877
878 878 def never(fn):
879 879 return False
880 880
881 881 def nogc(func):
882 882 """disable garbage collector
883 883
884 884 Python's garbage collector triggers a GC each time a certain number of
885 885 container objects (the number being defined by gc.get_threshold()) are
886 886 allocated even when marked not to be tracked by the collector. Tracking has
887 887 no effect on when GCs are triggered, only on what objects the GC looks
888 888 into. As a workaround, disable GC while building complex (huge)
889 889 containers.
890 890
891 891 This garbage collector issue has been fixed in Python 2.7.
892 892 """
893 893 if sys.version_info >= (2, 7):
894 894 return func
895 895 def wrapper(*args, **kwargs):
896 896 gcenabled = gc.isenabled()
897 897 gc.disable()
898 898 try:
899 899 return func(*args, **kwargs)
900 900 finally:
901 901 if gcenabled:
902 902 gc.enable()
903 903 return wrapper
904 904
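# Sketch (editor's addition, not part of util.py): decorate builders of huge
# containers; on Python >= 2.7 nogc is a no-op, while on older interpreters
# it suspends the cyclic collector for the duration of the call.
@nogc
def _nogcexample(n):
    return dict((i, str(i)) for i in xrange(n))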
905 905 def pathto(root, n1, n2):
906 906 '''return the relative path from one place to another.
907 907 root should use os.sep to separate directories
908 908 n1 should use os.sep to separate directories
909 909 n2 should use "/" to separate directories
910 910 returns an os.sep-separated path.
911 911
912 912 If n1 is a relative path, it's assumed it's
913 913 relative to root.
914 914 n2 should always be relative to root.
915 915 '''
916 916 if not n1:
917 917 return localpath(n2)
918 918 if os.path.isabs(n1):
919 919 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
920 920 return os.path.join(root, localpath(n2))
921 921 n2 = '/'.join((pconvert(root), n2))
922 922 a, b = splitpath(n1), n2.split('/')
923 923 a.reverse()
924 924 b.reverse()
925 925 while a and b and a[-1] == b[-1]:
926 926 a.pop()
927 927 b.pop()
928 928 b.reverse()
929 929 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
930 930
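# Sketch (editor's addition, not part of util.py): with an absolute n1 inside
# root, pathto climbs out of n1's directories and descends into n2. On a
# POSIX system:
def _pathtoexample():
    return pathto('/repo', '/repo/a/b', 'c/d')  # '../../c/d'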
931 931 def mainfrozen():
932 932 """return True if we are a frozen executable.
933 933
934 934 The code supports py2exe (most common, Windows only) and tools/freeze
935 935 (portable, not much used).
936 936 """
937 937 return (safehasattr(sys, "frozen") or # new py2exe
938 938 safehasattr(sys, "importers") or # old py2exe
939 939 imp.is_frozen(u"__main__")) # tools/freeze
940 940
941 941 # the location of data files matching the source code
942 942 if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
943 943 # executable version (py2exe) doesn't support __file__
944 944 datapath = os.path.dirname(pycompat.sysexecutable)
945 945 else:
946 946 datapath = os.path.dirname(__file__)
947 947
948 948 if not isinstance(datapath, bytes):
949 949 datapath = pycompat.fsencode(datapath)
950 950
951 951 i18n.setdatapath(datapath)
952 952
953 953 _hgexecutable = None
954 954
955 955 def hgexecutable():
956 956 """return location of the 'hg' executable.
957 957
958 958 Defaults to $HG or 'hg' in the search path.
959 959 """
960 960 if _hgexecutable is None:
961 961 hg = encoding.environ.get('HG')
962 962 mainmod = sys.modules['__main__']
963 963 if hg:
964 964 _sethgexecutable(hg)
965 965 elif mainfrozen():
966 966 if getattr(sys, 'frozen', None) == 'macosx_app':
967 967 # Env variable set by py2app
968 968 _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
969 969 else:
970 970 _sethgexecutable(pycompat.sysexecutable)
971 971 elif os.path.basename(getattr(mainmod, '__file__', '')) == 'hg':
972 972 _sethgexecutable(mainmod.__file__)
973 973 else:
974 974 exe = findexe('hg') or os.path.basename(sys.argv[0])
975 975 _sethgexecutable(exe)
976 976 return _hgexecutable
977 977
978 978 def _sethgexecutable(path):
979 979 """set location of the 'hg' executable"""
980 980 global _hgexecutable
981 981 _hgexecutable = path
982 982
983 983 def _isstdout(f):
984 984 fileno = getattr(f, 'fileno', None)
985 985 return fileno and fileno() == sys.__stdout__.fileno()
986 986
987 987 def shellenviron(environ=None):
988 988 """return environ with optional override, useful for shelling out"""
989 989 def py2shell(val):
990 990 'convert python object into string that is useful to shell'
991 991 if val is None or val is False:
992 992 return '0'
993 993 if val is True:
994 994 return '1'
995 995 return str(val)
996 996 env = dict(encoding.environ)
997 997 if environ:
998 998 env.update((k, py2shell(v)) for k, v in environ.iteritems())
999 999 env['HG'] = hgexecutable()
1000 1000 return env
1001 1001
1002 1002 def system(cmd, environ=None, cwd=None, onerr=None, errprefix=None, out=None):
1003 1003 '''enhanced shell command execution.
1004 1004 run with environment maybe modified, maybe in different dir.
1005 1005
1006 1006 if command fails and onerr is None, return status, else raise onerr
1007 1007 object as exception.
1008 1008
1009 1009 if out is specified, it is assumed to be a file-like object that has a
1010 1010 write() method. stdout and stderr will be redirected to out.'''
1011 1011 try:
1012 1012 stdout.flush()
1013 1013 except Exception:
1014 1014 pass
1015 1015 origcmd = cmd
1016 1016 cmd = quotecommand(cmd)
1017 1017 if pycompat.sysplatform == 'plan9' and (sys.version_info[0] == 2
1018 1018 and sys.version_info[1] < 7):
1019 1019 # subprocess kludge to work around issues in half-baked Python
1020 1020 # ports, notably bichued/python:
1021 1021 if cwd is not None:
1022 1022 os.chdir(cwd)
1023 1023 rc = os.system(cmd)
1024 1024 else:
1025 1025 env = shellenviron(environ)
1026 1026 if out is None or _isstdout(out):
1027 1027 rc = subprocess.call(cmd, shell=True, close_fds=closefds,
1028 1028 env=env, cwd=cwd)
1029 1029 else:
1030 1030 proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1031 1031 env=env, cwd=cwd, stdout=subprocess.PIPE,
1032 1032 stderr=subprocess.STDOUT)
1033 1033 for line in iter(proc.stdout.readline, ''):
1034 1034 out.write(line)
1035 1035 proc.wait()
1036 1036 rc = proc.returncode
1037 1037 if pycompat.sysplatform == 'OpenVMS' and rc & 1:
1038 1038 rc = 0
1039 1039 if rc and onerr:
1040 1040 errmsg = '%s %s' % (os.path.basename(origcmd.split(None, 1)[0]),
1041 1041 explainexit(rc)[0])
1042 1042 if errprefix:
1043 1043 errmsg = '%s: %s' % (errprefix, errmsg)
1044 1044 raise onerr(errmsg)
1045 1045 return rc
1046 1046
1047 1047 def checksignature(func):
1048 1048 '''wrap a function with code to check for calling errors'''
1049 1049 def check(*args, **kwargs):
1050 1050 try:
1051 1051 return func(*args, **kwargs)
1052 1052 except TypeError:
1053 1053 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1054 1054 raise error.SignatureError
1055 1055 raise
1056 1056
1057 1057 return check
1058 1058
1059 1059 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1060 1060 '''copy a file, preserving mode and optionally other stat info like
1061 1061 atime/mtime
1062 1062
1063 1063 checkambig argument is used with filestat, and is useful only if
1064 1064 destination file is guarded by any lock (e.g. repo.lock or
1065 1065 repo.wlock).
1066 1066
1067 1067 copystat and checkambig should be exclusive.
1068 1068 '''
1069 1069 assert not (copystat and checkambig)
1070 1070 oldstat = None
1071 1071 if os.path.lexists(dest):
1072 1072 if checkambig:
1073 1073 oldstat = filestat(dest)
1074 1074 unlink(dest)
1075 1075 # hardlinks are problematic on CIFS, quietly ignore this flag
1076 1076 # until we find a way to work around it cleanly (issue4546)
1077 1077 if False and hardlink:
1078 1078 try:
1079 1079 oslink(src, dest)
1080 1080 return
1081 1081 except (IOError, OSError):
1082 1082 pass # fall back to normal copy
1083 1083 if os.path.islink(src):
1084 1084 os.symlink(os.readlink(src), dest)
1085 1085 # copytime is ignored for symlinks, but in general copytime isn't needed
1086 1086 # for them anyway
1087 1087 else:
1088 1088 try:
1089 1089 shutil.copyfile(src, dest)
1090 1090 if copystat:
1091 1091 # copystat also copies mode
1092 1092 shutil.copystat(src, dest)
1093 1093 else:
1094 1094 shutil.copymode(src, dest)
1095 1095 if oldstat and oldstat.stat:
1096 1096 newstat = filestat(dest)
1097 1097 if newstat.isambig(oldstat):
1098 1098 # stat of copied file is ambiguous to original one
1099 1099 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1100 1100 os.utime(dest, (advanced, advanced))
1101 1101 except shutil.Error as inst:
1102 1102 raise Abort(str(inst))
1103 1103
1104 1104 def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
1105 1105 """Copy a directory tree using hardlinks if possible."""
1106 1106 num = 0
1107 1107
1108 1108 if hardlink is None:
1109 1109 hardlink = (os.stat(src).st_dev ==
1110 1110 os.stat(os.path.dirname(dst)).st_dev)
1111 1111 if hardlink:
1112 1112 topic = _('linking')
1113 1113 else:
1114 1114 topic = _('copying')
1115 1115
1116 1116 if os.path.isdir(src):
1117 1117 os.mkdir(dst)
1118 1118 for name, kind in osutil.listdir(src):
1119 1119 srcname = os.path.join(src, name)
1120 1120 dstname = os.path.join(dst, name)
1121 1121 def nprog(t, pos):
1122 1122 if pos is not None:
1123 1123 return progress(t, pos + num)
1124 1124 hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
1125 1125 num += n
1126 1126 else:
1127 1127 if hardlink:
1128 1128 try:
1129 1129 oslink(src, dst)
1130 1130 except (IOError, OSError):
1131 1131 hardlink = False
1132 1132 shutil.copy(src, dst)
1133 1133 else:
1134 1134 shutil.copy(src, dst)
1135 1135 num += 1
1136 1136 progress(topic, num)
1137 1137 progress(topic, None)
1138 1138
1139 1139 return hardlink, num
1140 1140
1141 1141 _winreservednames = '''con prn aux nul
1142 1142 com1 com2 com3 com4 com5 com6 com7 com8 com9
1143 1143 lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
1144 1144 _winreservedchars = ':*?"<>|'
1145 1145 def checkwinfilename(path):
1146 1146 r'''Check that the base-relative path is a valid filename on Windows.
1147 1147 Returns None if the path is ok, or a UI string describing the problem.
1148 1148
1149 1149 >>> checkwinfilename("just/a/normal/path")
1150 1150 >>> checkwinfilename("foo/bar/con.xml")
1151 1151 "filename contains 'con', which is reserved on Windows"
1152 1152 >>> checkwinfilename("foo/con.xml/bar")
1153 1153 "filename contains 'con', which is reserved on Windows"
1154 1154 >>> checkwinfilename("foo/bar/xml.con")
1155 1155 >>> checkwinfilename("foo/bar/AUX/bla.txt")
1156 1156 "filename contains 'AUX', which is reserved on Windows"
1157 1157 >>> checkwinfilename("foo/bar/bla:.txt")
1158 1158 "filename contains ':', which is reserved on Windows"
1159 1159 >>> checkwinfilename("foo/bar/b\07la.txt")
1160 1160 "filename contains '\\x07', which is invalid on Windows"
1161 1161 >>> checkwinfilename("foo/bar/bla ")
1162 1162 "filename ends with ' ', which is not allowed on Windows"
1163 1163 >>> checkwinfilename("../bar")
1164 1164 >>> checkwinfilename("foo\\")
1165 1165 "filename ends with '\\', which is invalid on Windows"
1166 1166 >>> checkwinfilename("foo\\/bar")
1167 1167 "directory name ends with '\\', which is invalid on Windows"
1168 1168 '''
1169 1169 if path.endswith('\\'):
1170 1170 return _("filename ends with '\\', which is invalid on Windows")
1171 1171 if '\\/' in path:
1172 1172 return _("directory name ends with '\\', which is invalid on Windows")
1173 1173 for n in path.replace('\\', '/').split('/'):
1174 1174 if not n:
1175 1175 continue
1176 1176 for c in n:
1177 1177 if c in _winreservedchars:
1178 1178 return _("filename contains '%s', which is reserved "
1179 1179 "on Windows") % c
1180 1180 if ord(c) <= 31:
1181 1181 return _("filename contains %r, which is invalid "
1182 1182 "on Windows") % c
1183 1183 base = n.split('.')[0]
1184 1184 if base and base.lower() in _winreservednames:
1185 1185 return _("filename contains '%s', which is reserved "
1186 1186 "on Windows") % base
1187 1187 t = n[-1]
1188 1188 if t in '. ' and n not in '..': # substring test exempting '.' and '..'
1189 1189 return _("filename ends with '%s', which is not allowed "
1190 1190 "on Windows") % t
1191 1191
1192 1192 if pycompat.osname == 'nt':
1193 1193 checkosfilename = checkwinfilename
1194 1194 else:
1195 1195 checkosfilename = platform.checkosfilename
1196 1196
1197 1197 def makelock(info, pathname):
1198 1198 try:
1199 1199 return os.symlink(info, pathname)
1200 1200 except OSError as why:
1201 1201 if why.errno == errno.EEXIST:
1202 1202 raise
1203 1203 except AttributeError: # no symlink in os
1204 1204 pass
1205 1205
1206 1206 ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
1207 1207 os.write(ld, info)
1208 1208 os.close(ld)
1209 1209
1210 1210 def readlock(pathname):
1211 1211 try:
1212 1212 return os.readlink(pathname)
1213 1213 except OSError as why:
1214 1214 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1215 1215 raise
1216 1216 except AttributeError: # no symlink in os
1217 1217 pass
1218 1218 fp = posixfile(pathname)
1219 1219 r = fp.read()
1220 1220 fp.close()
1221 1221 return r
1222 1222
1223 1223 def fstat(fp):
1224 1224 '''stat file object that may not have fileno method.'''
1225 1225 try:
1226 1226 return os.fstat(fp.fileno())
1227 1227 except AttributeError:
1228 1228 return os.stat(fp.name)
1229 1229
1230 1230 # File system features
1231 1231
1232 1232 def fscasesensitive(path):
1233 1233 """
1234 1234 Return true if the given path is on a case-sensitive filesystem
1235 1235
1236 1236 Requires a path (like /foo/.hg) ending with a foldable final
1237 1237 directory component.
1238 1238 """
1239 1239 s1 = os.lstat(path)
1240 1240 d, b = os.path.split(path)
1241 1241 b2 = b.upper()
1242 1242 if b == b2:
1243 1243 b2 = b.lower()
1244 1244 if b == b2:
1245 1245 return True # no evidence against case sensitivity
1246 1246 p2 = os.path.join(d, b2)
1247 1247 try:
1248 1248 s2 = os.lstat(p2)
1249 1249 if s2 == s1:
1250 1250 return False
1251 1251 return True
1252 1252 except OSError:
1253 1253 return True
1254 1254
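# Sketch (editor's addition, not part of util.py): probe the filesystem that
# holds a repository by passing a path with a foldable final component, as
# the docstring requires. 'repopath' is hypothetical.
def _fscasesensitiveexample(repopath):
    return fscasesensitive(os.path.join(repopath, '.hg'))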
1255 1255 try:
1256 1256 import re2
1257 1257 _re2 = None
1258 1258 except ImportError:
1259 1259 _re2 = False
1260 1260
1261 1261 class _re(object):
1262 1262 def _checkre2(self):
1263 1263 global _re2
1264 1264 try:
1265 1265 # check if match works, see issue3964
1266 1266 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1267 1267 except ImportError:
1268 1268 _re2 = False
1269 1269
1270 1270 def compile(self, pat, flags=0):
1271 1271 '''Compile a regular expression, using re2 if possible
1272 1272
1273 1273 For best performance, use only re2-compatible regexp features. The
1274 1274 only flags from the re module that are re2-compatible are
1275 1275 IGNORECASE and MULTILINE.'''
1276 1276 if _re2 is None:
1277 1277 self._checkre2()
1278 1278 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1279 1279 if flags & remod.IGNORECASE:
1280 1280 pat = '(?i)' + pat
1281 1281 if flags & remod.MULTILINE:
1282 1282 pat = '(?m)' + pat
1283 1283 try:
1284 1284 return re2.compile(pat)
1285 1285 except re2.error:
1286 1286 pass
1287 1287 return remod.compile(pat, flags)
1288 1288
1289 1289 @propertycache
1290 1290 def escape(self):
1291 1291 '''Return the version of escape corresponding to self.compile.
1292 1292
1293 1293 This is imperfect because whether re2 or re is used for a particular
1294 1294 function depends on the flags, etc, but it's the best we can do.
1295 1295 '''
1296 1296 global _re2
1297 1297 if _re2 is None:
1298 1298 self._checkre2()
1299 1299 if _re2:
1300 1300 return re2.escape
1301 1301 else:
1302 1302 return remod.escape
1303 1303
1304 1304 re = _re()
1305 1305
1306 1306 _fspathcache = {}
1307 1307 def fspath(name, root):
1308 1308 '''Get name in the case stored in the filesystem
1309 1309
1310 1310 The name should be relative to root, and be normcase-ed for efficiency.
1311 1311
1312 1312 Note that this function is unnecessary, and should not be
1313 1313 called, for case-sensitive filesystems (simply because it's expensive).
1314 1314
1315 1315 The root should be normcase-ed, too.
1316 1316 '''
1317 1317 def _makefspathcacheentry(dir):
1318 1318 return dict((normcase(n), n) for n in os.listdir(dir))
1319 1319
1320 1320 seps = pycompat.ossep
1321 1321 if pycompat.osaltsep:
1322 1322 seps = seps + pycompat.osaltsep
1323 1323 # Protect backslashes. This gets silly very quickly.
1324 1324 seps = seps.replace('\\','\\\\')
1325 1325 pattern = remod.compile(r'([^%s]+)|([%s]+)' % (seps, seps))
1326 1326 dir = os.path.normpath(root)
1327 1327 result = []
1328 1328 for part, sep in pattern.findall(name):
1329 1329 if sep:
1330 1330 result.append(sep)
1331 1331 continue
1332 1332
1333 1333 if dir not in _fspathcache:
1334 1334 _fspathcache[dir] = _makefspathcacheentry(dir)
1335 1335 contents = _fspathcache[dir]
1336 1336
1337 1337 found = contents.get(part)
1338 1338 if not found:
1339 1339 # retry "once per directory" per "dirstate.walk" which
1340 1340 # may take place for each patch of "hg qpush", for example
1341 1341 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1342 1342 found = contents.get(part)
1343 1343
1344 1344 result.append(found or part)
1345 1345 dir = os.path.join(dir, part)
1346 1346
1347 1347 return ''.join(result)
1348 1348
1349 1349 def checknlink(testfile):
1350 1350 '''check whether hardlink count reporting works properly'''
1351 1351
1352 1352 # testfile may be open, so we need a separate file for checking to
1353 1353 # work around issue2543 (or testfile may get lost on Samba shares)
1354 1354 f1 = testfile + ".hgtmp1"
1355 1355 if os.path.lexists(f1):
1356 1356 return False
1357 1357 try:
1358 1358 posixfile(f1, 'w').close()
1359 1359 except IOError:
1360 1360 try:
1361 1361 os.unlink(f1)
1362 1362 except OSError:
1363 1363 pass
1364 1364 return False
1365 1365
1366 1366 f2 = testfile + ".hgtmp2"
1367 1367 fd = None
1368 1368 try:
1369 1369 oslink(f1, f2)
1370 1370 # nlinks() may behave differently for files on Windows shares if
1371 1371 # the file is open.
1372 1372 fd = posixfile(f2)
1373 1373 return nlinks(f2) > 1
1374 1374 except OSError:
1375 1375 return False
1376 1376 finally:
1377 1377 if fd is not None:
1378 1378 fd.close()
1379 1379 for f in (f1, f2):
1380 1380 try:
1381 1381 os.unlink(f)
1382 1382 except OSError:
1383 1383 pass
1384 1384
1385 1385 def endswithsep(path):
1386 1386 '''Check path ends with os.sep or os.altsep.'''
1387 1387 return (path.endswith(pycompat.ossep)
1388 1388 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1389 1389
1390 1390 def splitpath(path):
1391 1391 '''Split path by os.sep.
1392 1392 Note that this function does not use os.altsep because this is
1393 1393 an alternative of simple "xxx.split(os.sep)".
1394 1394 It is recommended to use os.path.normpath() before using this
1395 1395 function if needed.'''
1396 1396 return path.split(pycompat.ossep)
1397 1397
1398 1398 def gui():
1399 1399 '''Are we running in a GUI?'''
1400 1400 if pycompat.sysplatform == 'darwin':
1401 1401 if 'SSH_CONNECTION' in encoding.environ:
1402 1402 # handle SSH access to a box where the user is logged in
1403 1403 return False
1404 1404 elif getattr(osutil, 'isgui', None):
1405 1405 # check if a CoreGraphics session is available
1406 1406 return osutil.isgui()
1407 1407 else:
1408 1408 # pure build; use a safe default
1409 1409 return True
1410 1410 else:
1411 1411 return pycompat.osname == "nt" or encoding.environ.get("DISPLAY")
1412 1412
1413 1413 def mktempcopy(name, emptyok=False, createmode=None):
1414 1414 """Create a temporary file with the same contents from name
1415 1415
1416 1416 The permission bits are copied from the original file.
1417 1417
1418 1418 If the temporary file is going to be truncated immediately, you
1419 1419 can use emptyok=True as an optimization.
1420 1420
1421 1421 Returns the name of the temporary file.
1422 1422 """
1423 1423 d, fn = os.path.split(name)
1424 1424 fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, dir=d)
1425 1425 os.close(fd)
1426 1426 # Temporary files are created with mode 0600, which is usually not
1427 1427 # what we want. If the original file already exists, just copy
1428 1428 # its mode. Otherwise, manually obey umask.
1429 1429 copymode(name, temp, createmode)
1430 1430 if emptyok:
1431 1431 return temp
1432 1432 try:
1433 1433 try:
1434 1434 ifp = posixfile(name, "rb")
1435 1435 except IOError as inst:
1436 1436 if inst.errno == errno.ENOENT:
1437 1437 return temp
1438 1438 if not getattr(inst, 'filename', None):
1439 1439 inst.filename = name
1440 1440 raise
1441 1441 ofp = posixfile(temp, "wb")
1442 1442 for chunk in filechunkiter(ifp):
1443 1443 ofp.write(chunk)
1444 1444 ifp.close()
1445 1445 ofp.close()
1446 1446 except: # re-raises
1447 1447 try: os.unlink(temp)
1448 1448 except OSError: pass
1449 1449 raise
1450 1450 return temp
1451 1451
1452 1452 class filestat(object):
1453 1453 """helper to exactly detect change of a file
1454 1454
1455 1455 'stat' attribute is result of 'os.stat()' if specified 'path'
1456 1456 exists. Otherwise, it is None. This can avoid preparative
1457 1457 'exists()' examination on client side of this class.
1458 1458 """
1459 1459 def __init__(self, path):
1460 1460 try:
1461 1461 self.stat = os.stat(path)
1462 1462 except OSError as err:
1463 1463 if err.errno != errno.ENOENT:
1464 1464 raise
1465 1465 self.stat = None
1466 1466
1467 1467 __hash__ = object.__hash__
1468 1468
1469 1469 def __eq__(self, old):
1470 1470 try:
1471 1471 # if ambiguity between stat of new and old file is
1472 1472 # avoided, comparison of size, ctime and mtime is enough
1473 1473 # to exactly detect change of a file regardless of platform
1474 1474 return (self.stat.st_size == old.stat.st_size and
1475 1475 self.stat.st_ctime == old.stat.st_ctime and
1476 1476 self.stat.st_mtime == old.stat.st_mtime)
1477 1477 except AttributeError:
1478 1478 return False
1479 1479
1480 1480 def isambig(self, old):
1481 1481 """Examine whether new (= self) stat is ambiguous against old one
1482 1482
1483 1483 "S[N]" below means stat of a file at N-th change:
1484 1484
1485 1485 - S[n-1].ctime < S[n].ctime: can detect change of a file
1486 1486 - S[n-1].ctime == S[n].ctime
1487 1487 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
1488 1488 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
1489 1489 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
1490 1490 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
1491 1491
1492 1492 Case (*2) above means that a file was changed twice or more at
1493 1493 same time in sec (= S[n-1].ctime), and comparison of timestamp
1494 1494 is ambiguous.
1495 1495
1496 1496 The basic idea for avoiding such ambiguity is "advance mtime 1
1497 1497 sec, if the timestamp is ambiguous".
1498 1498
1499 1499 But advancing mtime only in case (*2) doesn't work as
1500 1500 expected, because naturally advanced S[n].mtime in case (*1)
1501 1501 might be equal to manually advanced S[n-1 or earlier].mtime.
1502 1502
1503 1503 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
1504 1504 treated as ambiguous regardless of mtime, to avoid overlooking
1505 1505 changes hidden by collisions between such mtimes.
1506 1506
1507 1507 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
1508 1508 S[n].mtime", even if size of a file isn't changed.
1509 1509 """
1510 1510 try:
1511 1511 return (self.stat.st_ctime == old.stat.st_ctime)
1512 1512 except AttributeError:
1513 1513 return False
1514 1514
1515 1515 def avoidambig(self, path, old):
1516 1516 """Change file stat of specified path to avoid ambiguity
1517 1517
1518 1518 'old' should be previous filestat of 'path'.
1519 1519
1520 1520 This skips avoiding ambiguity, if a process doesn't have
1521 1521 appropriate privileges for 'path'.
1522 1522 """
1523 1523 advanced = (old.stat.st_mtime + 1) & 0x7fffffff
1524 1524 try:
1525 1525 os.utime(path, (advanced, advanced))
1526 1526 except OSError as inst:
1527 1527 if inst.errno == errno.EPERM:
1528 1528 # utime() on the file created by another user causes EPERM,
1529 1529 # if a process doesn't have appropriate privileges
1530 1530 return
1531 1531 raise
1532 1532
1533 1533 def __ne__(self, other):
1534 1534 return not self == other
1535 1535
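# Sketch (editor's addition, not part of util.py): the usual pattern pairs a
# stat taken before rewriting a file with one taken afterwards; when the
# ctimes collide, the mtime is nudged forward so the change stays detectable.
def _filestatexample(path):
    old = filestat(path)
    writefile(path, 'new content')  # hypothetical rewrite of path
    new = filestat(path)
    if new.isambig(old):
        new.avoidambig(path, old)   # advance mtime by 1s (mod 2**31)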
1536 1536 class atomictempfile(object):
1537 1537 '''writable file object that atomically updates a file
1538 1538
1539 1539 All writes will go to a temporary copy of the original file. Call
1540 1540 close() when you are done writing, and atomictempfile will rename
1541 1541 the temporary copy to the original name, making the changes
1542 1542 visible. If the object is destroyed without being closed, all your
1543 1543 writes are discarded.
1544 1544
1545 1545 checkambig argument of constructor is used with filestat, and is
1546 1546 useful only if target file is guarded by any lock (e.g. repo.lock
1547 1547 or repo.wlock).
1548 1548 '''
1549 1549 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
1550 1550 self.__name = name # permanent name
1551 1551 self._tempname = mktempcopy(name, emptyok=('w' in mode),
1552 1552 createmode=createmode)
1553 1553 self._fp = posixfile(self._tempname, mode)
1554 1554 self._checkambig = checkambig
1555 1555
1556 1556 # delegated methods
1557 1557 self.read = self._fp.read
1558 1558 self.write = self._fp.write
1559 1559 self.seek = self._fp.seek
1560 1560 self.tell = self._fp.tell
1561 1561 self.fileno = self._fp.fileno
1562 1562
1563 1563 def close(self):
1564 1564 if not self._fp.closed:
1565 1565 self._fp.close()
1566 1566 filename = localpath(self.__name)
1567 1567 oldstat = self._checkambig and filestat(filename)
1568 1568 if oldstat and oldstat.stat:
1569 1569 rename(self._tempname, filename)
1570 1570 newstat = filestat(filename)
1571 1571 if newstat.isambig(oldstat):
1572 1572 # stat of changed file is ambiguous to original one
1573 1573 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1574 1574 os.utime(filename, (advanced, advanced))
1575 1575 else:
1576 1576 rename(self._tempname, filename)
1577 1577
1578 1578 def discard(self):
1579 1579 if not self._fp.closed:
1580 1580 try:
1581 1581 os.unlink(self._tempname)
1582 1582 except OSError:
1583 1583 pass
1584 1584 self._fp.close()
1585 1585
1586 1586 def __del__(self):
1587 1587 if safehasattr(self, '_fp'): # constructor actually did something
1588 1588 self.discard()
1589 1589
1590 1590 def __enter__(self):
1591 1591 return self
1592 1592
1593 1593 def __exit__(self, exctype, excvalue, traceback):
1594 1594 if exctype is not None:
1595 1595 self.discard()
1596 1596 else:
1597 1597 self.close()
1598 1598
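# Sketch (editor's addition, not part of util.py): all writes land in a
# temporary copy; the context manager renames it over the target on a clean
# exit and discards it if the block raises.
def _atomictempfileexample(path):
    with atomictempfile(path) as fp:
        fp.write('all or nothing\n')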
1599 1599 def makedirs(name, mode=None, notindexed=False):
1600 1600 """recursive directory creation with parent mode inheritance
1601 1601
1602 1602 Newly created directories are marked as "not to be indexed by
1603 1603 the content indexing service", if ``notindexed`` is specified
1604 1604 for "write" mode access.
1605 1605 """
1606 1606 try:
1607 1607 makedir(name, notindexed)
1608 1608 except OSError as err:
1609 1609 if err.errno == errno.EEXIST:
1610 1610 return
1611 1611 if err.errno != errno.ENOENT or not name:
1612 1612 raise
1613 1613 parent = os.path.dirname(os.path.abspath(name))
1614 1614 if parent == name:
1615 1615 raise
1616 1616 makedirs(parent, mode, notindexed)
1617 1617 try:
1618 1618 makedir(name, notindexed)
1619 1619 except OSError as err:
1620 1620 # Catch EEXIST to handle races
1621 1621 if err.errno == errno.EEXIST:
1622 1622 return
1623 1623 raise
1624 1624 if mode is not None:
1625 1625 os.chmod(name, mode)
1626 1626
1627 1627 def readfile(path):
1628 1628 with open(path, 'rb') as fp:
1629 1629 return fp.read()
1630 1630
1631 1631 def writefile(path, text):
1632 1632 with open(path, 'wb') as fp:
1633 1633 fp.write(text)
1634 1634
1635 1635 def appendfile(path, text):
1636 1636 with open(path, 'ab') as fp:
1637 1637 fp.write(text)
1638 1638
1639 1639 class chunkbuffer(object):
1640 1640 """Allow arbitrary sized chunks of data to be efficiently read from an
1641 1641 iterator over chunks of arbitrary size."""
1642 1642
1643 1643 def __init__(self, in_iter):
1644 1644 """in_iter is the iterator that's iterating over the input
1645 1645 chunks."""
1646 1646 def splitbig(chunks):
1647 1647 for chunk in chunks:
1648 1648 if len(chunk) > 2**20:
1649 1649 pos = 0
1650 1650 while pos < len(chunk):
1651 1651 end = pos + 2 ** 18
1652 1652 yield chunk[pos:end]
1653 1653 pos = end
1654 1654 else:
1655 1655 yield chunk
1656 1656 self.iter = splitbig(in_iter)
1657 1657 self._queue = collections.deque()
1658 1658 self._chunkoffset = 0
1659 1659
1660 1660 def read(self, l=None):
1661 1661 """Read L bytes of data from the iterator of chunks of data.
1662 1662 Returns less than L bytes if the iterator runs dry.
1663 1663
1664 1664 If the parameter is omitted, read everything"""
1665 1665 if l is None:
1666 1666 return ''.join(self.iter)
1667 1667
1668 1668 left = l
1669 1669 buf = []
1670 1670 queue = self._queue
1671 1671 while left > 0:
1672 1672 # refill the queue
1673 1673 if not queue:
1674 1674 target = 2**18
1675 1675 for chunk in self.iter:
1676 1676 queue.append(chunk)
1677 1677 target -= len(chunk)
1678 1678 if target <= 0:
1679 1679 break
1680 1680 if not queue:
1681 1681 break
1682 1682
1683 1683 # The easy way to do this would be to queue.popleft(), modify the
1684 1684 # chunk (if necessary), then queue.appendleft(). However, for cases
1685 1685 # where we read partial chunk content, this incurs 2 dequeue
1686 1686 # mutations and creates a new str for the remaining chunk in the
1687 1687 # queue. Our code below avoids this overhead.
1688 1688
1689 1689 chunk = queue[0]
1690 1690 chunkl = len(chunk)
1691 1691 offset = self._chunkoffset
1692 1692
1693 1693 # Use full chunk.
1694 1694 if offset == 0 and left >= chunkl:
1695 1695 left -= chunkl
1696 1696 queue.popleft()
1697 1697 buf.append(chunk)
1698 1698 # self._chunkoffset remains at 0.
1699 1699 continue
1700 1700
1701 1701 chunkremaining = chunkl - offset
1702 1702
1703 1703 # Use all of unconsumed part of chunk.
1704 1704 if left >= chunkremaining:
1705 1705 left -= chunkremaining
1706 1706 queue.popleft()
1707 1707 # offset == 0 is enabled by block above, so this won't merely
1708 1708 # copy via ``chunk[0:]``.
1709 1709 buf.append(chunk[offset:])
1710 1710 self._chunkoffset = 0
1711 1711
1712 1712 # Partial chunk needed.
1713 1713 else:
1714 1714 buf.append(chunk[offset:offset + left])
1715 1715 self._chunkoffset += left
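                # left < chunkremaining here, so this leaves ``left``
                # non-positive and ends the read loop.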
1716 1716 left -= chunkremaining
1717 1717
1718 1718 return ''.join(buf)
1719 1719
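# Illustrative sketch, not part of the original module, of how chunkbuffer
# rechunks an arbitrary iterator; the sample data is hypothetical:
#
#   buf = chunkbuffer(iter(['abc', 'defgh']))
#   buf.read(4)   # -> 'abcd' (spans the first chunk boundary)
#   buf.read(4)   # -> 'efgh' (rest of the buffered data)
#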
1720 1720 def filechunkiter(f, size=131072, limit=None):
1721 1721 """Create a generator that produces the data in the file size
1722 1722 (default 131072) bytes at a time, up to optional limit (default is
1723 1723 to read all data). Chunks may be less than size bytes if the
1724 1724 chunk is the last chunk in the file, or the file is a socket or
1725 1725 some other type of file that sometimes reads less data than is
1726 1726 requested."""
1727 1727 assert size >= 0
1728 1728 assert limit is None or limit >= 0
1729 1729 while True:
1730 1730 if limit is None:
1731 1731 nbytes = size
1732 1732 else:
1733 1733 nbytes = min(limit, size)
1734 1734 s = nbytes and f.read(nbytes)
1735 1735 if not s:
1736 1736 break
1737 1737 if limit:
1738 1738 limit -= len(s)
1739 1739 yield s
1740 1740
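# For example (illustrative, using the stringio alias imported above):
#
#   list(filechunkiter(stringio('abcdef'), size=4))   # -> ['abcd', 'ef']
#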
1741 1741 def makedate(timestamp=None):
    '''Return a unix timestamp (or the current time) as a (unixtime,
    offset) tuple based on the local timezone.'''
1744 1744 if timestamp is None:
1745 1745 timestamp = time.time()
1746 1746 if timestamp < 0:
1747 1747 hint = _("check your clock")
1748 1748 raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
1749 1749 delta = (datetime.datetime.utcfromtimestamp(timestamp) -
1750 1750 datetime.datetime.fromtimestamp(timestamp))
1751 1751 tz = delta.days * 86400 + delta.seconds
1752 1752 return timestamp, tz
1753 1753
1754 1754 def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
1755 1755 """represent a (unixtime, offset) tuple as a localized time.
1756 1756 unixtime is seconds since the epoch, and offset is the time zone's
1757 1757 number of seconds away from UTC.
1758 1758
1759 1759 >>> datestr((0, 0))
1760 1760 'Thu Jan 01 00:00:00 1970 +0000'
1761 1761 >>> datestr((42, 0))
1762 1762 'Thu Jan 01 00:00:42 1970 +0000'
1763 1763 >>> datestr((-42, 0))
1764 1764 'Wed Dec 31 23:59:18 1969 +0000'
1765 1765 >>> datestr((0x7fffffff, 0))
1766 1766 'Tue Jan 19 03:14:07 2038 +0000'
1767 1767 >>> datestr((-0x80000000, 0))
1768 1768 'Fri Dec 13 20:45:52 1901 +0000'
1769 1769 """
1770 1770 t, tz = date or makedate()
1771 1771 if "%1" in format or "%2" in format or "%z" in format:
1772 1772 sign = (tz > 0) and "-" or "+"
1773 1773 minutes = abs(tz) // 60
1774 1774 q, r = divmod(minutes, 60)
1775 1775 format = format.replace("%z", "%1%2")
1776 1776 format = format.replace("%1", "%c%02d" % (sign, q))
1777 1777 format = format.replace("%2", "%02d" % r)
1778 1778 d = t - tz
1779 1779 if d > 0x7fffffff:
1780 1780 d = 0x7fffffff
1781 1781 elif d < -0x80000000:
1782 1782 d = -0x80000000
1783 1783 # Never use time.gmtime() and datetime.datetime.fromtimestamp()
1784 1784 # because they use the gmtime() system call which is buggy on Windows
1785 1785 # for negative values.
1786 1786 t = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=d)
1787 1787 s = t.strftime(format)
1788 1788 return s
1789 1789
1790 1790 def shortdate(date=None):
1791 1791 """turn (timestamp, tzoff) tuple into iso 8631 date."""
1792 1792 return datestr(date, format='%Y-%m-%d')
1793 1793
1794 1794 def parsetimezone(s):
1795 1795 """find a trailing timezone, if any, in string, and return a
1796 1796 (offset, remainder) pair"""
1797 1797
1798 1798 if s.endswith("GMT") or s.endswith("UTC"):
1799 1799 return 0, s[:-3].rstrip()
1800 1800
1801 1801 # Unix-style timezones [+-]hhmm
1802 1802 if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
1803 1803 sign = (s[-5] == "+") and 1 or -1
1804 1804 hours = int(s[-4:-2])
1805 1805 minutes = int(s[-2:])
1806 1806 return -sign * (hours * 60 + minutes) * 60, s[:-5].rstrip()
1807 1807
1808 1808 # ISO8601 trailing Z
1809 1809 if s.endswith("Z") and s[-2:-1].isdigit():
1810 1810 return 0, s[:-1]
1811 1811
1812 1812 # ISO8601-style [+-]hh:mm
1813 1813 if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
1814 1814 s[-5:-3].isdigit() and s[-2:].isdigit()):
1815 1815 sign = (s[-6] == "+") and 1 or -1
1816 1816 hours = int(s[-5:-3])
1817 1817 minutes = int(s[-2:])
1818 1818 return -sign * (hours * 60 + minutes) * 60, s[:-6]
1819 1819
1820 1820 return None, s
1821 1821
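# For example (illustrative):
#
#   parsetimezone('Mon Sep 04 15:13:13 2006 GMT')
#   # -> (0, 'Mon Sep 04 15:13:13 2006')
#   parsetimezone('Mon Sep 04 15:13:13 2006 +0200')
#   # -> (-7200, 'Mon Sep 04 15:13:13 2006')
#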
1822 1822 def strdate(string, format, defaults=[]):
1823 1823 """parse a localized time string and return a (unixtime, offset) tuple.
1824 1824 if the string cannot be parsed, ValueError is raised."""
1825 1825 # NOTE: unixtime = localunixtime + offset
1826 1826 offset, date = parsetimezone(string)
1827 1827
1828 1828 # add missing elements from defaults
1829 1829 usenow = False # default to using biased defaults
1830 1830 for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
1831 1831 found = [True for p in part if ("%"+p) in format]
1832 1832 if not found:
1833 1833 date += "@" + defaults[part][usenow]
1834 1834 format += "@%" + part[0]
1835 1835 else:
1836 1836 # We've found a specific time element, less specific time
1837 1837 # elements are relative to today
1838 1838 usenow = True
1839 1839
1840 1840 timetuple = time.strptime(date, format)
1841 1841 localunixtime = int(calendar.timegm(timetuple))
1842 1842 if offset is None:
1843 1843 # local timezone
1844 1844 unixtime = int(time.mktime(timetuple))
1845 1845 offset = unixtime - localunixtime
1846 1846 else:
1847 1847 unixtime = localunixtime + offset
1848 1848 return unixtime, offset
1849 1849
1850 1850 def parsedate(date, formats=None, bias=None):
1851 1851 """parse a localized date/time and return a (unixtime, offset) tuple.
1852 1852
1853 1853 The date may be a "unixtime offset" string or in one of the specified
1854 1854 formats. If the date already is a (unixtime, offset) tuple, it is returned.
1855 1855
1856 1856 >>> parsedate(' today ') == parsedate(\
1857 1857 datetime.date.today().strftime('%b %d'))
1858 1858 True
1859 1859 >>> parsedate( 'yesterday ') == parsedate((datetime.date.today() -\
1860 1860 datetime.timedelta(days=1)\
1861 1861 ).strftime('%b %d'))
1862 1862 True
1863 1863 >>> now, tz = makedate()
1864 1864 >>> strnow, strtz = parsedate('now')
1865 1865 >>> (strnow - now) < 1
1866 1866 True
1867 1867 >>> tz == strtz
1868 1868 True
1869 1869 """
1870 1870 if bias is None:
1871 1871 bias = {}
1872 1872 if not date:
1873 1873 return 0, 0
1874 1874 if isinstance(date, tuple) and len(date) == 2:
1875 1875 return date
1876 1876 if not formats:
1877 1877 formats = defaultdateformats
1878 1878 date = date.strip()
1879 1879
1880 1880 if date == 'now' or date == _('now'):
1881 1881 return makedate()
1882 1882 if date == 'today' or date == _('today'):
1883 1883 date = datetime.date.today().strftime('%b %d')
1884 1884 elif date == 'yesterday' or date == _('yesterday'):
1885 1885 date = (datetime.date.today() -
1886 1886 datetime.timedelta(days=1)).strftime('%b %d')
1887 1887
1888 1888 try:
1889 1889 when, offset = map(int, date.split(' '))
1890 1890 except ValueError:
1891 1891 # fill out defaults
1892 1892 now = makedate()
1893 1893 defaults = {}
1894 1894 for part in ("d", "mb", "yY", "HI", "M", "S"):
1895 1895 # this piece is for rounding the specific end of unknowns
1896 1896 b = bias.get(part)
1897 1897 if b is None:
1898 1898 if part[0] in "HMS":
1899 1899 b = "00"
1900 1900 else:
1901 1901 b = "0"
1902 1902
1903 1903 # this piece is for matching the generic end to today's date
1904 1904 n = datestr(now, "%" + part[0])
1905 1905
1906 1906 defaults[part] = (b, n)
1907 1907
1908 1908 for format in formats:
1909 1909 try:
1910 1910 when, offset = strdate(date, format, defaults)
1911 1911 except (ValueError, OverflowError):
1912 1912 pass
1913 1913 else:
1914 1914 break
1915 1915 else:
1916 1916 raise Abort(_('invalid date: %r') % date)
1917 1917 # validate explicit (probably user-specified) date and
1918 1918 # time zone offset. values must fit in signed 32 bits for
1919 1919 # current 32-bit linux runtimes. timezones go from UTC-12
1920 1920 # to UTC+14
1921 1921 if when < -0x80000000 or when > 0x7fffffff:
1922 1922 raise Abort(_('date exceeds 32 bits: %d') % when)
1923 1923 if offset < -50400 or offset > 43200:
1924 1924 raise Abort(_('impossible time zone offset: %d') % offset)
1925 1925 return when, offset
1926 1926
1927 1927 def matchdate(date):
1928 1928 """Return a function that matches a given date match specifier
1929 1929
1930 1930 Formats include:
1931 1931
1932 1932 '{date}' match a given date to the accuracy provided
1933 1933
1934 1934 '<{date}' on or before a given date
1935 1935
1936 1936 '>{date}' on or after a given date
1937 1937
1938 1938 >>> p1 = parsedate("10:29:59")
1939 1939 >>> p2 = parsedate("10:30:00")
1940 1940 >>> p3 = parsedate("10:30:59")
1941 1941 >>> p4 = parsedate("10:31:00")
1942 1942 >>> p5 = parsedate("Sep 15 10:30:00 1999")
1943 1943 >>> f = matchdate("10:30")
1944 1944 >>> f(p1[0])
1945 1945 False
1946 1946 >>> f(p2[0])
1947 1947 True
1948 1948 >>> f(p3[0])
1949 1949 True
1950 1950 >>> f(p4[0])
1951 1951 False
1952 1952 >>> f(p5[0])
1953 1953 False
1954 1954 """
1955 1955
1956 1956 def lower(date):
1957 1957 d = {'mb': "1", 'd': "1"}
1958 1958 return parsedate(date, extendeddateformats, d)[0]
1959 1959
1960 1960 def upper(date):
1961 1961 d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
1962 1962 for days in ("31", "30", "29"):
1963 1963 try:
1964 1964 d["d"] = days
1965 1965 return parsedate(date, extendeddateformats, d)[0]
1966 1966 except Abort:
1967 1967 pass
1968 1968 d["d"] = "28"
1969 1969 return parsedate(date, extendeddateformats, d)[0]
1970 1970
1971 1971 date = date.strip()
1972 1972
1973 1973 if not date:
1974 1974 raise Abort(_("dates cannot consist entirely of whitespace"))
1975 1975 elif date[0] == "<":
1976 1976 if not date[1:]:
1977 1977 raise Abort(_("invalid day spec, use '<DATE'"))
1978 1978 when = upper(date[1:])
1979 1979 return lambda x: x <= when
1980 1980 elif date[0] == ">":
1981 1981 if not date[1:]:
1982 1982 raise Abort(_("invalid day spec, use '>DATE'"))
1983 1983 when = lower(date[1:])
1984 1984 return lambda x: x >= when
1985 1985 elif date[0] == "-":
1986 1986 try:
1987 1987 days = int(date[1:])
1988 1988 except ValueError:
1989 1989 raise Abort(_("invalid day spec: %s") % date[1:])
1990 1990 if days < 0:
1991 1991 raise Abort(_("%s must be nonnegative (see 'hg help dates')")
1992 1992 % date[1:])
1993 1993 when = makedate()[0] - days * 3600 * 24
1994 1994 return lambda x: x >= when
1995 1995 elif " to " in date:
1996 1996 a, b = date.split(" to ")
1997 1997 start, stop = lower(a), upper(b)
1998 1998 return lambda x: x >= start and x <= stop
1999 1999 else:
2000 2000 start, stop = lower(date), upper(date)
2001 2001 return lambda x: x >= start and x <= stop
2002 2002
2003 2003 def stringmatcher(pattern, casesensitive=True):
2004 2004 """
2005 2005 accepts a string, possibly starting with 're:' or 'literal:' prefix.
2006 2006 returns the matcher name, pattern, and matcher function.
2007 2007 missing or unknown prefixes are treated as literal matches.
2008 2008
2009 2009 helper for tests:
2010 2010 >>> def test(pattern, *tests):
2011 2011 ... kind, pattern, matcher = stringmatcher(pattern)
2012 2012 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2013 2013 >>> def itest(pattern, *tests):
2014 2014 ... kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
2015 2015 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2016 2016
2017 2017 exact matching (no prefix):
2018 2018 >>> test('abcdefg', 'abc', 'def', 'abcdefg')
2019 2019 ('literal', 'abcdefg', [False, False, True])
2020 2020
2021 2021 regex matching ('re:' prefix)
2022 2022 >>> test('re:a.+b', 'nomatch', 'fooadef', 'fooadefbar')
2023 2023 ('re', 'a.+b', [False, False, True])
2024 2024
2025 2025 force exact matches ('literal:' prefix)
2026 2026 >>> test('literal:re:foobar', 'foobar', 're:foobar')
2027 2027 ('literal', 're:foobar', [False, True])
2028 2028
2029 2029 unknown prefixes are ignored and treated as literals
2030 2030 >>> test('foo:bar', 'foo', 'bar', 'foo:bar')
2031 2031 ('literal', 'foo:bar', [False, False, True])
2032 2032
2033 2033 case insensitive regex matches
2034 2034 >>> itest('re:A.+b', 'nomatch', 'fooadef', 'fooadefBar')
2035 2035 ('re', 'A.+b', [False, False, True])
2036 2036
2037 2037 case insensitive literal matches
2038 2038 >>> itest('ABCDEFG', 'abc', 'def', 'abcdefg')
2039 2039 ('literal', 'ABCDEFG', [False, False, True])
2040 2040 """
2041 2041 if pattern.startswith('re:'):
2042 2042 pattern = pattern[3:]
2043 2043 try:
2044 2044 flags = 0
2045 2045 if not casesensitive:
2046 2046 flags = remod.I
2047 2047 regex = remod.compile(pattern, flags)
2048 2048 except remod.error as e:
2049 2049 raise error.ParseError(_('invalid regular expression: %s')
2050 2050 % e)
2051 2051 return 're', pattern, regex.search
2052 2052 elif pattern.startswith('literal:'):
2053 2053 pattern = pattern[8:]
2054 2054
2055 2055 match = pattern.__eq__
2056 2056
2057 2057 if not casesensitive:
2058 2058 ipat = encoding.lower(pattern)
2059 2059 match = lambda s: ipat == encoding.lower(s)
2060 2060 return 'literal', pattern, match
2061 2061
2062 2062 def shortuser(user):
2063 2063 """Return a short representation of a user name or email address."""
2064 2064 f = user.find('@')
2065 2065 if f >= 0:
2066 2066 user = user[:f]
2067 2067 f = user.find('<')
2068 2068 if f >= 0:
2069 2069 user = user[f + 1:]
2070 2070 f = user.find(' ')
2071 2071 if f >= 0:
2072 2072 user = user[:f]
2073 2073 f = user.find('.')
2074 2074 if f >= 0:
2075 2075 user = user[:f]
2076 2076 return user
2077 2077
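# For example (illustrative):
#
#   shortuser('Foo Bar <foo.bar@example.com>')   # -> 'foo'
#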
2078 2078 def emailuser(user):
2079 2079 """Return the user portion of an email address."""
2080 2080 f = user.find('@')
2081 2081 if f >= 0:
2082 2082 user = user[:f]
2083 2083 f = user.find('<')
2084 2084 if f >= 0:
2085 2085 user = user[f + 1:]
2086 2086 return user
2087 2087
2088 2088 def email(author):
2089 2089 '''get email of author.'''
2090 2090 r = author.find('>')
2091 2091 if r == -1:
2092 2092 r = None
2093 2093 return author[author.find('<') + 1:r]
2094 2094
2095 2095 def ellipsis(text, maxlength=400):
2096 2096 """Trim string to at most maxlength (default: 400) columns in display."""
2097 2097 return encoding.trim(text, maxlength, ellipsis='...')
2098 2098
2099 2099 def unitcountfn(*unittable):
2100 2100 '''return a function that renders a readable count of some quantity'''
2101 2101
2102 2102 def go(count):
2103 2103 for multiplier, divisor, format in unittable:
2104 2104 if count >= divisor * multiplier:
2105 2105 return format % (count / float(divisor))
2106 2106 return unittable[-1][2] % count
2107 2107
2108 2108 return go
2109 2109
2110 2110 bytecount = unitcountfn(
2111 2111 (100, 1 << 30, _('%.0f GB')),
2112 2112 (10, 1 << 30, _('%.1f GB')),
2113 2113 (1, 1 << 30, _('%.2f GB')),
2114 2114 (100, 1 << 20, _('%.0f MB')),
2115 2115 (10, 1 << 20, _('%.1f MB')),
2116 2116 (1, 1 << 20, _('%.2f MB')),
2117 2117 (100, 1 << 10, _('%.0f KB')),
2118 2118 (10, 1 << 10, _('%.1f KB')),
2119 2119 (1, 1 << 10, _('%.2f KB')),
2120 2120 (1, 1, _('%.0f bytes')),
2121 2121 )
2122 2122
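# For example (illustrative):
#
#   bytecount(4096)   # -> '4.00 KB'
#   bytecount(123)    # -> '123 bytes'
#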
2123 2123 def uirepr(s):
2124 2124 # Avoid double backslash in Windows path repr()
2125 2125 return repr(s).replace('\\\\', '\\')
2126 2126
2127 2127 # delay import of textwrap
2128 2128 def MBTextWrapper(**kwargs):
2129 2129 class tw(textwrap.TextWrapper):
2130 2130 """
2131 2131 Extend TextWrapper for width-awareness.
2132 2132
        Neither the number of 'bytes' in any encoding nor the number of
        'characters' is appropriate for calculating the terminal columns
        occupied by a string.

        The original TextWrapper implementation uses the built-in 'len()'
        directly, so overriding is needed to use the width information of
        each character.

        In addition, characters classified as 'ambiguous' width are
        treated as wide in East Asian locales, but as narrow elsewhere.

        This requires a user decision to determine the width of such
        characters.
2143 2143 """
2144 2144 def _cutdown(self, ucstr, space_left):
2145 2145 l = 0
2146 2146 colwidth = encoding.ucolwidth
2147 2147 for i in xrange(len(ucstr)):
2148 2148 l += colwidth(ucstr[i])
2149 2149 if space_left < l:
2150 2150 return (ucstr[:i], ucstr[i:])
2151 2151 return ucstr, ''
2152 2152
2153 2153 # overriding of base class
2154 2154 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
2155 2155 space_left = max(width - cur_len, 1)
2156 2156
2157 2157 if self.break_long_words:
2158 2158 cut, res = self._cutdown(reversed_chunks[-1], space_left)
2159 2159 cur_line.append(cut)
2160 2160 reversed_chunks[-1] = res
2161 2161 elif not cur_line:
2162 2162 cur_line.append(reversed_chunks.pop())
2163 2163
2164 2164 # this overriding code is imported from TextWrapper of Python 2.6
2165 2165 # to calculate columns of string by 'encoding.ucolwidth()'
2166 2166 def _wrap_chunks(self, chunks):
2167 2167 colwidth = encoding.ucolwidth
2168 2168
2169 2169 lines = []
2170 2170 if self.width <= 0:
2171 2171 raise ValueError("invalid width %r (must be > 0)" % self.width)
2172 2172
2173 2173 # Arrange in reverse order so items can be efficiently popped
            # from a stack of chunks.
2175 2175 chunks.reverse()
2176 2176
2177 2177 while chunks:
2178 2178
2179 2179 # Start the list of chunks that will make up the current line.
2180 2180 # cur_len is just the length of all the chunks in cur_line.
2181 2181 cur_line = []
2182 2182 cur_len = 0
2183 2183
2184 2184 # Figure out which static string will prefix this line.
2185 2185 if lines:
2186 2186 indent = self.subsequent_indent
2187 2187 else:
2188 2188 indent = self.initial_indent
2189 2189
2190 2190 # Maximum width for this line.
2191 2191 width = self.width - len(indent)
2192 2192
2193 2193 # First chunk on line is whitespace -- drop it, unless this
2194 2194 # is the very beginning of the text (i.e. no lines started yet).
2195 2195 if self.drop_whitespace and chunks[-1].strip() == '' and lines:
2196 2196 del chunks[-1]
2197 2197
2198 2198 while chunks:
2199 2199 l = colwidth(chunks[-1])
2200 2200
2201 2201 # Can at least squeeze this chunk onto the current line.
2202 2202 if cur_len + l <= width:
2203 2203 cur_line.append(chunks.pop())
2204 2204 cur_len += l
2205 2205
2206 2206 # Nope, this line is full.
2207 2207 else:
2208 2208 break
2209 2209
2210 2210 # The current line is full, and the next chunk is too big to
2211 2211 # fit on *any* line (not just this one).
2212 2212 if chunks and colwidth(chunks[-1]) > width:
2213 2213 self._handle_long_word(chunks, cur_line, cur_len, width)
2214 2214
2215 2215 # If the last chunk on this line is all whitespace, drop it.
2216 2216 if (self.drop_whitespace and
2217 2217 cur_line and cur_line[-1].strip() == ''):
2218 2218 del cur_line[-1]
2219 2219
2220 2220 # Convert current line back to a string and store it in list
2221 2221 # of all lines (return value).
2222 2222 if cur_line:
2223 2223 lines.append(indent + ''.join(cur_line))
2224 2224
2225 2225 return lines
2226 2226
2227 2227 global MBTextWrapper
2228 2228 MBTextWrapper = tw
2229 2229 return tw(**kwargs)
2230 2230
2231 2231 def wrap(line, width, initindent='', hangindent=''):
2232 2232 maxindent = max(len(hangindent), len(initindent))
2233 2233 if width <= maxindent:
2234 2234 # adjust for weird terminal size
2235 2235 width = max(78, maxindent + 1)
2236 2236 line = line.decode(encoding.encoding, encoding.encodingmode)
2237 2237 initindent = initindent.decode(encoding.encoding, encoding.encodingmode)
2238 2238 hangindent = hangindent.decode(encoding.encoding, encoding.encodingmode)
2239 2239 wrapper = MBTextWrapper(width=width,
2240 2240 initial_indent=initindent,
2241 2241 subsequent_indent=hangindent)
2242 2242 return wrapper.fill(line).encode(encoding.encoding)
2243 2243
2244 2244 if (pyplatform.python_implementation() == 'CPython' and
2245 2245 sys.version_info < (3, 0)):
2246 2246 # There is an issue in CPython that some IO methods do not handle EINTR
2247 2247 # correctly. The following table shows what CPython version (and functions)
2248 2248 # are affected (buggy: has the EINTR bug, okay: otherwise):
2249 2249 #
2250 2250 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2251 2251 # --------------------------------------------------
2252 2252 # fp.__iter__ | buggy | buggy | okay
2253 2253 # fp.read* | buggy | okay [1] | okay
2254 2254 #
2255 2255 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2256 2256 #
2257 2257 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2258 2258 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2259 2259 #
2260 2260 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2261 2261 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2262 2262 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2263 2263 # fp.__iter__ but not other fp.read* methods.
2264 2264 #
2265 2265 # On modern systems like Linux, the "read" syscall cannot be interrupted
2266 2266 # when reading "fast" files like on-disk files. So the EINTR issue only
2267 2267 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2268 2268 # files approximately as "fast" files and use the fast (unsafe) code path,
2269 2269 # to minimize the performance impact.
2270 2270 if sys.version_info >= (2, 7, 4):
2271 2271 # fp.readline deals with EINTR correctly, use it as a workaround.
2272 2272 def _safeiterfile(fp):
2273 2273 return iter(fp.readline, '')
2274 2274 else:
2275 2275 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2276 2276 # note: this may block longer than necessary because of bufsize.
2277 2277 def _safeiterfile(fp, bufsize=4096):
2278 2278 fd = fp.fileno()
2279 2279 line = ''
2280 2280 while True:
2281 2281 try:
2282 2282 buf = os.read(fd, bufsize)
2283 2283 except OSError as ex:
2284 2284 # os.read only raises EINTR before any data is read
2285 2285 if ex.errno == errno.EINTR:
2286 2286 continue
2287 2287 else:
2288 2288 raise
2289 2289 line += buf
2290 2290 if '\n' in buf:
2291 2291 splitted = line.splitlines(True)
2292 2292 line = ''
2293 2293 for l in splitted:
2294 2294 if l[-1] == '\n':
2295 2295 yield l
2296 2296 else:
2297 2297 line = l
2298 2298 if not buf:
2299 2299 break
2300 2300 if line:
2301 2301 yield line
2302 2302
2303 2303 def iterfile(fp):
2304 2304 fastpath = True
2305 2305 if type(fp) is file:
2306 2306 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2307 2307 if fastpath:
2308 2308 return fp
2309 2309 else:
2310 2310 return _safeiterfile(fp)
2311 2311 else:
2312 2312 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2313 2313 def iterfile(fp):
2314 2314 return fp
2315 2315
2316 2316 def iterlines(iterator):
2317 2317 for chunk in iterator:
2318 2318 for line in chunk.splitlines():
2319 2319 yield line
2320 2320
2321 2321 def expandpath(path):
2322 2322 return os.path.expanduser(os.path.expandvars(path))
2323 2323
2324 2324 def hgcmd():
2325 2325 """Return the command used to execute current hg
2326 2326
2327 2327 This is different from hgexecutable() because on Windows we want
2328 2328 to avoid things opening new shell windows like batch files, so we
2329 2329 get either the python call or current executable.
2330 2330 """
2331 2331 if mainfrozen():
2332 2332 if getattr(sys, 'frozen', None) == 'macosx_app':
2333 2333 # Env variable set by py2app
2334 2334 return [encoding.environ['EXECUTABLEPATH']]
2335 2335 else:
2336 2336 return [pycompat.sysexecutable]
2337 2337 return gethgcmd()
2338 2338
2339 2339 def rundetached(args, condfn):
2340 2340 """Execute the argument list in a detached process.
2341 2341
2342 2342 condfn is a callable which is called repeatedly and should return
2343 2343 True once the child process is known to have started successfully.
2344 2344 At this point, the child process PID is returned. If the child
2345 2345 process fails to start or finishes before condfn() evaluates to
2346 2346 True, return -1.
2347 2347 """
2348 2348 # Windows case is easier because the child process is either
2349 2349 # successfully starting and validating the condition or exiting
2350 2350 # on failure. We just poll on its PID. On Unix, if the child
2351 2351 # process fails to start, it will be left in a zombie state until
    # the parent waits on it, which we cannot do since we expect a
    # long-running process on success. Instead we listen for SIGCHLD telling
2354 2354 # us our child process terminated.
2355 2355 terminated = set()
2356 2356 def handler(signum, frame):
2357 2357 terminated.add(os.wait())
2358 2358 prevhandler = None
2359 2359 SIGCHLD = getattr(signal, 'SIGCHLD', None)
2360 2360 if SIGCHLD is not None:
2361 2361 prevhandler = signal.signal(SIGCHLD, handler)
2362 2362 try:
2363 2363 pid = spawndetached(args)
2364 2364 while not condfn():
2365 2365 if ((pid in terminated or not testpid(pid))
2366 2366 and not condfn()):
2367 2367 return -1
2368 2368 time.sleep(0.1)
2369 2369 return pid
2370 2370 finally:
2371 2371 if prevhandler is not None:
2372 2372 signal.signal(signal.SIGCHLD, prevhandler)
2373 2373
2374 2374 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2375 2375 """Return the result of interpolating items in the mapping into string s.
2376 2376
2377 2377 prefix is a single character string, or a two character string with
2378 2378 a backslash as the first character if the prefix needs to be escaped in
2379 2379 a regular expression.
2380 2380
2381 2381 fn is an optional function that will be applied to the replacement text
2382 2382 just before replacement.
2383 2383
    escape_prefix is an optional flag that allows escaping the prefix by
    doubling it.
2386 2386 """
2387 2387 fn = fn or (lambda s: s)
2388 2388 patterns = '|'.join(mapping.keys())
2389 2389 if escape_prefix:
2390 2390 patterns += '|' + prefix
2391 2391 if len(prefix) > 1:
2392 2392 prefix_char = prefix[1:]
2393 2393 else:
2394 2394 prefix_char = prefix
2395 2395 mapping[prefix_char] = prefix_char
2396 2396 r = remod.compile(r'%s(%s)' % (prefix, patterns))
2397 2397 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2398 2398
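# For example (illustrative; the mapping and string are hypothetical):
#
#   interpolate('%', {'foo': 'bar'}, 'say %foo')   # -> 'say bar'
#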
2399 2399 def getport(port):
2400 2400 """Return the port for a given network service.
2401 2401
2402 2402 If port is an integer, it's returned as is. If it's a string, it's
2403 2403 looked up using socket.getservbyname(). If there's no matching
2404 2404 service, error.Abort is raised.
2405 2405 """
2406 2406 try:
2407 2407 return int(port)
2408 2408 except ValueError:
2409 2409 pass
2410 2410
2411 2411 try:
2412 2412 return socket.getservbyname(port)
2413 2413 except socket.error:
2414 2414 raise Abort(_("no port number associated with service '%s'") % port)
2415 2415
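# For example (illustrative; the 'http' lookup depends on the local
# services database):
#
#   getport(8080)     # -> 8080
#   getport('http')   # -> 80 on most systems
#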
2416 2416 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
2417 2417 '0': False, 'no': False, 'false': False, 'off': False,
2418 2418 'never': False}
2419 2419
2420 2420 def parsebool(s):
2421 2421 """Parse s into a boolean.
2422 2422
2423 2423 If s is not a valid boolean, returns None.
2424 2424 """
2425 2425 return _booleans.get(s.lower(), None)
2426 2426
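# For example (illustrative):
#
#   parsebool('yes')     # -> True
#   parsebool('off')     # -> False
#   parsebool('maybe')   # -> None
#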
2427 2427 _hextochr = dict((a + b, chr(int(a + b, 16)))
2428 2428 for a in string.hexdigits for b in string.hexdigits)
2429 2429
2430 2430 class url(object):
2431 2431 r"""Reliable URL parser.
2432 2432
2433 2433 This parses URLs and provides attributes for the following
2434 2434 components:
2435 2435
2436 2436 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2437 2437
2438 2438 Missing components are set to None. The only exception is
2439 2439 fragment, which is set to '' if present but empty.
2440 2440
2441 2441 If parsefragment is False, fragment is included in query. If
2442 2442 parsequery is False, query is included in path. If both are
2443 2443 False, both fragment and query are included in path.
2444 2444
2445 2445 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2446 2446
2447 2447 Note that for backward compatibility reasons, bundle URLs do not
2448 2448 take host names. That means 'bundle://../' has a path of '../'.
2449 2449
2450 2450 Examples:
2451 2451
2452 2452 >>> url('http://www.ietf.org/rfc/rfc2396.txt')
2453 2453 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2454 2454 >>> url('ssh://[::1]:2200//home/joe/repo')
2455 2455 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2456 2456 >>> url('file:///home/joe/repo')
2457 2457 <url scheme: 'file', path: '/home/joe/repo'>
2458 2458 >>> url('file:///c:/temp/foo/')
2459 2459 <url scheme: 'file', path: 'c:/temp/foo/'>
2460 2460 >>> url('bundle:foo')
2461 2461 <url scheme: 'bundle', path: 'foo'>
2462 2462 >>> url('bundle://../foo')
2463 2463 <url scheme: 'bundle', path: '../foo'>
2464 2464 >>> url(r'c:\foo\bar')
2465 2465 <url path: 'c:\\foo\\bar'>
2466 2466 >>> url(r'\\blah\blah\blah')
2467 2467 <url path: '\\\\blah\\blah\\blah'>
2468 2468 >>> url(r'\\blah\blah\blah#baz')
2469 2469 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2470 2470 >>> url(r'file:///C:\users\me')
2471 2471 <url scheme: 'file', path: 'C:\\users\\me'>
2472 2472
2473 2473 Authentication credentials:
2474 2474
2475 2475 >>> url('ssh://joe:xyz@x/repo')
2476 2476 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2477 2477 >>> url('ssh://joe@x/repo')
2478 2478 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2479 2479
2480 2480 Query strings and fragments:
2481 2481
2482 2482 >>> url('http://host/a?b#c')
2483 2483 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2484 2484 >>> url('http://host/a?b#c', parsequery=False, parsefragment=False)
2485 2485 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2486 2486
2487 2487 Empty path:
2488 2488
2489 2489 >>> url('')
2490 2490 <url path: ''>
2491 2491 >>> url('#a')
2492 2492 <url path: '', fragment: 'a'>
2493 2493 >>> url('http://host/')
2494 2494 <url scheme: 'http', host: 'host', path: ''>
2495 2495 >>> url('http://host/#a')
2496 2496 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2497 2497
2498 2498 Only scheme:
2499 2499
2500 2500 >>> url('http:')
2501 2501 <url scheme: 'http'>
2502 2502 """
2503 2503
2504 2504 _safechars = "!~*'()+"
2505 2505 _safepchars = "/!~*'()+:\\"
2506 2506 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2507 2507
2508 2508 def __init__(self, path, parsequery=True, parsefragment=True):
2509 2509 # We slowly chomp away at path until we have only the path left
2510 2510 self.scheme = self.user = self.passwd = self.host = None
2511 2511 self.port = self.path = self.query = self.fragment = None
2512 2512 self._localpath = True
2513 2513 self._hostport = ''
2514 2514 self._origpath = path
2515 2515
2516 2516 if parsefragment and '#' in path:
2517 2517 path, self.fragment = path.split('#', 1)
2518 2518
2519 2519 # special case for Windows drive letters and UNC paths
2520 2520 if hasdriveletter(path) or path.startswith('\\\\'):
2521 2521 self.path = path
2522 2522 return
2523 2523
2524 2524 # For compatibility reasons, we can't handle bundle paths as
        # normal URLs
2526 2526 if path.startswith('bundle:'):
2527 2527 self.scheme = 'bundle'
2528 2528 path = path[7:]
2529 2529 if path.startswith('//'):
2530 2530 path = path[2:]
2531 2531 self.path = path
2532 2532 return
2533 2533
2534 2534 if self._matchscheme(path):
2535 2535 parts = path.split(':', 1)
2536 2536 if parts[0]:
2537 2537 self.scheme, path = parts
2538 2538 self._localpath = False
2539 2539
2540 2540 if not path:
2541 2541 path = None
2542 2542 if self._localpath:
2543 2543 self.path = ''
2544 2544 return
2545 2545 else:
2546 2546 if self._localpath:
2547 2547 self.path = path
2548 2548 return
2549 2549
2550 2550 if parsequery and '?' in path:
2551 2551 path, self.query = path.split('?', 1)
2552 2552 if not path:
2553 2553 path = None
2554 2554 if not self.query:
2555 2555 self.query = None
2556 2556
2557 2557 # // is required to specify a host/authority
2558 2558 if path and path.startswith('//'):
2559 2559 parts = path[2:].split('/', 1)
2560 2560 if len(parts) > 1:
2561 2561 self.host, path = parts
2562 2562 else:
2563 2563 self.host = parts[0]
2564 2564 path = None
2565 2565 if not self.host:
2566 2566 self.host = None
2567 2567 # path of file:///d is /d
2568 2568 # path of file:///d:/ is d:/, not /d:/
2569 2569 if path and not hasdriveletter(path):
2570 2570 path = '/' + path
2571 2571
2572 2572 if self.host and '@' in self.host:
2573 2573 self.user, self.host = self.host.rsplit('@', 1)
2574 2574 if ':' in self.user:
2575 2575 self.user, self.passwd = self.user.split(':', 1)
2576 2576 if not self.host:
2577 2577 self.host = None
2578 2578
2579 2579 # Don't split on colons in IPv6 addresses without ports
2580 2580 if (self.host and ':' in self.host and
2581 2581 not (self.host.startswith('[') and self.host.endswith(']'))):
2582 2582 self._hostport = self.host
2583 2583 self.host, self.port = self.host.rsplit(':', 1)
2584 2584 if not self.host:
2585 2585 self.host = None
2586 2586
2587 2587 if (self.host and self.scheme == 'file' and
2588 2588 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2589 2589 raise Abort(_('file:// URLs can only refer to localhost'))
2590 2590
2591 2591 self.path = path
2592 2592
2593 2593 # leave the query string escaped
2594 2594 for a in ('user', 'passwd', 'host', 'port',
2595 2595 'path', 'fragment'):
2596 2596 v = getattr(self, a)
2597 2597 if v is not None:
2598 2598 setattr(self, a, pycompat.urlunquote(v))
2599 2599
2600 2600 def __repr__(self):
2601 2601 attrs = []
2602 2602 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2603 2603 'query', 'fragment'):
2604 2604 v = getattr(self, a)
2605 2605 if v is not None:
2606 2606 attrs.append('%s: %r' % (a, v))
2607 2607 return '<url %s>' % ', '.join(attrs)
2608 2608
2609 2609 def __str__(self):
2610 2610 r"""Join the URL's components back into a URL string.
2611 2611
2612 2612 Examples:
2613 2613
2614 2614 >>> str(url('http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2615 2615 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2616 2616 >>> str(url('http://user:pw@host:80/?foo=bar&baz=42'))
2617 2617 'http://user:pw@host:80/?foo=bar&baz=42'
2618 2618 >>> str(url('http://user:pw@host:80/?foo=bar%3dbaz'))
2619 2619 'http://user:pw@host:80/?foo=bar%3dbaz'
2620 2620 >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
2621 2621 'ssh://user:pw@[::1]:2200//home/joe#'
2622 2622 >>> str(url('http://localhost:80//'))
2623 2623 'http://localhost:80//'
2624 2624 >>> str(url('http://localhost:80/'))
2625 2625 'http://localhost:80/'
2626 2626 >>> str(url('http://localhost:80'))
2627 2627 'http://localhost:80/'
2628 2628 >>> str(url('bundle:foo'))
2629 2629 'bundle:foo'
2630 2630 >>> str(url('bundle://../foo'))
2631 2631 'bundle:../foo'
2632 2632 >>> str(url('path'))
2633 2633 'path'
2634 2634 >>> str(url('file:///tmp/foo/bar'))
2635 2635 'file:///tmp/foo/bar'
2636 2636 >>> str(url('file:///c:/tmp/foo/bar'))
2637 2637 'file:///c:/tmp/foo/bar'
2638 2638 >>> print url(r'bundle:foo\bar')
2639 2639 bundle:foo\bar
2640 2640 >>> print url(r'file:///D:\data\hg')
2641 2641 file:///D:\data\hg
2642 2642 """
2643 2643 if self._localpath:
2644 2644 s = self.path
2645 2645 if self.scheme == 'bundle':
2646 2646 s = 'bundle:' + s
2647 2647 if self.fragment:
2648 2648 s += '#' + self.fragment
2649 2649 return s
2650 2650
2651 2651 s = self.scheme + ':'
2652 2652 if self.user or self.passwd or self.host:
2653 2653 s += '//'
2654 2654 elif self.scheme and (not self.path or self.path.startswith('/')
2655 2655 or hasdriveletter(self.path)):
2656 2656 s += '//'
2657 2657 if hasdriveletter(self.path):
2658 2658 s += '/'
2659 2659 if self.user:
2660 2660 s += urlreq.quote(self.user, safe=self._safechars)
2661 2661 if self.passwd:
2662 2662 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2663 2663 if self.user or self.passwd:
2664 2664 s += '@'
2665 2665 if self.host:
2666 2666 if not (self.host.startswith('[') and self.host.endswith(']')):
2667 2667 s += urlreq.quote(self.host)
2668 2668 else:
2669 2669 s += self.host
2670 2670 if self.port:
2671 2671 s += ':' + urlreq.quote(self.port)
2672 2672 if self.host:
2673 2673 s += '/'
2674 2674 if self.path:
2675 2675 # TODO: similar to the query string, we should not unescape the
2676 2676 # path when we store it, the path might contain '%2f' = '/',
2677 2677 # which we should *not* escape.
2678 2678 s += urlreq.quote(self.path, safe=self._safepchars)
2679 2679 if self.query:
2680 2680 # we store the query in escaped form.
2681 2681 s += '?' + self.query
2682 2682 if self.fragment is not None:
2683 2683 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2684 2684 return s
2685 2685
2686 2686 def authinfo(self):
2687 2687 user, passwd = self.user, self.passwd
2688 2688 try:
2689 2689 self.user, self.passwd = None, None
2690 2690 s = str(self)
2691 2691 finally:
2692 2692 self.user, self.passwd = user, passwd
2693 2693 if not self.user:
2694 2694 return (s, None)
2695 2695 # authinfo[1] is passed to urllib2 password manager, and its
2696 2696 # URIs must not contain credentials. The host is passed in the
2697 2697 # URIs list because Python < 2.4.3 uses only that to search for
2698 2698 # a password.
2699 2699 return (s, (None, (s, self.host),
2700 2700 self.user, self.passwd or ''))
2701 2701
2702 2702 def isabs(self):
2703 2703 if self.scheme and self.scheme != 'file':
2704 2704 return True # remote URL
2705 2705 if hasdriveletter(self.path):
2706 2706 return True # absolute for our purposes - can't be joined()
2707 2707 if self.path.startswith(r'\\'):
2708 2708 return True # Windows UNC path
2709 2709 if self.path.startswith('/'):
2710 2710 return True # POSIX-style
2711 2711 return False
2712 2712
2713 2713 def localpath(self):
2714 2714 if self.scheme == 'file' or self.scheme == 'bundle':
2715 2715 path = self.path or '/'
2716 2716 # For Windows, we need to promote hosts containing drive
2717 2717 # letters to paths with drive letters.
2718 2718 if hasdriveletter(self._hostport):
2719 2719 path = self._hostport + '/' + self.path
2720 2720 elif (self.host is not None and self.path
2721 2721 and not hasdriveletter(path)):
2722 2722 path = '/' + path
2723 2723 return path
2724 2724 return self._origpath
2725 2725
2726 2726 def islocal(self):
2727 2727 '''whether localpath will return something that posixfile can open'''
2728 2728 return (not self.scheme or self.scheme == 'file'
2729 2729 or self.scheme == 'bundle')
2730 2730
2731 2731 def hasscheme(path):
2732 2732 return bool(url(path).scheme)
2733 2733
2734 2734 def hasdriveletter(path):
2735 2735 return path and path[1:2] == ':' and path[0:1].isalpha()
2736 2736
2737 2737 def urllocalpath(path):
2738 2738 return url(path, parsequery=False, parsefragment=False).localpath()
2739 2739
2740 2740 def hidepassword(u):
    '''hide user credentials in a url string'''
2742 2742 u = url(u)
2743 2743 if u.passwd:
2744 2744 u.passwd = '***'
2745 2745 return str(u)
2746 2746
2747 2747 def removeauth(u):
2748 2748 '''remove all authentication information from a url string'''
2749 2749 u = url(u)
2750 2750 u.user = u.passwd = None
2751 2751 return str(u)
2752 2752
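# For example (illustrative; the URL is hypothetical):
#
#   hidepassword('http://jane:secret@example.com/repo')
#   # -> 'http://jane:***@example.com/repo'
#   removeauth('http://jane:secret@example.com/repo')
#   # -> 'http://example.com/repo'
#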
2753 2753 def isatty(fp):
2754 2754 try:
2755 2755 return fp.isatty()
2756 2756 except AttributeError:
2757 2757 return False
2758 2758
2759 2759 timecount = unitcountfn(
2760 2760 (1, 1e3, _('%.0f s')),
2761 2761 (100, 1, _('%.1f s')),
2762 2762 (10, 1, _('%.2f s')),
2763 2763 (1, 1, _('%.3f s')),
2764 2764 (100, 0.001, _('%.1f ms')),
2765 2765 (10, 0.001, _('%.2f ms')),
2766 2766 (1, 0.001, _('%.3f ms')),
2767 2767 (100, 0.000001, _('%.1f us')),
2768 2768 (10, 0.000001, _('%.2f us')),
2769 2769 (1, 0.000001, _('%.3f us')),
2770 2770 (100, 0.000000001, _('%.1f ns')),
2771 2771 (10, 0.000000001, _('%.2f ns')),
2772 2772 (1, 0.000000001, _('%.3f ns')),
2773 2773 )
2774 2774
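# For example (illustrative):
#
#   timecount(0.005)   # -> '5.000 ms'
#   timecount(12.3)    # -> '12.30 s'
#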
2775 2775 _timenesting = [0]
2776 2776
2777 2777 def timed(func):
2778 2778 '''Report the execution time of a function call to stderr.
2779 2779
2780 2780 During development, use as a decorator when you need to measure
2781 2781 the cost of a function, e.g. as follows:
2782 2782
2783 2783 @util.timed
2784 2784 def foo(a, b, c):
2785 2785 pass
2786 2786 '''
2787 2787
2788 2788 def wrapper(*args, **kwargs):
2789 2789 start = time.time()
2790 2790 indent = 2
2791 2791 _timenesting[0] += indent
2792 2792 try:
2793 2793 return func(*args, **kwargs)
2794 2794 finally:
2795 2795 elapsed = time.time() - start
2796 2796 _timenesting[0] -= indent
2797 2797 stderr.write('%s%s: %s\n' %
2798 2798 (' ' * _timenesting[0], func.__name__,
2799 2799 timecount(elapsed)))
2800 2800 return wrapper
2801 2801
2802 2802 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
2803 2803 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
2804 2804
2805 2805 def sizetoint(s):
2806 2806 '''Convert a space specifier to a byte count.
2807 2807
2808 2808 >>> sizetoint('30')
2809 2809 30
2810 2810 >>> sizetoint('2.2kb')
2811 2811 2252
2812 2812 >>> sizetoint('6M')
2813 2813 6291456
2814 2814 '''
2815 2815 t = s.strip().lower()
2816 2816 try:
2817 2817 for k, u in _sizeunits:
2818 2818 if t.endswith(k):
2819 2819 return int(float(t[:-len(k)]) * u)
2820 2820 return int(t)
2821 2821 except ValueError:
2822 2822 raise error.ParseError(_("couldn't parse size: %s") % s)
2823 2823
2824 2824 class hooks(object):
2825 2825 '''A collection of hook functions that can be used to extend a
2826 2826 function's behavior. Hooks are called in lexicographic order,
2827 2827 based on the names of their sources.'''
2828 2828
2829 2829 def __init__(self):
2830 2830 self._hooks = []
2831 2831
2832 2832 def add(self, source, hook):
2833 2833 self._hooks.append((source, hook))
2834 2834
2835 2835 def __call__(self, *args):
2836 2836 self._hooks.sort(key=lambda x: x[0])
2837 2837 results = []
2838 2838 for source, hook in self._hooks:
2839 2839 results.append(hook(*args))
2840 2840 return results
2841 2841
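# For example (illustrative; the sources 'a' and 'b' are hypothetical):
#
#   h = hooks()
#   h.add('b', lambda x: x + 1)
#   h.add('a', lambda x: x * 2)
#   h(3)   # -> [6, 4]: hooks run in lexicographic source order, 'a' first
#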
2842 2842 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%s'):
2843 2843 '''Yields lines for a nicely formatted stacktrace.
    Skips the last 'skip' entries.
2845 2845 Each file+linenumber is formatted according to fileline.
2846 2846 Each line is formatted according to line.
2847 2847 If line is None, it yields:
2848 2848 length of longest filepath+line number,
2849 2849 filepath+linenumber,
2850 2850 function
2851 2851
    Not to be used in production code, but very convenient while developing.
2853 2853 '''
2854 2854 entries = [(fileline % (fn, ln), func)
2855 2855 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]]
2856 2856 if entries:
2857 2857 fnmax = max(len(entry[0]) for entry in entries)
2858 2858 for fnln, func in entries:
2859 2859 if line is None:
2860 2860 yield (fnmax, fnln, func)
2861 2861 else:
2862 2862 yield line % (fnmax, fnln, func)
2863 2863
2864 2864 def debugstacktrace(msg='stacktrace', skip=0, f=stderr, otherf=stdout):
2865 2865 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
    Skips the last 'skip' entries. By default it will flush stdout first.
    It can be used everywhere and intentionally does not require an ui object.
    Not to be used in production code, but very convenient while developing.
2869 2869 '''
2870 2870 if otherf:
2871 2871 otherf.flush()
2872 2872 f.write('%s at:\n' % msg)
2873 2873 for line in getstackframes(skip + 1):
2874 2874 f.write(line)
2875 2875 f.flush()
2876 2876
2877 2877 class dirs(object):
2878 2878 '''a multiset of directory names from a dirstate or manifest'''
2879 2879
2880 2880 def __init__(self, map, skip=None):
2881 2881 self._dirs = {}
2882 2882 addpath = self.addpath
2883 2883 if safehasattr(map, 'iteritems') and skip is not None:
2884 2884 for f, s in map.iteritems():
2885 2885 if s[0] != skip:
2886 2886 addpath(f)
2887 2887 else:
2888 2888 for f in map:
2889 2889 addpath(f)
2890 2890
2891 2891 def addpath(self, path):
2892 2892 dirs = self._dirs
2893 2893 for base in finddirs(path):
2894 2894 if base in dirs:
2895 2895 dirs[base] += 1
2896 2896 return
2897 2897 dirs[base] = 1
2898 2898
2899 2899 def delpath(self, path):
2900 2900 dirs = self._dirs
2901 2901 for base in finddirs(path):
2902 2902 if dirs[base] > 1:
2903 2903 dirs[base] -= 1
2904 2904 return
2905 2905 del dirs[base]
2906 2906
2907 2907 def __iter__(self):
2908 2908 return self._dirs.iterkeys()
2909 2909
2910 2910 def __contains__(self, d):
2911 2911 return d in self._dirs
2912 2912
2913 2913 if safehasattr(parsers, 'dirs'):
2914 2914 dirs = parsers.dirs
2915 2915
2916 2916 def finddirs(path):
2917 2917 pos = path.rfind('/')
2918 2918 while pos != -1:
2919 2919 yield path[:pos]
2920 2920 pos = path.rfind('/', 0, pos)
2921 2921
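# For example (illustrative, using the pure Python ``dirs`` above):
#
#   d = dirs(['a/b/c', 'a/d'])
#   'a' in d       # -> True
#   'a/b' in d     # -> True
#   'a/b/c' in d   # -> False (leaf entries are files, not directories)
#   list(finddirs('a/b/c'))   # -> ['a/b', 'a']
#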
2922 2922 class ctxmanager(object):
2923 2923 '''A context manager for use in 'with' blocks to allow multiple
2924 2924 contexts to be entered at once. This is both safer and more
2925 2925 flexible than contextlib.nested.
2926 2926
    Once Mercurial requires Python 2.7+, this will become mostly
2928 2928 unnecessary.
2929 2929 '''
2930 2930
2931 2931 def __init__(self, *args):
2932 2932 '''Accepts a list of no-argument functions that return context
        managers. These will be invoked when ``enter()`` is called.'''
2934 2934 self._pending = args
2935 2935 self._atexit = []
2936 2936
2937 2937 def __enter__(self):
2938 2938 return self
2939 2939
2940 2940 def enter(self):
2941 2941 '''Create and enter context managers in the order in which they were
2942 2942 passed to the constructor.'''
2943 2943 values = []
2944 2944 for func in self._pending:
2945 2945 obj = func()
2946 2946 values.append(obj.__enter__())
2947 2947 self._atexit.append(obj.__exit__)
2948 2948 del self._pending
2949 2949 return values
2950 2950
2951 2951 def atexit(self, func, *args, **kwargs):
2952 2952 '''Add a function to call when this context manager exits. The
2953 2953 ordering of multiple atexit calls is unspecified, save that
2954 2954 they will happen before any __exit__ functions.'''
2955 2955 def wrapper(exc_type, exc_val, exc_tb):
2956 2956 func(*args, **kwargs)
2957 2957 self._atexit.append(wrapper)
2958 2958 return func
2959 2959
2960 2960 def __exit__(self, exc_type, exc_val, exc_tb):
2961 2961 '''Context managers are exited in the reverse order from which
2962 2962 they were created.'''
2963 2963 received = exc_type is not None
2964 2964 suppressed = False
2965 2965 pending = None
2966 2966 self._atexit.reverse()
2967 2967 for exitfunc in self._atexit:
2968 2968 try:
2969 2969 if exitfunc(exc_type, exc_val, exc_tb):
2970 2970 suppressed = True
2971 2971 exc_type = None
2972 2972 exc_val = None
2973 2973 exc_tb = None
            except BaseException:
                exc_type, exc_val, exc_tb = pending = sys.exc_info()
2977 2977 del self._atexit
2978 2978 if pending:
2979 2979 raise exc_val
2980 2980 return received and suppressed
2981 2981
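# For example (illustrative; f1 and f2 are hypothetical file names):
#
#   with ctxmanager(lambda: open(f1), lambda: open(f2)) as c:
#       fp1, fp2 = c.enter()
#       ...   # both files are closed when the 'with' block exits
#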
2982 2982 # compression code
2983 2983
2984 2984 SERVERROLE = 'server'
2985 2985 CLIENTROLE = 'client'
2986 2986
2987 2987 compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
2988 2988 (u'name', u'serverpriority',
2989 2989 u'clientpriority'))
2990 2990
2991 2991 class compressormanager(object):
2992 2992 """Holds registrations of various compression engines.
2993 2993
2994 2994 This class essentially abstracts the differences between compression
2995 2995 engines to allow new compression formats to be added easily, possibly from
2996 2996 extensions.
2997 2997
2998 2998 Compressors are registered against the global instance by calling its
2999 2999 ``register()`` method.
3000 3000 """
3001 3001 def __init__(self):
3002 3002 self._engines = {}
3003 3003 # Bundle spec human name to engine name.
3004 3004 self._bundlenames = {}
3005 3005 # Internal bundle identifier to engine name.
3006 3006 self._bundletypes = {}
3007 3007 # Revlog header to engine name.
3008 3008 self._revlogheaders = {}
3009 3009 # Wire proto identifier to engine name.
3010 3010 self._wiretypes = {}
3011 3011
3012 3012 def __getitem__(self, key):
3013 3013 return self._engines[key]
3014 3014
3015 3015 def __contains__(self, key):
3016 3016 return key in self._engines
3017 3017
3018 3018 def __iter__(self):
3019 3019 return iter(self._engines.keys())
3020 3020
3021 3021 def register(self, engine):
3022 3022 """Register a compression engine with the manager.
3023 3023
3024 3024 The argument must be a ``compressionengine`` instance.
3025 3025 """
3026 3026 if not isinstance(engine, compressionengine):
3027 3027 raise ValueError(_('argument must be a compressionengine'))
3028 3028
3029 3029 name = engine.name()
3030 3030
3031 3031 if name in self._engines:
3032 3032 raise error.Abort(_('compression engine %s already registered') %
3033 3033 name)
3034 3034
3035 3035 bundleinfo = engine.bundletype()
3036 3036 if bundleinfo:
3037 3037 bundlename, bundletype = bundleinfo
3038 3038
3039 3039 if bundlename in self._bundlenames:
3040 3040 raise error.Abort(_('bundle name %s already registered') %
3041 3041 bundlename)
3042 3042 if bundletype in self._bundletypes:
3043 3043 raise error.Abort(_('bundle type %s already registered by %s') %
3044 3044 (bundletype, self._bundletypes[bundletype]))
3045 3045
            # Only expose the bundle name externally if one was declared.
3047 3047 if bundlename:
3048 3048 self._bundlenames[bundlename] = name
3049 3049
3050 3050 self._bundletypes[bundletype] = name
3051 3051
3052 3052 wiresupport = engine.wireprotosupport()
3053 3053 if wiresupport:
3054 3054 wiretype = wiresupport.name
3055 3055 if wiretype in self._wiretypes:
3056 3056 raise error.Abort(_('wire protocol compression %s already '
3057 3057 'registered by %s') %
3058 3058 (wiretype, self._wiretypes[wiretype]))
3059 3059
3060 3060 self._wiretypes[wiretype] = name
3061 3061
3062 3062 revlogheader = engine.revlogheader()
3063 3063 if revlogheader and revlogheader in self._revlogheaders:
3064 3064 raise error.Abort(_('revlog header %s already registered by %s') %
3065 3065 (revlogheader, self._revlogheaders[revlogheader]))
3066 3066
3067 3067 if revlogheader:
3068 3068 self._revlogheaders[revlogheader] = name
3069 3069
3070 3070 self._engines[name] = engine
3071 3071
3072 3072 @property
3073 3073 def supportedbundlenames(self):
3074 3074 return set(self._bundlenames.keys())
3075 3075
3076 3076 @property
3077 3077 def supportedbundletypes(self):
3078 3078 return set(self._bundletypes.keys())
3079 3079
3080 3080 def forbundlename(self, bundlename):
3081 3081 """Obtain a compression engine registered to a bundle name.
3082 3082
3083 3083 Will raise KeyError if the bundle type isn't registered.
3084 3084
3085 3085 Will abort if the engine is known but not available.
3086 3086 """
3087 3087 engine = self._engines[self._bundlenames[bundlename]]
3088 3088 if not engine.available():
3089 3089 raise error.Abort(_('compression engine %s could not be loaded') %
3090 3090 engine.name())
3091 3091 return engine
3092 3092
3093 3093 def forbundletype(self, bundletype):
3094 3094 """Obtain a compression engine registered to a bundle type.
3095 3095
3096 3096 Will raise KeyError if the bundle type isn't registered.
3097 3097
3098 3098 Will abort if the engine is known but not available.
3099 3099 """
3100 3100 engine = self._engines[self._bundletypes[bundletype]]
3101 3101 if not engine.available():
3102 3102 raise error.Abort(_('compression engine %s could not be loaded') %
3103 3103 engine.name())
3104 3104 return engine
3105 3105
3106 3106 def supportedwireengines(self, role, onlyavailable=True):
3107 3107 """Obtain compression engines that support the wire protocol.
3108 3108
3109 3109 Returns a list of engines in prioritized order, most desired first.
3110 3110
3111 3111 If ``onlyavailable`` is set, filter out engines that can't be
3112 3112 loaded.
3113 3113 """
3114 3114 assert role in (SERVERROLE, CLIENTROLE)
3115 3115
3116 3116 attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'
3117 3117
3118 3118 engines = [self._engines[e] for e in self._wiretypes.values()]
3119 3119 if onlyavailable:
3120 3120 engines = [e for e in engines if e.available()]
3121 3121
3122 3122 def getkey(e):
3123 3123 # Sort first by priority, highest first. In case of tie, sort
3124 3124 # alphabetically. This is arbitrary, but ensures output is
3125 3125 # stable.
3126 3126 w = e.wireprotosupport()
3127 3127 return -1 * getattr(w, attr), w.name
3128 3128
        return sorted(engines, key=getkey)
3130 3130
3131 3131 def forwiretype(self, wiretype):
3132 3132 engine = self._engines[self._wiretypes[wiretype]]
3133 3133 if not engine.available():
3134 3134 raise error.Abort(_('compression engine %s could not be loaded') %
3135 3135 engine.name())
3136 3136 return engine
3137 3137
3138 3138 def forrevlogheader(self, header):
3139 3139 """Obtain a compression engine registered to a revlog header.
3140 3140
3141 3141 Will raise KeyError if the revlog header value isn't registered.
3142 3142 """
3143 3143 return self._engines[self._revlogheaders[header]]
3144 3144
3145 3145 compengines = compressormanager()
3146 3146
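# For example (illustrative): once an engine such as the zlib engine below
# has been registered against this instance, it can be looked up by its
# bundle spec name or its internal bundle type:
#
#   compengines.forbundlename('gzip').name()   # -> 'zlib'
#   compengines.forbundletype('GZ').name()     # -> 'zlib'
#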
3147 3147 class compressionengine(object):
3148 3148 """Base class for compression engines.
3149 3149
3150 3150 Compression engines must implement the interface defined by this class.
3151 3151 """
3152 3152 def name(self):
3153 3153 """Returns the name of the compression engine.
3154 3154
3155 3155 This is the key the engine is registered under.
3156 3156
3157 3157 This method must be implemented.
3158 3158 """
3159 3159 raise NotImplementedError()
3160 3160
3161 3161 def available(self):
3162 3162 """Whether the compression engine is available.
3163 3163
3164 3164 The intent of this method is to allow optional compression engines
3165 3165 that may not be available in all installations (such as engines relying
3166 3166 on C extensions that may not be present).
3167 3167 """
3168 3168 return True
3169 3169
3170 3170 def bundletype(self):
3171 3171 """Describes bundle identifiers for this engine.
3172 3172
3173 3173 If this compression engine isn't supported for bundles, returns None.
3174 3174
3175 3175 If this engine can be used for bundles, returns a 2-tuple of strings of
3176 3176 the user-facing "bundle spec" compression name and an internal
3177 3177 identifier used to denote the compression format within bundles. To
3178 3178 exclude the name from external usage, set the first element to ``None``.
3179 3179
3180 3180 If bundle compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.
3182 3182 """
3183 3183 return None
3184 3184
3185 3185 def wireprotosupport(self):
3186 3186 """Declare support for this compression format on the wire protocol.
3187 3187
3188 3188 If this compression engine isn't supported for compressing wire
3189 3189 protocol payloads, returns None.
3190 3190
3191 3191 Otherwise, returns ``compenginewireprotosupport`` with the following
3192 3192 fields:
3193 3193
3194 3194 * String format identifier
3195 3195 * Integer priority for the server
3196 3196 * Integer priority for the client
3197 3197
3198 3198 The integer priorities are used to order the advertisement of format
3199 3199 support by server and client. The highest integer is advertised
3200 3200 first. Integers with non-positive values aren't advertised.
3201 3201
3202 3202 The priority values are somewhat arbitrary and only used for default
3203 3203 ordering. The relative order can be changed via config options.
3204 3204
3205 3205 If wire protocol compression is supported, the class must also implement
3206 3206 ``compressstream`` and ``decompressorreader``.
3207 3207 """
3208 3208 return None
3209 3209
3210 3210 def revlogheader(self):
3211 3211 """Header added to revlog chunks that identifies this engine.
3212 3212
3213 3213 If this engine can be used to compress revlogs, this method should
3214 3214 return the bytes used to identify chunks compressed with this engine.
3215 3215 Else, the method should return ``None`` to indicate it does not
3216 3216 participate in revlog compression.
3217 3217 """
3218 3218 return None
3219 3219
3220 3220 def compressstream(self, it, opts=None):
3221 3221 """Compress an iterator of chunks.
3222 3222
3223 3223 The method receives an iterator (ideally a generator) of chunks of
        bytes to be compressed. It returns an iterator (ideally a generator)
        of chunks of bytes representing the compressed output.

        Optionally accepts an argument defining how to perform compression.
        Each engine treats this argument differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        Argument is an object with a ``read(size)`` method that returns
        compressed data. The return value is an object with a ``read(size)``
        method that returns uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data. This method returns compressed binary data or ``None`` if
        the data could not be compressed (too small, not compressible, etc).
        The returned data should have a header uniquely identifying this
        compression format so decompression can be routed to this engine.
        This header should be identified by the ``revlogheader()`` return
        value.

        The object has a ``decompress(data)`` method that decompresses
        data. The method will only be called if ``data`` begins with
        ``revlogheader()``. The method should return the raw, uncompressed
        data or raise a ``RevlogError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()

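# The skeleton below is an illustrative sketch, not part of the module's
# engine set: a hypothetical pass-through engine showing the minimum
# interface for bundle and wire protocol support. The 'identity' name,
# 'ID' identifier, and priority values are invented for the example; a
# revlog-capable engine would additionally implement revlogheader() and
# revlogcompressor().
class _identityengine(compressionengine):
    def name(self):
        return 'identity'

    def bundletype(self):
        # (user-facing bundle spec name, internal bundle identifier)
        return 'identity', 'ID'

    def wireprotosupport(self):
        # Arbitrary example priorities; relative order is configurable.
        return compewireprotosupport('identity', 10, 10)

    def compressstream(self, it, opts=None):
        # A real engine would yield compressed chunks here.
        return it

    def decompressorreader(self, fh):
        # A real engine would wrap fh in a decompressing reader.
        return fh

# A real engine would be activated with:
# compengines.register(_identityengine())
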
class _zlibengine(compressionengine):
    def name(self):
        return 'zlib'

    def bundletype(self):
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        opts = opts or {}

        z = zlib.compressobj(opts.get('level', -1))
        for chunk in it:
            data = z.compress(chunk)
            # Not all calls to compress() emit data. It is cheaper to inspect
            # here than to feed empty chunks through the generator.
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = zlib.decompressobj()
            for chunk in filechunkiter(fh):
                while chunk:
                    # Limit output size to limit memory.
                    yield d.decompress(chunk, 2 ** 18)
                    chunk = d.unconsumed_tail

        return chunkbuffer(gen())

    class zlibrevlogcompressor(object):
        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                return None

            elif insize <= 1000000:
                compressed = zlib.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            else:
                z = zlib.compressobj()
                parts = []
                pos = 0
                while pos < insize:
                    pos2 = pos + 2**20
                    parts.append(z.compress(data[pos:pos2]))
                    pos = pos2
                parts.append(z.flush())

                if sum(map(len, parts)) < insize:
                    return ''.join(parts)
                return None

        def decompress(self, data):
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())
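
# Illustrative usage sketch (an assumption, not from the original module):
# a round trip through the registered zlib engine, looked up by name on
# the compressormanager. The function name is invented for the example.
def _zlibengineexample():
    engine = compengines['zlib']
    # compressstream() consumes an iterator of chunks and yields
    # compressed chunks; decompressorreader() reverses the process.
    compressed = ''.join(engine.compressstream(iter(['some data'])))
    reader = engine.decompressorreader(stringio(compressed))
    return reader.read(len('some data'))  # -> 'some data'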

class _bz2engine(compressionengine):
    def name(self):
        return 'bz2'

    def bundletype(self):
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        opts = opts or {}
        z = bz2.BZ2Compressor(opts.get('level', 9))
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = bz2.BZ2Decompressor()
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_bz2engine())

class _truncatedbz2engine(compressionengine):
    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled elsewhere.

    def decompressorreader(self, fh):
        def gen():
            # The input stream doesn't have the 'BZ' header. So add it back.
            d = bz2.BZ2Decompressor()
            d.decompress('BZ')
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_truncatedbz2engine())
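
# Illustrative sketch (an assumption, not from the original module) of why
# d.decompress('BZ') above works: truncated streams lack the first two
# magic bytes, so a fresh decompressor is primed with them before being
# fed the real data. The function name is invented for the example.
def _truncatedbz2example():
    truncated = bz2.compress('some data')[2:]  # drop the leading 'BZ'
    d = bz2.BZ2Decompressor()
    d.decompress('BZ')  # restore the stripped magic bytes
    return d.decompress(truncated)  # -> 'some data'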

class _noopengine(compressionengine):
    def name(self):
        return 'none'

    def bundletype(self):
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't advertise
    # it by default because, unless you are on a fast network, uncompressed
    # payloads can easily saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        return it

    def decompressorreader(self, fh):
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())

class _zstdengine(compressionengine):
    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
            return zstd
        except ImportError:
            return None

    def available(self):
        return bool(self._module)

    def bundletype(self):
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        opts = opts or {}
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = opts.get('level', 3)

        zstd = self._module
        z = zstd.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        zstd = self._module
        dctx = zstd.ZstdDecompressor()
        return chunkbuffer(dctx.read_from(fh))

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # Writing the content size adds a few bytes to the output. However,
            # it allows decompression to be more optimal since we can
            # pre-allocate a buffer to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level,
                                             write_content_size=True)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                return None

            elif insize <= 1000000:
                compressed = self._cctx.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None
            else:
                z = self._cctx.compressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._compinsize
                    chunk = z.compress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                chunks.append(z.flush())

                if sum(map(len, chunks)) < insize:
                    return ''.join(chunks)
                return None

        def decompress(self, data):
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._decompinsize
                    chunk = dobj.decompress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                # Frame should be exhausted, so no finish() API.

                return ''.join(chunks)
            except Exception as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))

compengines.register(_zstdengine())
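
# Illustrative usage sketch (an assumption, not from the original module):
# a revlog-style round trip with the zstd engine. available() must be
# checked because the bundled zstd module may not be importable. The
# function name is invented for the example.
def _zstdrevlogexample(data):
    engine = compengines['zstd']
    # compress() asserts non-empty input, so guard the trivial case.
    if not data or not engine.available():
        return data
    compressor = engine.revlogcompressor()
    compressed = compressor.compress(data)
    if compressed is None:
        # Compression would not have saved space (e.g. input too small).
        return data
    return compressor.decompress(compressed)  # round-trips back to data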

# convenient shortcut
dst = debugstacktrace