util: compression APIs to support revlog decompression...
Gregory Szorc
r30798:f50c0db5 default

# util.py - Mercurial utility functions and platform specific implementations
#
# Copyright 2005 K. Thananchayan <thananck@yahoo.com>
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
# Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Mercurial utility functions and platform specific implementations.

This contains helper routines that are independent of the SCM core and
hide platform-specific details from the core.
"""

from __future__ import absolute_import

import bz2
import calendar
import collections
import datetime
import errno
import gc
import hashlib
import imp
import os
import platform as pyplatform
import re as remod
import shutil
import signal
import socket
import stat
import string
import subprocess
import sys
import tempfile
import textwrap
import time
import traceback
import zlib

from . import (
    encoding,
    error,
    i18n,
    osutil,
    parsers,
    pycompat,
)

empty = pycompat.empty
httplib = pycompat.httplib
httpserver = pycompat.httpserver
pickle = pycompat.pickle
queue = pycompat.queue
socketserver = pycompat.socketserver
stderr = pycompat.stderr
stdin = pycompat.stdin
stdout = pycompat.stdout
stringio = pycompat.stringio
urlerr = pycompat.urlerr
urlparse = pycompat.urlparse
urlreq = pycompat.urlreq
xmlrpclib = pycompat.xmlrpclib

if pycompat.osname == 'nt':
    from . import windows as platform
    stdout = platform.winstdout(pycompat.stdout)
else:
    from . import posix as platform

_ = i18n._

bindunixsocket = platform.bindunixsocket
cachestat = platform.cachestat
checkexec = platform.checkexec
checklink = platform.checklink
copymode = platform.copymode
executablepath = platform.executablepath
expandglobs = platform.expandglobs
explainexit = platform.explainexit
findexe = platform.findexe
gethgcmd = platform.gethgcmd
getuser = platform.getuser
getpid = os.getpid
groupmembers = platform.groupmembers
groupname = platform.groupname
hidewindow = platform.hidewindow
isexec = platform.isexec
isowner = platform.isowner
localpath = platform.localpath
lookupreg = platform.lookupreg
makedir = platform.makedir
nlinks = platform.nlinks
normpath = platform.normpath
normcase = platform.normcase
normcasespec = platform.normcasespec
normcasefallback = platform.normcasefallback
openhardlinks = platform.openhardlinks
oslink = platform.oslink
parsepatchoutput = platform.parsepatchoutput
pconvert = platform.pconvert
poll = platform.poll
popen = platform.popen
posixfile = platform.posixfile
quotecommand = platform.quotecommand
readpipe = platform.readpipe
rename = platform.rename
removedirs = platform.removedirs
samedevice = platform.samedevice
samefile = platform.samefile
samestat = platform.samestat
setbinary = platform.setbinary
setflags = platform.setflags
setsignalhandler = platform.setsignalhandler
shellquote = platform.shellquote
spawndetached = platform.spawndetached
split = platform.split
sshargs = platform.sshargs
statfiles = getattr(osutil, 'statfiles', platform.statfiles)
statisexec = platform.statisexec
statislink = platform.statislink
testpid = platform.testpid
umask = platform.umask
unlink = platform.unlink
unlinkpath = platform.unlinkpath
username = platform.username

# Python compatibility

_notset = object()

# disable Python's problematic floating point timestamps (issue4836)
# (Python hypocritically says you shouldn't change this behavior in
# libraries, and sure enough Mercurial is not a library.)
os.stat_float_times(False)

def safehasattr(thing, attr):
    return getattr(thing, attr, _notset) is not _notset

def bitsfrom(container):
    bits = 0
    for bit in container:
        bits |= bit
    return bits

DIGESTS = {
    'md5': hashlib.md5,
    'sha1': hashlib.sha1,
    'sha512': hashlib.sha512,
}
# List of digest types from strongest to weakest
DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']

for k in DIGESTS_BY_STRENGTH:
    assert k in DIGESTS

class digester(object):
    """helper to compute digests.

    This helper can be used to compute one or more digests given their name.

    >>> d = digester(['md5', 'sha1'])
    >>> d.update('foo')
    >>> [k for k in sorted(d)]
    ['md5', 'sha1']
    >>> d['md5']
    'acbd18db4cc2f85cedef654fccc4a4d8'
    >>> d['sha1']
    '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
    >>> digester.preferred(['md5', 'sha1'])
    'sha1'
    """

    def __init__(self, digests, s=''):
        self._hashes = {}
        for k in digests:
            if k not in DIGESTS:
                raise Abort(_('unknown digest type: %s') % k)
            self._hashes[k] = DIGESTS[k]()
        if s:
            self.update(s)

    def update(self, data):
        for h in self._hashes.values():
            h.update(data)

    def __getitem__(self, key):
        if key not in DIGESTS:
            raise Abort(_('unknown digest type: %s') % key)
        return self._hashes[key].hexdigest()

    def __iter__(self):
        return iter(self._hashes)

    @staticmethod
    def preferred(supported):
        """returns the strongest digest type in both supported and DIGESTS."""

        for k in DIGESTS_BY_STRENGTH:
            if k in supported:
                return k
        return None

class digestchecker(object):
    """file handle wrapper that additionally checks content against a given
    size and digests.

        d = digestchecker(fh, size, {'md5': '...'})

    When multiple digests are given, all of them are validated.
    """

    def __init__(self, fh, size, digests):
        self._fh = fh
        self._size = size
        self._got = 0
        self._digests = dict(digests)
        self._digester = digester(self._digests.keys())

    def read(self, length=-1):
        content = self._fh.read(length)
        self._digester.update(content)
        self._got += len(content)
        return content

    def validate(self):
        if self._size != self._got:
            raise Abort(_('size mismatch: expected %d, got %d') %
                        (self._size, self._got))
        for k, v in self._digests.items():
            if v != self._digester[k]:
                # i18n: first parameter is a digest name
                raise Abort(_('%s mismatch: expected %s, got %s') %
                            (k, v, self._digester[k]))

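# Illustrative usage sketch (not part of the original module): reading a
# stream through a digestchecker and validating size and content at the
# end. The helper name _demo_digestchecker is hypothetical; the md5 value
# is the digest of 'foo' shown in the digester doctest above.
def _demo_digestchecker():
    fh = stringio('foo')
    d = digestchecker(fh, 3, {'md5': 'acbd18db4cc2f85cedef654fccc4a4d8'})
    while d.read(4096):
        pass
    d.validate()  # raises Abort on a size or digest mismatch
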
try:
    buffer = buffer
except NameError:
    if not pycompat.ispy3:
        def buffer(sliceable, offset=0):
            return sliceable[offset:]
    else:
        def buffer(sliceable, offset=0):
            return memoryview(sliceable)[offset:]

closefds = pycompat.osname == 'posix'

_chunksize = 4096

class bufferedinputpipe(object):
    """a manually buffered input pipe

    Python will not let us use buffered IO and lazy reading with 'polling' at
    the same time. We cannot probe the buffer state and select will not detect
    that data are ready to read if they are already buffered.

    This class lets us work around that by implementing its own buffering
    (allowing efficient readline) while offering a way to know if the buffer is
    empty from the output (allowing collaboration of the buffer with polling).

    This class lives in the 'util' module because it makes use of the 'os'
    module from the python stdlib.
    """

    def __init__(self, input):
        self._input = input
        self._buffer = []
        self._eof = False
        self._lenbuf = 0

    @property
    def hasbuffer(self):
274 274 """True is any data is currently buffered
275 275
276 276 This will be used externally a pre-step for polling IO. If there is
277 277 already data then no polling should be set in place."""
        return bool(self._buffer)

    @property
    def closed(self):
        return self._input.closed

    def fileno(self):
        return self._input.fileno()

    def close(self):
        return self._input.close()

    def read(self, size):
        while (not self._eof) and (self._lenbuf < size):
            self._fillbuffer()
        return self._frombuffer(size)

    def readline(self, *args, **kwargs):
        if 1 < len(self._buffer):
            # this should not happen because both read and readline end with a
            # _frombuffer call that collapses it.
            self._buffer = [''.join(self._buffer)]
            self._lenbuf = len(self._buffer[0])
        lfi = -1
        if self._buffer:
            lfi = self._buffer[-1].find('\n')
        while (not self._eof) and lfi < 0:
            self._fillbuffer()
            if self._buffer:
                lfi = self._buffer[-1].find('\n')
        size = lfi + 1
        if lfi < 0: # end of file
            size = self._lenbuf
        elif 1 < len(self._buffer):
            # we need to take previous chunks into account
            size += self._lenbuf - len(self._buffer[-1])
        return self._frombuffer(size)

    def _frombuffer(self, size):
        """return at most 'size' data from the buffer

        The data are removed from the buffer."""
        if size == 0 or not self._buffer:
            return ''
        buf = self._buffer[0]
        if 1 < len(self._buffer):
            buf = ''.join(self._buffer)

        data = buf[:size]
        buf = buf[len(data):]
        if buf:
            self._buffer = [buf]
            self._lenbuf = len(buf)
        else:
            self._buffer = []
            self._lenbuf = 0
        return data

    def _fillbuffer(self):
        """read data to the buffer"""
        data = os.read(self._input.fileno(), _chunksize)
        if not data:
            self._eof = True
        else:
            self._lenbuf += len(data)
            self._buffer.append(data)

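# Illustrative usage sketch (not part of the original module): wrapping one
# end of an os.pipe() so buffered readline() can coexist with polling via
# hasbuffer. The helper name _demo_bufferedinputpipe is hypothetical;
# Python 2 byte strings are assumed, matching the module.
def _demo_bufferedinputpipe():
    rfd, wfd = os.pipe()
    os.write(wfd, 'one\ntwo\n')
    os.close(wfd)
    pipe = bufferedinputpipe(os.fdopen(rfd))
    first = pipe.readline()  # 'one\n', read via os.read and buffered
    rest = pipe.read(4)      # 'two\n', served from the existing buffer
    pipe.close()
    return first, rest
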
def popen2(cmd, env=None, newlines=False):
    # Setting bufsize to -1 lets the system decide the buffer size.
    # The default for bufsize is 0, meaning unbuffered. This leads to
    # poor performance on Mac OS X: http://bugs.python.org/issue4194
    p = subprocess.Popen(cmd, shell=True, bufsize=-1,
                         close_fds=closefds,
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                         universal_newlines=newlines,
                         env=env)
    return p.stdin, p.stdout

def popen3(cmd, env=None, newlines=False):
    stdin, stdout, stderr, p = popen4(cmd, env, newlines)
    return stdin, stdout, stderr

def popen4(cmd, env=None, newlines=False, bufsize=-1):
    p = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
                         close_fds=closefds,
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE,
                         universal_newlines=newlines,
                         env=env)
    return p.stdin, p.stdout, p.stderr, p

def version():
    """Return version information if available."""
    try:
        from . import __version__
        return __version__.version
    except ImportError:
        return 'unknown'

def versiontuple(v=None, n=4):
    """Parses a Mercurial version string into an N-tuple.

    The version string to be parsed is specified with the ``v`` argument.
    If it isn't defined, the current Mercurial version string will be parsed.

    ``n`` can be 2, 3, or 4. Here is how some version strings map to
    returned values:

    >>> v = '3.6.1+190-df9b73d2d444'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, 1)
    >>> versiontuple(v, 4)
    (3, 6, 1, '190-df9b73d2d444')

    >>> versiontuple('3.6.1+190-df9b73d2d444+20151118')
    (3, 6, 1, '190-df9b73d2d444+20151118')

    >>> v = '3.6'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, None)
    >>> versiontuple(v, 4)
    (3, 6, None, None)

    >>> v = '3.9-rc'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc')

    >>> v = '3.9-rc+2-02a8fea4289b'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc+2-02a8fea4289b')
    """
    if not v:
        v = version()
    parts = remod.split('[\+-]', v, 1)
    if len(parts) == 1:
        vparts, extra = parts[0], None
    else:
        vparts, extra = parts

    vints = []
    for i in vparts.split('.'):
        try:
            vints.append(int(i))
        except ValueError:
            break
    # (3, 6) -> (3, 6, None)
    while len(vints) < 3:
        vints.append(None)

    if n == 2:
        return (vints[0], vints[1])
    if n == 3:
        return (vints[0], vints[1], vints[2])
    if n == 4:
        return (vints[0], vints[1], vints[2], extra)

# used by parsedate
defaultdateformats = (
    '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
    '%Y-%m-%dT%H:%M', # without seconds
    '%Y-%m-%dT%H%M%S', # another awful but legal variant without :
    '%Y-%m-%dT%H%M', # without seconds
    '%Y-%m-%d %H:%M:%S', # our common legal variant
    '%Y-%m-%d %H:%M', # without seconds
    '%Y-%m-%d %H%M%S', # without :
    '%Y-%m-%d %H%M', # without seconds
    '%Y-%m-%d %I:%M:%S%p',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %I:%M%p',
    '%Y-%m-%d',
    '%m-%d',
    '%m/%d',
    '%m/%d/%y',
    '%m/%d/%Y',
    '%a %b %d %H:%M:%S %Y',
    '%a %b %d %I:%M:%S%p %Y',
    '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
    '%b %d %H:%M:%S %Y',
    '%b %d %I:%M:%S%p %Y',
    '%b %d %H:%M:%S',
    '%b %d %I:%M:%S%p',
    '%b %d %H:%M',
    '%b %d %I:%M%p',
    '%b %d %Y',
    '%b %d',
    '%H:%M:%S',
    '%I:%M:%S%p',
    '%H:%M',
    '%I:%M%p',
)

extendeddateformats = defaultdateformats + (
    "%Y",
    "%Y-%m",
    "%b",
    "%b %Y",
)

def cachefunc(func):
    '''cache the result of function calls'''
    # XXX doesn't handle keyword args
    if func.__code__.co_argcount == 0:
        cache = []
        def f():
            if len(cache) == 0:
                cache.append(func())
            return cache[0]
        return f
    cache = {}
    if func.__code__.co_argcount == 1:
        # we gain a small amount of time because
        # we don't need to pack/unpack the list
        def f(arg):
            if arg not in cache:
                cache[arg] = func(arg)
            return cache[arg]
    else:
        def f(*args):
            if args not in cache:
                cache[args] = func(*args)
            return cache[args]

    return f
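
# Illustrative usage sketch (not part of the original module): memoizing a
# pure one-argument function with cachefunc. The names below are
# hypothetical.
def _demo_cachefunc():
    calls = []
    @cachefunc
    def square(x):
        calls.append(x)
        return x * x
    square(3)
    square(3)          # the second call is served from the cache
    return len(calls)  # 1: the wrapped function ran only once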

class sortdict(dict):
    '''a simple sorted dictionary'''
    def __init__(self, data=None):
        self._list = []
        if data:
            self.update(data)
    def copy(self):
        return sortdict(self)
    def __setitem__(self, key, val):
        if key in self:
            self._list.remove(key)
        self._list.append(key)
        dict.__setitem__(self, key, val)
    def __iter__(self):
        return self._list.__iter__()
    def update(self, src):
        if isinstance(src, dict):
            src = src.iteritems()
        for k, v in src:
            self[k] = v
    def clear(self):
        dict.clear(self)
        self._list = []
    def items(self):
        return [(k, self[k]) for k in self._list]
    def __delitem__(self, key):
        dict.__delitem__(self, key)
        self._list.remove(key)
    def pop(self, key, *args, **kwargs):
        value = dict.pop(self, key, *args, **kwargs)
        try:
            self._list.remove(key)
        except ValueError:
            pass
        return value
    def keys(self):
        return self._list
    def iterkeys(self):
        return self._list.__iter__()
    def iteritems(self):
        for k in self._list:
            yield k, self[k]
    def insert(self, index, key, val):
        self._list.insert(index, key)
        dict.__setitem__(self, key, val)
    def __repr__(self):
        if not self:
            return '%s()' % self.__class__.__name__
        return '%s(%r)' % (self.__class__.__name__, self.items())

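# Illustrative usage sketch (not part of the original module): sortdict
# keeps insertion order, and re-setting an existing key moves it to the
# end of that order. The helper name _demo_sortdict is hypothetical.
def _demo_sortdict():
    d = sortdict([('a', 1), ('b', 2)])
    d['a'] = 3        # 'a' moves to the end of the iteration order
    return d.keys()   # ['b', 'a']
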
class _lrucachenode(object):
    """A node in a doubly linked list.

    Holds a reference to nodes on either side as well as a key-value
    pair for the dictionary entry.
    """
    __slots__ = (u'next', u'prev', u'key', u'value')

    def __init__(self):
        self.next = None
        self.prev = None

        self.key = _notset
        self.value = None

    def markempty(self):
        """Mark the node as emptied."""
        self.key = _notset

class lrucachedict(object):
    """Dict that caches most recent accesses and sets.

    The dict consists of an actual backing dict - indexed by original
    key - and a doubly linked circular list defining the order of entries in
    the cache.

    The head node is the newest entry in the cache. If the cache is full,
    we recycle head.prev and make it the new head. Cache accesses result in
    the node being moved to before the existing head and being marked as the
    new head node.
    """
    def __init__(self, max):
        self._cache = {}

        self._head = head = _lrucachenode()
        head.prev = head
        head.next = head
        self._size = 1
        self._capacity = max

    def __len__(self):
        return len(self._cache)

    def __contains__(self, k):
        return k in self._cache

    def __iter__(self):
        # We don't have to iterate in cache order, but why not.
        n = self._head
        for i in range(len(self._cache)):
            yield n.key
            n = n.next

    def __getitem__(self, k):
        node = self._cache[k]
        self._movetohead(node)
        return node.value

    def __setitem__(self, k, v):
        node = self._cache.get(k)
        # Replace existing value and mark as newest.
        if node is not None:
            node.value = v
            self._movetohead(node)
            return

        if self._size < self._capacity:
            node = self._addcapacity()
        else:
            # Grab the last/oldest item.
            node = self._head.prev

        # At capacity. Kill the old entry.
        if node.key is not _notset:
            del self._cache[node.key]

        node.key = k
        node.value = v
        self._cache[k] = node
        # And mark it as newest entry. No need to adjust order since it
        # is already self._head.prev.
        self._head = node

    def __delitem__(self, k):
        node = self._cache.pop(k)
        node.markempty()

        # Temporarily mark as newest item before re-adjusting head to make
        # this node the oldest item.
        self._movetohead(node)
        self._head = node.next

    # Additional dict methods.

    def get(self, k, default=None):
        try:
            return self._cache[k].value
        except KeyError:
            return default

    def clear(self):
        n = self._head
        while n.key is not _notset:
            n.markempty()
            n = n.next

        self._cache.clear()

    def copy(self):
        result = lrucachedict(self._capacity)
        n = self._head.prev
        # Iterate in oldest-to-newest order, so the copy has the right ordering
        for i in range(len(self._cache)):
            result[n.key] = n.value
            n = n.prev
        return result

    def _movetohead(self, node):
        """Mark a node as the newest, making it the new head.

        When a node is accessed, it becomes the freshest entry in the LRU
        list, which is denoted by self._head.

        Visually, let's make ``N`` the new head node (* denotes head):

            previous/oldest <-> head <-> next/next newest

            ----<->--- A* ---<->-----
            |                       |
            E <-> D <-> N <-> C <-> B

        To:

            ----<->--- N* ---<->-----
            |                       |
            E <-> D <-> C <-> B <-> A

        This requires the following moves:

           C.next = D  (node.prev.next = node.next)
           D.prev = C  (node.next.prev = node.prev)
           E.next = N  (head.prev.next = node)
           N.prev = E  (node.prev = head.prev)
           N.next = A  (node.next = head)
           A.prev = N  (head.prev = node)
        """
        head = self._head
        # C.next = D
        node.prev.next = node.next
        # D.prev = C
        node.next.prev = node.prev
        # N.prev = E
        node.prev = head.prev
        # N.next = A
        # It is tempting to do just "head" here, however if node is
        # adjacent to head, this will do bad things.
        node.next = head.prev.next
        # E.next = N
        node.next.prev = node
        # A.prev = N
        node.prev.next = node

        self._head = node

    def _addcapacity(self):
        """Add a node to the circular linked list.

        The new node is inserted before the head node.
        """
        head = self._head
        node = _lrucachenode()
        head.prev.next = node
        node.prev = head.prev
        node.next = head
        head.prev = node
        self._size += 1
        return node

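# Illustrative usage sketch (not part of the original module): once an
# lrucachedict is at capacity, inserting a new key recycles the least
# recently used node. The helper name _demo_lrucachedict is hypothetical.
def _demo_lrucachedict():
    d = lrucachedict(2)
    d['a'] = 1
    d['b'] = 2
    d['a']           # the access makes 'a' the newest entry
    d['c'] = 3       # recycles the node holding 'b', the oldest key
    return 'b' in d  # False
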
def lrucachefunc(func):
    '''cache most recent results of function calls'''
    cache = {}
    order = collections.deque()
    if func.__code__.co_argcount == 1:
        def f(arg):
            if arg not in cache:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[arg] = func(arg)
            else:
                order.remove(arg)
            order.append(arg)
            return cache[arg]
    else:
        def f(*args):
            if args not in cache:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[args] = func(*args)
            else:
                order.remove(args)
            order.append(args)
            return cache[args]

    return f

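# Illustrative usage sketch (not part of the original module): lrucachefunc
# keeps roughly the 20 most recent results, which suits functions called
# repeatedly with a small working set of arguments.
def _demo_lrucachefunc():
    upper = lrucachefunc(lambda s: s.upper())
    upper('foo')         # computed and cached
    return upper('foo')  # served from the cache, order refreshed
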
class propertycache(object):
    def __init__(self, func):
        self.func = func
        self.name = func.__name__
    def __get__(self, obj, type=None):
        result = self.func(obj)
        self.cachevalue(obj, result)
        return result

    def cachevalue(self, obj, value):
        # __dict__ assignment required to bypass __setattr__ (eg: repoview)
        obj.__dict__[self.name] = value

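# Illustrative usage sketch (not part of the original module): the first
# attribute access runs the decorated function, then cachevalue() stores
# the result in the instance __dict__, so later lookups never reach the
# descriptor again. The class name below is hypothetical.
class _demopropertycache(object):
    @propertycache
    def answer(self):
        return 42  # computed exactly once per instance
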
def pipefilter(s, cmd):
    '''filter string S through command CMD, returning its output'''
    p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    pout, perr = p.communicate(s)
    return pout

def tempfilter(s, cmd):
    '''filter string S through a pair of temporary files with CMD.
    CMD is used as a template to create the real command to be run,
    with the strings INFILE and OUTFILE replaced by the real names of
    the temporary files generated.'''
    inname, outname = None, None
    try:
        infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
        fp = os.fdopen(infd, 'wb')
        fp.write(s)
        fp.close()
        outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
        os.close(outfd)
        cmd = cmd.replace('INFILE', inname)
        cmd = cmd.replace('OUTFILE', outname)
        code = os.system(cmd)
        if pycompat.sysplatform == 'OpenVMS' and code & 1:
            code = 0
        if code:
            raise Abort(_("command '%s' failed: %s") %
                        (cmd, explainexit(code)))
        return readfile(outname)
    finally:
        try:
            if inname:
                os.unlink(inname)
        except OSError:
            pass
        try:
            if outname:
                os.unlink(outname)
        except OSError:
            pass

filtertable = {
    'tempfile:': tempfilter,
    'pipe:': pipefilter,
}

def filter(s, cmd):
    "filter a string through a command that transforms its input to its output"
    for name, fn in filtertable.iteritems():
        if cmd.startswith(name):
            return fn(s, cmd[len(name):].lstrip())
    return pipefilter(s, cmd)

def binary(s):
    """return true if a string is binary data"""
    return bool(s and '\0' in s)

def increasingchunks(source, min=1024, max=65536):
    '''return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max'''
    def log2(x):
        if not x:
            return 0
        i = 0
        while x:
            x >>= 1
            i += 1
        return i - 1

    buf = []
    blen = 0
    for chunk in source:
        buf.append(chunk)
        blen += len(chunk)
        if blen >= min:
            if min < max:
                min = min << 1
                nmin = 1 << log2(blen)
                if nmin > min:
                    min = nmin
                if min > max:
                    min = max
            yield ''.join(buf)
            blen = 0
            buf = []
    if buf:
        yield ''.join(buf)

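# Illustrative usage sketch (not part of the original module): many small
# chunks come back joined into progressively larger ones, keeping
# per-chunk overhead low for large streams. Python 2 is assumed, matching
# the module.
def _demo_increasingchunks():
    source = ('x' * 100 for _ in xrange(100))
    return [len(c) for c in increasingchunks(source)]  # sizes roughly double
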
Abort = error.Abort

def always(fn):
    return True

def never(fn):
    return False

def nogc(func):
    """disable garbage collector

    Python's garbage collector triggers a GC each time a certain number of
    container objects (the number being defined by gc.get_threshold()) are
    allocated even when marked not to be tracked by the collector. Tracking has
    no effect on when GCs are triggered, only on what objects the GC looks
    into. As a workaround, disable GC while building complex (huge)
    containers.

    This garbage collector issue has been fixed in 2.7.
888 888 """
889 889 if sys.version_info >= (2, 7):
890 890 return func
891 891 def wrapper(*args, **kwargs):
892 892 gcenabled = gc.isenabled()
893 893 gc.disable()
894 894 try:
895 895 return func(*args, **kwargs)
896 896 finally:
897 897 if gcenabled:
898 898 gc.enable()
899 899 return wrapper
900 900
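# Illustrative usage sketch (not part of the original module): decorating a
# builder of a huge container so pre-2.7 Pythons skip collector passes
# while it allocates. The helper name is hypothetical; Python 2's xrange
# is assumed, matching the module.
@nogc
def _demo_buildbig():
    return dict((i, str(i)) for i in xrange(100000))
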
def pathto(root, n1, n2):
    '''return the relative path from one place to another.
    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    '''
    if not n1:
        return localpath(n2)
    if os.path.isabs(n1):
        if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
            return os.path.join(root, localpath(n2))
        n2 = '/'.join((pconvert(root), n2))
    a, b = splitpath(n1), n2.split('/')
    a.reverse()
    b.reverse()
    while a and b and a[-1] == b[-1]:
        a.pop()
        b.pop()
    b.reverse()
    return pycompat.ossep.join((['..'] * len(a)) + b) or '.'

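# Illustrative usage sketch (not part of the original module): with n1 a
# cwd-like directory relative to root, pathto() returns how to reach n2
# from there. The expected value assumes a POSIX os.sep of '/'.
def _demo_pathto():
    return pathto('/repo', 'doc', 'src/b.py')  # '../src/b.py'
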
def mainfrozen():
    """return True if we are a frozen executable.

    The code supports py2exe (most common, Windows only) and tools/freeze
    (portable, not much used).
    """
    return (safehasattr(sys, "frozen") or # new py2exe
            safehasattr(sys, "importers") or # old py2exe
            imp.is_frozen(u"__main__")) # tools/freeze

# the location of data files matching the source code
if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
    # executable version (py2exe) doesn't support __file__
    datapath = os.path.dirname(pycompat.sysexecutable)
else:
    datapath = os.path.dirname(__file__)

if not isinstance(datapath, bytes):
    datapath = pycompat.fsencode(datapath)

i18n.setdatapath(datapath)

_hgexecutable = None

def hgexecutable():
    """return location of the 'hg' executable.

    Defaults to $HG or 'hg' in the search path.
    """
    if _hgexecutable is None:
        hg = encoding.environ.get('HG')
        mainmod = sys.modules['__main__']
        if hg:
            _sethgexecutable(hg)
        elif mainfrozen():
            if getattr(sys, 'frozen', None) == 'macosx_app':
                # Env variable set by py2app
                _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
            else:
                _sethgexecutable(pycompat.sysexecutable)
        elif os.path.basename(getattr(mainmod, '__file__', '')) == 'hg':
            _sethgexecutable(mainmod.__file__)
        else:
            exe = findexe('hg') or os.path.basename(sys.argv[0])
            _sethgexecutable(exe)
    return _hgexecutable

def _sethgexecutable(path):
    """set location of the 'hg' executable"""
    global _hgexecutable
    _hgexecutable = path

def _isstdout(f):
    fileno = getattr(f, 'fileno', None)
    return fileno and fileno() == sys.__stdout__.fileno()

def shellenviron(environ=None):
    """return environ with optional override, useful for shelling out"""
    def py2shell(val):
        'convert python object into string that is useful to shell'
        if val is None or val is False:
            return '0'
        if val is True:
            return '1'
        return str(val)
    env = dict(encoding.environ)
    if environ:
        env.update((k, py2shell(v)) for k, v in environ.iteritems())
    env['HG'] = hgexecutable()
    return env

def system(cmd, environ=None, cwd=None, onerr=None, errprefix=None, out=None):
    '''enhanced shell command execution.
    run with environment maybe modified, maybe in different dir.

    if command fails and onerr is None, return status, else raise onerr
    object as exception.

    if out is specified, it is assumed to be a file-like object that has a
    write() method. stdout and stderr will be redirected to out.'''
    try:
        stdout.flush()
    except Exception:
        pass
    origcmd = cmd
    cmd = quotecommand(cmd)
    if pycompat.sysplatform == 'plan9' and (sys.version_info[0] == 2
                                            and sys.version_info[1] < 7):
        # subprocess kludge to work around issues in half-baked Python
        # ports, notably bichued/python:
        if cwd is not None:
            os.chdir(cwd)
        rc = os.system(cmd)
    else:
        env = shellenviron(environ)
        if out is None or _isstdout(out):
            rc = subprocess.call(cmd, shell=True, close_fds=closefds,
                                 env=env, cwd=cwd)
        else:
            proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                                    env=env, cwd=cwd, stdout=subprocess.PIPE,
                                    stderr=subprocess.STDOUT)
            for line in iter(proc.stdout.readline, ''):
                out.write(line)
            proc.wait()
            rc = proc.returncode
        if pycompat.sysplatform == 'OpenVMS' and rc & 1:
            rc = 0
    if rc and onerr:
        errmsg = '%s %s' % (os.path.basename(origcmd.split(None, 1)[0]),
                            explainexit(rc)[0])
        if errprefix:
            errmsg = '%s: %s' % (errprefix, errmsg)
        raise onerr(errmsg)
    return rc

def checksignature(func):
    '''wrap a function with code to check for calling errors'''
    def check(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError:
            if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
                raise error.SignatureError
            raise

    return check

def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
    '''copy a file, preserving mode and optionally other stat info like
    atime/mtime

    checkambig argument is used with filestat, and is useful only if
    destination file is guarded by any lock (e.g. repo.lock or
    repo.wlock).

    copystat and checkambig should be exclusive.
    '''
    assert not (copystat and checkambig)
    oldstat = None
    if os.path.lexists(dest):
        if checkambig:
            oldstat = filestat(dest)
        unlink(dest)
    # hardlinks are problematic on CIFS, quietly ignore this flag
    # until we find a way to work around it cleanly (issue4546)
    if False and hardlink:
        try:
            oslink(src, dest)
            return
        except (IOError, OSError):
            pass # fall back to normal copy
    if os.path.islink(src):
        os.symlink(os.readlink(src), dest)
        # copytime is ignored for symlinks, but in general copytime isn't
        # needed for them anyway
    else:
        try:
            shutil.copyfile(src, dest)
            if copystat:
                # copystat also copies mode
                shutil.copystat(src, dest)
            else:
                shutil.copymode(src, dest)
                if oldstat and oldstat.stat:
                    newstat = filestat(dest)
                    if newstat.isambig(oldstat):
                        # stat of copied file is ambiguous to original one
                        advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
                        os.utime(dest, (advanced, advanced))
        except shutil.Error as inst:
            raise Abort(str(inst))

def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
    """Copy a directory tree using hardlinks if possible."""
    num = 0

    if hardlink is None:
        hardlink = (os.stat(src).st_dev ==
                    os.stat(os.path.dirname(dst)).st_dev)
    if hardlink:
        topic = _('linking')
    else:
        topic = _('copying')

    if os.path.isdir(src):
        os.mkdir(dst)
        for name, kind in osutil.listdir(src):
            srcname = os.path.join(src, name)
            dstname = os.path.join(dst, name)
            def nprog(t, pos):
                if pos is not None:
                    return progress(t, pos + num)
            hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
            num += n
    else:
        if hardlink:
            try:
                oslink(src, dst)
            except (IOError, OSError):
                hardlink = False
                shutil.copy(src, dst)
        else:
            shutil.copy(src, dst)
        num += 1
        progress(topic, num)
    progress(topic, None)

    return hardlink, num

_winreservednames = '''con prn aux nul
    com1 com2 com3 com4 com5 com6 com7 com8 com9
    lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
_winreservedchars = ':*?"<>|'
def checkwinfilename(path):
    r'''Check that the base-relative path is a valid filename on Windows.
    Returns None if the path is ok, or a UI string describing the problem.

    >>> checkwinfilename("just/a/normal/path")
    >>> checkwinfilename("foo/bar/con.xml")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename("foo/con.xml/bar")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename("foo/bar/xml.con")
    >>> checkwinfilename("foo/bar/AUX/bla.txt")
    "filename contains 'AUX', which is reserved on Windows"
    >>> checkwinfilename("foo/bar/bla:.txt")
    "filename contains ':', which is reserved on Windows"
    >>> checkwinfilename("foo/bar/b\07la.txt")
    "filename contains '\\x07', which is invalid on Windows"
    >>> checkwinfilename("foo/bar/bla ")
    "filename ends with ' ', which is not allowed on Windows"
    >>> checkwinfilename("../bar")
    >>> checkwinfilename("foo\\")
    "filename ends with '\\', which is invalid on Windows"
    >>> checkwinfilename("foo\\/bar")
    "directory name ends with '\\', which is invalid on Windows"
    '''
    if path.endswith('\\'):
        return _("filename ends with '\\', which is invalid on Windows")
    if '\\/' in path:
        return _("directory name ends with '\\', which is invalid on Windows")
    for n in path.replace('\\', '/').split('/'):
        if not n:
            continue
        for c in n:
            if c in _winreservedchars:
                return _("filename contains '%s', which is reserved "
                         "on Windows") % c
            if ord(c) <= 31:
                return _("filename contains %r, which is invalid "
                         "on Windows") % c
        base = n.split('.')[0]
        if base and base.lower() in _winreservednames:
            return _("filename contains '%s', which is reserved "
                     "on Windows") % base
        t = n[-1]
        if t in '. ' and n not in '..':
            return _("filename ends with '%s', which is not allowed "
                     "on Windows") % t

if pycompat.osname == 'nt':
    checkosfilename = checkwinfilename
else:
    checkosfilename = platform.checkosfilename

def makelock(info, pathname):
    try:
        return os.symlink(info, pathname)
    except OSError as why:
        if why.errno == errno.EEXIST:
            raise
    except AttributeError: # no symlink in os
        pass

    ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
    os.write(ld, info)
    os.close(ld)

def readlock(pathname):
    try:
        return os.readlink(pathname)
    except OSError as why:
        if why.errno not in (errno.EINVAL, errno.ENOSYS):
            raise
    except AttributeError: # no symlink in os
        pass
    fp = posixfile(pathname)
    r = fp.read()
    fp.close()
    return r

def fstat(fp):
    '''stat file object that may not have fileno method.'''
    try:
        return os.fstat(fp.fileno())
    except AttributeError:
        return os.stat(fp.name)

# File system features

def fscasesensitive(path):
    """
    Return true if the given path is on a case-sensitive filesystem

    Requires a path (like /foo/.hg) ending with a foldable final
    directory component.
    """
    s1 = os.lstat(path)
    d, b = os.path.split(path)
    b2 = b.upper()
    if b == b2:
        b2 = b.lower()
        if b == b2:
            return True # no evidence against case sensitivity
    p2 = os.path.join(d, b2)
    try:
        s2 = os.lstat(p2)
        if s2 == s1:
            return False
        return True
    except OSError:
        return True

try:
    import re2
    _re2 = None
except ImportError:
    _re2 = False

class _re(object):
    def _checkre2(self):
        global _re2
        try:
            # check if match works, see issue3964
            _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
        except ImportError:
            _re2 = False

    def compile(self, pat, flags=0):
        '''Compile a regular expression, using re2 if possible

        For best performance, use only re2-compatible regexp features. The
        only flags from the re module that are re2-compatible are
        IGNORECASE and MULTILINE.'''
        if _re2 is None:
            self._checkre2()
        if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
            if flags & remod.IGNORECASE:
                pat = '(?i)' + pat
            if flags & remod.MULTILINE:
                pat = '(?m)' + pat
            try:
                return re2.compile(pat)
            except re2.error:
                pass
        return remod.compile(pat, flags)

    @propertycache
    def escape(self):
        '''Return the version of escape corresponding to self.compile.

        This is imperfect because whether re2 or re is used for a particular
        function depends on the flags, etc, but it's the best we can do.
        '''
        global _re2
        if _re2 is None:
            self._checkre2()
        if _re2:
            return re2.escape
        else:
            return remod.escape

re = _re()

_fspathcache = {}
def fspath(name, root):
    '''Get name in the case stored in the filesystem

    The name should be relative to root, and be normcase-ed for efficiency.

    Note that this function is unnecessary, and should not be
    called, for case-sensitive filesystems (simply because it's expensive).

    The root should be normcase-ed, too.
    '''
    def _makefspathcacheentry(dir):
        return dict((normcase(n), n) for n in os.listdir(dir))

    seps = pycompat.ossep
    if pycompat.osaltsep:
        seps = seps + pycompat.osaltsep
    # Protect backslashes. This gets silly very quickly.
    seps = seps.replace('\\', '\\\\')
    pattern = remod.compile(r'([^%s]+)|([%s]+)' % (seps, seps))
    dir = os.path.normpath(root)
    result = []
    for part, sep in pattern.findall(name):
        if sep:
            result.append(sep)
            continue

        if dir not in _fspathcache:
            _fspathcache[dir] = _makefspathcacheentry(dir)
        contents = _fspathcache[dir]

        found = contents.get(part)
        if not found:
            # retry "once per directory" per "dirstate.walk" which
            # may take place for each patch of "hg qpush", for example
            _fspathcache[dir] = contents = _makefspathcacheentry(dir)
            found = contents.get(part)

        result.append(found or part)
        dir = os.path.join(dir, part)

    return ''.join(result)

def checknlink(testfile):
    '''check whether hardlink count reporting works properly'''

    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    f1 = testfile + ".hgtmp1"
    if os.path.lexists(f1):
        return False
    try:
        posixfile(f1, 'w').close()
    except IOError:
        try:
            os.unlink(f1)
        except OSError:
            pass
        return False

    f2 = testfile + ".hgtmp2"
    fd = None
    try:
        oslink(f1, f2)
        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        fd = posixfile(f2)
        return nlinks(f2) > 1
    except OSError:
        return False
    finally:
        if fd is not None:
            fd.close()
        for f in (f1, f2):
            try:
                os.unlink(f)
            except OSError:
                pass

def endswithsep(path):
    '''Check path ends with os.sep or os.altsep.'''
    return (path.endswith(pycompat.ossep)
            or pycompat.osaltsep and path.endswith(pycompat.osaltsep))

def splitpath(path):
    '''Split path by os.sep.
    Note that this function does not use os.altsep because this is
    an alternative of simple "xxx.split(os.sep)".
    It is recommended to use os.path.normpath() before using this
    function if needed.'''
    return path.split(pycompat.ossep)

def gui():
    '''Are we running in a GUI?'''
    if pycompat.sysplatform == 'darwin':
        if 'SSH_CONNECTION' in encoding.environ:
            # handle SSH access to a box where the user is logged in
            return False
        elif getattr(osutil, 'isgui', None):
            # check if a CoreGraphics session is available
            return osutil.isgui()
        else:
            # pure build; use a safe default
            return True
    else:
        return pycompat.osname == "nt" or encoding.environ.get("DISPLAY")

def mktempcopy(name, emptyok=False, createmode=None):
    """Create a temporary file with the same contents from name

    The permission bits are copied from the original file.

    If the temporary file is going to be truncated immediately, you
    can use emptyok=True as an optimization.

    Returns the name of the temporary file.
    """
    d, fn = os.path.split(name)
    fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, dir=d)
    os.close(fd)
    # Temporary files are created with mode 0600, which is usually not
    # what we want. If the original file already exists, just copy
    # its mode. Otherwise, manually obey umask.
    copymode(name, temp, createmode)
    if emptyok:
        return temp
    try:
        try:
            ifp = posixfile(name, "rb")
        except IOError as inst:
            if inst.errno == errno.ENOENT:
                return temp
            if not getattr(inst, 'filename', None):
                inst.filename = name
            raise
        ofp = posixfile(temp, "wb")
        for chunk in filechunkiter(ifp):
            ofp.write(chunk)
        ifp.close()
        ofp.close()
    except: # re-raises
        try: os.unlink(temp)
        except OSError: pass
        raise
    return temp

class filestat(object):
1449 1449 """help to exactly detect change of a file

    'stat' attribute is result of 'os.stat()' if specified 'path'
    exists. Otherwise, it is None. This can avoid preparative
    'exists()' examination on client side of this class.
    """
    def __init__(self, path):
        try:
            self.stat = os.stat(path)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            self.stat = None

    __hash__ = object.__hash__

    def __eq__(self, old):
        try:
            # if ambiguity between stat of new and old file is
            # avoided, comparison of size, ctime and mtime is enough
            # to exactly detect change of a file regardless of platform
            return (self.stat.st_size == old.stat.st_size and
                    self.stat.st_ctime == old.stat.st_ctime and
                    self.stat.st_mtime == old.stat.st_mtime)
        except AttributeError:
            return False

    def isambig(self, old):
        """Examine whether new (= self) stat is ambiguous against old one

        "S[N]" below means stat of a file at N-th change:

        - S[n-1].ctime  < S[n].ctime: can detect change of a file
        - S[n-1].ctime == S[n].ctime
          - S[n-1].ctime  < S[n].mtime: means natural advancing (*1)
          - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
          - S[n-1].ctime  > S[n].mtime: never occurs naturally (don't care)
        - S[n-1].ctime  > S[n].ctime: never occurs naturally (don't care)

        Case (*2) above means that a file was changed twice or more at
        same time in sec (= S[n-1].ctime), and comparison of timestamp
        is ambiguous.

        Base idea to avoid such ambiguity is "advance mtime 1 sec, if
        timestamp is ambiguous".

        But advancing mtime only in case (*2) doesn't work as
        expected, because naturally advanced S[n].mtime in case (*1)
        might be equal to manually advanced S[n-1 or earlier].mtime.

        Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
        treated as ambiguous regardless of mtime, to avoid overlooking
        by confliction between such mtime.

        Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
        S[n].mtime", even if size of a file isn't changed.
        """
        try:
            return (self.stat.st_ctime == old.stat.st_ctime)
        except AttributeError:
            return False

    def avoidambig(self, path, old):
        """Change file stat of specified path to avoid ambiguity

        'old' should be previous filestat of 'path'.

        This skips avoiding ambiguity, if a process doesn't have
        appropriate privileges for 'path'.
        """
        advanced = (old.stat.st_mtime + 1) & 0x7fffffff
        try:
            os.utime(path, (advanced, advanced))
        except OSError as inst:
            if inst.errno == errno.EPERM:
                # utime() on the file created by another user causes EPERM,
                # if a process doesn't have appropriate privileges
                return
            raise

    def __ne__(self, other):
        return not self == other

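# Illustrative usage sketch (not part of the original module): a
# size-preserving change within the same second as the previous one would
# be invisible to filestat.__eq__, so callers compare stats and advance
# mtime when isambig() reports that the ctimes collide. The helper name is
# hypothetical.
def _demo_avoidambig(path, oldstat):
    newstat = filestat(path)
    if newstat.isambig(oldstat):
        newstat.avoidambig(path, oldstat)  # forces S[n].mtime != S[n-1].mtime
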
class atomictempfile(object):
    '''writable file object that atomically updates a file

    All writes will go to a temporary copy of the original file. Call
    close() when you are done writing, and atomictempfile will rename
    the temporary copy to the original name, making the changes
    visible. If the object is destroyed without being closed, all your
    writes are discarded.

    checkambig argument of constructor is used with filestat, and is
    useful only if target file is guarded by any lock (e.g. repo.lock
    or repo.wlock).
    '''
    def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
        self.__name = name # permanent name
        self._tempname = mktempcopy(name, emptyok=('w' in mode),
                                    createmode=createmode)
        self._fp = posixfile(self._tempname, mode)
        self._checkambig = checkambig

        # delegated methods
        self.read = self._fp.read
        self.write = self._fp.write
        self.seek = self._fp.seek
        self.tell = self._fp.tell
        self.fileno = self._fp.fileno

    def close(self):
        if not self._fp.closed:
            self._fp.close()
            filename = localpath(self.__name)
            oldstat = self._checkambig and filestat(filename)
            if oldstat and oldstat.stat:
                rename(self._tempname, filename)
                newstat = filestat(filename)
                if newstat.isambig(oldstat):
                    # stat of changed file is ambiguous to original one
                    advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
                    os.utime(filename, (advanced, advanced))
            else:
                rename(self._tempname, filename)

    def discard(self):
        if not self._fp.closed:
            try:
                os.unlink(self._tempname)
            except OSError:
                pass
            self._fp.close()

    def __del__(self):
        if safehasattr(self, '_fp'): # constructor actually did something
            self.discard()

    def __enter__(self):
        return self

    def __exit__(self, exctype, excvalue, traceback):
        if exctype is not None:
            self.discard()
        else:
            self.close()

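# Illustrative usage sketch (not part of the original module): writes go to
# a temporary copy and only replace 'path' at close(); the context manager
# form discards the temporary file if the block raises. The helper name is
# hypothetical.
def _demo_atomictempfile(path):
    with atomictempfile(path, 'wb') as fp:
        fp.write('all or nothing\n')
    # on clean exit the temporary file has been renamed over 'path'
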
def makedirs(name, mode=None, notindexed=False):
    """recursive directory creation with parent mode inheritance

    Newly created directories are marked as "not to be indexed by
    the content indexing service", if ``notindexed`` is specified
    for "write" mode access.
    """
    try:
        makedir(name, notindexed)
    except OSError as err:
        if err.errno == errno.EEXIST:
            return
        if err.errno != errno.ENOENT or not name:
            raise
        parent = os.path.dirname(os.path.abspath(name))
        if parent == name:
            raise
        makedirs(parent, mode, notindexed)
        try:
            makedir(name, notindexed)
        except OSError as err:
            # Catch EEXIST to handle races
            if err.errno == errno.EEXIST:
                return
            raise
    if mode is not None:
        os.chmod(name, mode)

def readfile(path):
    with open(path, 'rb') as fp:
        return fp.read()

def writefile(path, text):
    with open(path, 'wb') as fp:
        fp.write(text)

def appendfile(path, text):
    with open(path, 'ab') as fp:
        fp.write(text)

class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
1640 1640 """in_iter is the iterator that's iterating over the input chunks.
1641 1641 targetsize is how big a buffer to try to maintain."""
1642 1642 def splitbig(chunks):
1643 1643 for chunk in chunks:
1644 1644 if len(chunk) > 2**20:
1645 1645 pos = 0
1646 1646 while pos < len(chunk):
1647 1647 end = pos + 2 ** 18
1648 1648 yield chunk[pos:end]
1649 1649 pos = end
1650 1650 else:
1651 1651 yield chunk
1652 1652 self.iter = splitbig(in_iter)
1653 1653 self._queue = collections.deque()
1654 1654 self._chunkoffset = 0
1655 1655
1656 1656 def read(self, l=None):
1657 1657 """Read L bytes of data from the iterator of chunks of data.
1658 1658 Returns less than L bytes if the iterator runs dry.
1659 1659
1660 1660 If size parameter is omitted, read everything"""
1661 1661 if l is None:
1662 1662 return ''.join(self.iter)
1663 1663
1664 1664 left = l
1665 1665 buf = []
1666 1666 queue = self._queue
1667 1667 while left > 0:
1668 1668 # refill the queue
1669 1669 if not queue:
1670 1670 target = 2**18
1671 1671 for chunk in self.iter:
1672 1672 queue.append(chunk)
1673 1673 target -= len(chunk)
1674 1674 if target <= 0:
1675 1675 break
1676 1676 if not queue:
1677 1677 break
1678 1678
1679 1679 # The easy way to do this would be to queue.popleft(), modify the
1680 1680 # chunk (if necessary), then queue.appendleft(). However, for cases
1681 1681 # where we read partial chunk content, this incurs 2 dequeue
1682 1682 # mutations and creates a new str for the remaining chunk in the
1683 1683 # queue. Our code below avoids this overhead.
1684 1684
1685 1685 chunk = queue[0]
1686 1686 chunkl = len(chunk)
1687 1687 offset = self._chunkoffset
1688 1688
1689 1689 # Use full chunk.
1690 1690 if offset == 0 and left >= chunkl:
1691 1691 left -= chunkl
1692 1692 queue.popleft()
1693 1693 buf.append(chunk)
1694 1694 # self._chunkoffset remains at 0.
1695 1695 continue
1696 1696
1697 1697 chunkremaining = chunkl - offset
1698 1698
1699 1699 # Use all of unconsumed part of chunk.
1700 1700 if left >= chunkremaining:
1701 1701 left -= chunkremaining
1702 1702 queue.popleft()
1703 1703 # offset == 0 is enabled by block above, so this won't merely
1704 1704 # copy via ``chunk[0:]``.
1705 1705 buf.append(chunk[offset:])
1706 1706 self._chunkoffset = 0
1707 1707
1708 1708 # Partial chunk needed.
1709 1709 else:
1710 1710 buf.append(chunk[offset:offset + left])
1711 1711 self._chunkoffset += left
1712 1712 left -= chunkremaining
1713 1713
1714 1714 return ''.join(buf)
1715 1715
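# A minimal usage sketch for chunkbuffer (hypothetical chunk values,
# using the str chunks this module deals in):
#
#   buf = chunkbuffer(iter(['abc', 'defgh']))
#   assert buf.read(4) == 'abcd'   # spans the first chunk boundary
#   assert buf.read(4) == 'efgh'   # short read: the iterator ran dry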
1716 1716 def filechunkiter(f, size=131072, limit=None):
1717 1717 """Create a generator that produces the data in the file size
1718 1718 (default 131072) bytes at a time, up to optional limit (default is
1719 1719 to read all data). Chunks may be less than size bytes if the
1720 1720 chunk is the last chunk in the file, or the file is a socket or
1721 1721 some other type of file that sometimes reads less data than is
1722 1722 requested."""
1723 1723 assert size >= 0
1724 1724 assert limit is None or limit >= 0
1725 1725 while True:
1726 1726 if limit is None:
1727 1727 nbytes = size
1728 1728 else:
1729 1729 nbytes = min(limit, size)
1730 1730 s = nbytes and f.read(nbytes)
1731 1731 if not s:
1732 1732 break
1733 1733 if limit:
1734 1734 limit -= len(s)
1735 1735 yield s
1736 1736
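# A minimal sketch of bounded chunked reads (``some-file`` and
# ``process`` are hypothetical; any object with a read(size) method
# works):
#
#   fp = posixfile('some-file', 'rb')
#   for chunk in filechunkiter(fp, size=8192, limit=32768):
#       process(chunk)   # sees at most 32768 bytes in total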
1737 1737 def makedate(timestamp=None):
1738 1738 '''Return a unix timestamp (or the current time) as a (unixtime,
1739 1739 offset) tuple based off the local timezone.'''
1740 1740 if timestamp is None:
1741 1741 timestamp = time.time()
1742 1742 if timestamp < 0:
1743 1743 hint = _("check your clock")
1744 1744 raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
1745 1745 delta = (datetime.datetime.utcfromtimestamp(timestamp) -
1746 1746 datetime.datetime.fromtimestamp(timestamp))
1747 1747 tz = delta.days * 86400 + delta.seconds
1748 1748 return timestamp, tz
1749 1749
1750 1750 def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
1751 1751 """represent a (unixtime, offset) tuple as a localized time.
1752 1752 unixtime is seconds since the epoch, and offset is the time zone's
1753 1753 number of seconds away from UTC.
1754 1754
1755 1755 >>> datestr((0, 0))
1756 1756 'Thu Jan 01 00:00:00 1970 +0000'
1757 1757 >>> datestr((42, 0))
1758 1758 'Thu Jan 01 00:00:42 1970 +0000'
1759 1759 >>> datestr((-42, 0))
1760 1760 'Wed Dec 31 23:59:18 1969 +0000'
1761 1761 >>> datestr((0x7fffffff, 0))
1762 1762 'Tue Jan 19 03:14:07 2038 +0000'
1763 1763 >>> datestr((-0x80000000, 0))
1764 1764 'Fri Dec 13 20:45:52 1901 +0000'
1765 1765 """
1766 1766 t, tz = date or makedate()
1767 1767 if "%1" in format or "%2" in format or "%z" in format:
1768 1768 sign = (tz > 0) and "-" or "+"
1769 1769 minutes = abs(tz) // 60
1770 1770 q, r = divmod(minutes, 60)
1771 1771 format = format.replace("%z", "%1%2")
1772 1772 format = format.replace("%1", "%c%02d" % (sign, q))
1773 1773 format = format.replace("%2", "%02d" % r)
1774 1774 d = t - tz
1775 1775 if d > 0x7fffffff:
1776 1776 d = 0x7fffffff
1777 1777 elif d < -0x80000000:
1778 1778 d = -0x80000000
1779 1779 # Never use time.gmtime() and datetime.datetime.fromtimestamp()
1780 1780 # because they use the gmtime() system call which is buggy on Windows
1781 1781 # for negative values.
1782 1782 t = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=d)
1783 1783 s = t.strftime(format)
1784 1784 return s
1785 1785
1786 1786 def shortdate(date=None):
1787 1787 """turn (timestamp, tzoff) tuple into iso 8631 date."""
1788 1788 return datestr(date, format='%Y-%m-%d')
1789 1789
1790 1790 def parsetimezone(s):
1791 1791 """find a trailing timezone, if any, in string, and return a
1792 1792 (offset, remainder) pair"""
1793 1793
1794 1794 if s.endswith("GMT") or s.endswith("UTC"):
1795 1795 return 0, s[:-3].rstrip()
1796 1796
1797 1797 # Unix-style timezones [+-]hhmm
1798 1798 if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
1799 1799 sign = (s[-5] == "+") and 1 or -1
1800 1800 hours = int(s[-4:-2])
1801 1801 minutes = int(s[-2:])
1802 1802 return -sign * (hours * 60 + minutes) * 60, s[:-5].rstrip()
1803 1803
1804 1804 # ISO8601 trailing Z
1805 1805 if s.endswith("Z") and s[-2:-1].isdigit():
1806 1806 return 0, s[:-1]
1807 1807
1808 1808 # ISO8601-style [+-]hh:mm
1809 1809 if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
1810 1810 s[-5:-3].isdigit() and s[-2:].isdigit()):
1811 1811 sign = (s[-6] == "+") and 1 or -1
1812 1812 hours = int(s[-5:-3])
1813 1813 minutes = int(s[-2:])
1814 1814 return -sign * (hours * 60 + minutes) * 60, s[:-6]
1815 1815
1816 1816 return None, s
1817 1817
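# A few illustrative inputs (results follow from the rules above; note
# the returned offset is negated, since unixtime = localunixtime + offset):
#
#   parsetimezone('Fri Dec 02 2016 +0100') -> (-3600, 'Fri Dec 02 2016')
#   parsetimezone('2016-12-02T10:00:00Z')  -> (0, '2016-12-02T10:00:00')
#   parsetimezone('no timezone here')      -> (None, 'no timezone here')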
1818 1818 def strdate(string, format, defaults=[]):
1819 1819 """parse a localized time string and return a (unixtime, offset) tuple.
1820 1820 if the string cannot be parsed, ValueError is raised."""
1821 1821 # NOTE: unixtime = localunixtime + offset
1822 1822 offset, date = parsetimezone(string)
1823 1823
1824 1824 # add missing elements from defaults
1825 1825 usenow = False # default to using biased defaults
1826 1826 for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
1827 1827 found = [True for p in part if ("%"+p) in format]
1828 1828 if not found:
1829 1829 date += "@" + defaults[part][usenow]
1830 1830 format += "@%" + part[0]
1831 1831 else:
1832 1832 # We've found a specific time element, less specific time
1833 1833 # elements are relative to today
1834 1834 usenow = True
1835 1835
1836 1836 timetuple = time.strptime(date, format)
1837 1837 localunixtime = int(calendar.timegm(timetuple))
1838 1838 if offset is None:
1839 1839 # local timezone
1840 1840 unixtime = int(time.mktime(timetuple))
1841 1841 offset = unixtime - localunixtime
1842 1842 else:
1843 1843 unixtime = localunixtime + offset
1844 1844 return unixtime, offset
1845 1845
1846 1846 def parsedate(date, formats=None, bias=None):
1847 1847 """parse a localized date/time and return a (unixtime, offset) tuple.
1848 1848
1849 1849 The date may be a "unixtime offset" string or in one of the specified
1850 1850 formats. If the date already is a (unixtime, offset) tuple, it is returned.
1851 1851
1852 1852 >>> parsedate(' today ') == parsedate(\
1853 1853 datetime.date.today().strftime('%b %d'))
1854 1854 True
1855 1855 >>> parsedate( 'yesterday ') == parsedate((datetime.date.today() -\
1856 1856 datetime.timedelta(days=1)\
1857 1857 ).strftime('%b %d'))
1858 1858 True
1859 1859 >>> now, tz = makedate()
1860 1860 >>> strnow, strtz = parsedate('now')
1861 1861 >>> (strnow - now) < 1
1862 1862 True
1863 1863 >>> tz == strtz
1864 1864 True
1865 1865 """
1866 1866 if bias is None:
1867 1867 bias = {}
1868 1868 if not date:
1869 1869 return 0, 0
1870 1870 if isinstance(date, tuple) and len(date) == 2:
1871 1871 return date
1872 1872 if not formats:
1873 1873 formats = defaultdateformats
1874 1874 date = date.strip()
1875 1875
1876 1876 if date == 'now' or date == _('now'):
1877 1877 return makedate()
1878 1878 if date == 'today' or date == _('today'):
1879 1879 date = datetime.date.today().strftime('%b %d')
1880 1880 elif date == 'yesterday' or date == _('yesterday'):
1881 1881 date = (datetime.date.today() -
1882 1882 datetime.timedelta(days=1)).strftime('%b %d')
1883 1883
1884 1884 try:
1885 1885 when, offset = map(int, date.split(' '))
1886 1886 except ValueError:
1887 1887 # fill out defaults
1888 1888 now = makedate()
1889 1889 defaults = {}
1890 1890 for part in ("d", "mb", "yY", "HI", "M", "S"):
1891 1891 # this piece is for rounding the specific end of unknowns
1892 1892 b = bias.get(part)
1893 1893 if b is None:
1894 1894 if part[0] in "HMS":
1895 1895 b = "00"
1896 1896 else:
1897 1897 b = "0"
1898 1898
1899 1899 # this piece is for matching the generic end to today's date
1900 1900 n = datestr(now, "%" + part[0])
1901 1901
1902 1902 defaults[part] = (b, n)
1903 1903
1904 1904 for format in formats:
1905 1905 try:
1906 1906 when, offset = strdate(date, format, defaults)
1907 1907 except (ValueError, OverflowError):
1908 1908 pass
1909 1909 else:
1910 1910 break
1911 1911 else:
1912 1912 raise Abort(_('invalid date: %r') % date)
1913 1913 # validate explicit (probably user-specified) date and
1914 1914 # time zone offset. values must fit in signed 32 bits for
1915 1915 # current 32-bit linux runtimes. timezones go from UTC-12
1916 1916 # to UTC+14
1917 1917 if when < -0x80000000 or when > 0x7fffffff:
1918 1918 raise Abort(_('date exceeds 32 bits: %d') % when)
1919 1919 if offset < -50400 or offset > 43200:
1920 1920 raise Abort(_('impossible time zone offset: %d') % offset)
1921 1921 return when, offset
1922 1922
1923 1923 def matchdate(date):
1924 1924 """Return a function that matches a given date match specifier
1925 1925
1926 1926 Formats include:
1927 1927
1928 1928 '{date}' match a given date to the accuracy provided
1929 1929
1930 1930 '<{date}' on or before a given date
1931 1931
1932 1932 '>{date}' on or after a given date
1933 1933
1934 1934 >>> p1 = parsedate("10:29:59")
1935 1935 >>> p2 = parsedate("10:30:00")
1936 1936 >>> p3 = parsedate("10:30:59")
1937 1937 >>> p4 = parsedate("10:31:00")
1938 1938 >>> p5 = parsedate("Sep 15 10:30:00 1999")
1939 1939 >>> f = matchdate("10:30")
1940 1940 >>> f(p1[0])
1941 1941 False
1942 1942 >>> f(p2[0])
1943 1943 True
1944 1944 >>> f(p3[0])
1945 1945 True
1946 1946 >>> f(p4[0])
1947 1947 False
1948 1948 >>> f(p5[0])
1949 1949 False
1950 1950 """
1951 1951
1952 1952 def lower(date):
1953 1953 d = {'mb': "1", 'd': "1"}
1954 1954 return parsedate(date, extendeddateformats, d)[0]
1955 1955
1956 1956 def upper(date):
1957 1957 d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
1958 1958 for days in ("31", "30", "29"):
1959 1959 try:
1960 1960 d["d"] = days
1961 1961 return parsedate(date, extendeddateformats, d)[0]
1962 1962 except Abort:
1963 1963 pass
1964 1964 d["d"] = "28"
1965 1965 return parsedate(date, extendeddateformats, d)[0]
1966 1966
1967 1967 date = date.strip()
1968 1968
1969 1969 if not date:
1970 1970 raise Abort(_("dates cannot consist entirely of whitespace"))
1971 1971 elif date[0] == "<":
1972 1972 if not date[1:]:
1973 1973 raise Abort(_("invalid day spec, use '<DATE'"))
1974 1974 when = upper(date[1:])
1975 1975 return lambda x: x <= when
1976 1976 elif date[0] == ">":
1977 1977 if not date[1:]:
1978 1978 raise Abort(_("invalid day spec, use '>DATE'"))
1979 1979 when = lower(date[1:])
1980 1980 return lambda x: x >= when
1981 1981 elif date[0] == "-":
1982 1982 try:
1983 1983 days = int(date[1:])
1984 1984 except ValueError:
1985 1985 raise Abort(_("invalid day spec: %s") % date[1:])
1986 1986 if days < 0:
1987 1987 raise Abort(_("%s must be nonnegative (see 'hg help dates')")
1988 1988 % date[1:])
1989 1989 when = makedate()[0] - days * 3600 * 24
1990 1990 return lambda x: x >= when
1991 1991 elif " to " in date:
1992 1992 a, b = date.split(" to ")
1993 1993 start, stop = lower(a), upper(b)
1994 1994 return lambda x: x >= start and x <= stop
1995 1995 else:
1996 1996 start, stop = lower(date), upper(date)
1997 1997 return lambda x: x >= start and x <= stop
1998 1998
1999 1999 def stringmatcher(pattern, casesensitive=True):
2000 2000 """
2001 2001 accepts a string, possibly starting with 're:' or 'literal:' prefix.
2002 2002 returns the matcher name, pattern, and matcher function.
2003 2003 missing or unknown prefixes are treated as literal matches.
2004 2004
2005 2005 helper for tests:
2006 2006 >>> def test(pattern, *tests):
2007 2007 ... kind, pattern, matcher = stringmatcher(pattern)
2008 2008 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2009 2009 >>> def itest(pattern, *tests):
2010 2010 ... kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
2011 2011 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2012 2012
2013 2013 exact matching (no prefix):
2014 2014 >>> test('abcdefg', 'abc', 'def', 'abcdefg')
2015 2015 ('literal', 'abcdefg', [False, False, True])
2016 2016
2017 2017 regex matching ('re:' prefix)
2018 2018 >>> test('re:a.+b', 'nomatch', 'fooadef', 'fooadefbar')
2019 2019 ('re', 'a.+b', [False, False, True])
2020 2020
2021 2021 force exact matches ('literal:' prefix)
2022 2022 >>> test('literal:re:foobar', 'foobar', 're:foobar')
2023 2023 ('literal', 're:foobar', [False, True])
2024 2024
2025 2025 unknown prefixes are ignored and treated as literals
2026 2026 >>> test('foo:bar', 'foo', 'bar', 'foo:bar')
2027 2027 ('literal', 'foo:bar', [False, False, True])
2028 2028
2029 2029 case insensitive regex matches
2030 2030 >>> itest('re:A.+b', 'nomatch', 'fooadef', 'fooadefBar')
2031 2031 ('re', 'A.+b', [False, False, True])
2032 2032
2033 2033 case insensitive literal matches
2034 2034 >>> itest('ABCDEFG', 'abc', 'def', 'abcdefg')
2035 2035 ('literal', 'ABCDEFG', [False, False, True])
2036 2036 """
2037 2037 if pattern.startswith('re:'):
2038 2038 pattern = pattern[3:]
2039 2039 try:
2040 2040 flags = 0
2041 2041 if not casesensitive:
2042 2042 flags = remod.I
2043 2043 regex = remod.compile(pattern, flags)
2044 2044 except remod.error as e:
2045 2045 raise error.ParseError(_('invalid regular expression: %s')
2046 2046 % e)
2047 2047 return 're', pattern, regex.search
2048 2048 elif pattern.startswith('literal:'):
2049 2049 pattern = pattern[8:]
2050 2050
2051 2051 match = pattern.__eq__
2052 2052
2053 2053 if not casesensitive:
2054 2054 ipat = encoding.lower(pattern)
2055 2055 match = lambda s: ipat == encoding.lower(s)
2056 2056 return 'literal', pattern, match
2057 2057
2058 2058 def shortuser(user):
2059 2059 """Return a short representation of a user name or email address."""
2060 2060 f = user.find('@')
2061 2061 if f >= 0:
2062 2062 user = user[:f]
2063 2063 f = user.find('<')
2064 2064 if f >= 0:
2065 2065 user = user[f + 1:]
2066 2066 f = user.find(' ')
2067 2067 if f >= 0:
2068 2068 user = user[:f]
2069 2069 f = user.find('.')
2070 2070 if f >= 0:
2071 2071 user = user[:f]
2072 2072 return user
2073 2073
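# For example (illustrative only):
#
#   shortuser('John Doe <john.doe@example.com>') -> 'john'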
2074 2074 def emailuser(user):
2075 2075 """Return the user portion of an email address."""
2076 2076 f = user.find('@')
2077 2077 if f >= 0:
2078 2078 user = user[:f]
2079 2079 f = user.find('<')
2080 2080 if f >= 0:
2081 2081 user = user[f + 1:]
2082 2082 return user
2083 2083
2084 2084 def email(author):
2085 2085 '''get email of author.'''
2086 2086 r = author.find('>')
2087 2087 if r == -1:
2088 2088 r = None
2089 2089 return author[author.find('<') + 1:r]
2090 2090
2091 2091 def ellipsis(text, maxlength=400):
2092 2092 """Trim string to at most maxlength (default: 400) columns in display."""
2093 2093 return encoding.trim(text, maxlength, ellipsis='...')
2094 2094
2095 2095 def unitcountfn(*unittable):
2096 2096 '''return a function that renders a readable count of some quantity'''
2097 2097
2098 2098 def go(count):
2099 2099 for multiplier, divisor, format in unittable:
2100 2100 if count >= divisor * multiplier:
2101 2101 return format % (count / float(divisor))
2102 2102 return unittable[-1][2] % count
2103 2103
2104 2104 return go
2105 2105
2106 2106 bytecount = unitcountfn(
2107 2107 (100, 1 << 30, _('%.0f GB')),
2108 2108 (10, 1 << 30, _('%.1f GB')),
2109 2109 (1, 1 << 30, _('%.2f GB')),
2110 2110 (100, 1 << 20, _('%.0f MB')),
2111 2111 (10, 1 << 20, _('%.1f MB')),
2112 2112 (1, 1 << 20, _('%.2f MB')),
2113 2113 (100, 1 << 10, _('%.0f KB')),
2114 2114 (10, 1 << 10, _('%.1f KB')),
2115 2115 (1, 1 << 10, _('%.2f KB')),
2116 2116 (1, 1, _('%.0f bytes')),
2117 2117 )
2118 2118
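# For example (values follow from the table above):
#
#   bytecount(500)       -> '500 bytes'
#   bytecount(2252)      -> '2.20 KB'
#   bytecount(104857600) -> '100 MB'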
2119 2119 def uirepr(s):
2120 2120 # Avoid double backslash in Windows path repr()
2121 2121 return repr(s).replace('\\\\', '\\')
2122 2122
2123 2123 # delay import of textwrap
2124 2124 def MBTextWrapper(**kwargs):
2125 2125 class tw(textwrap.TextWrapper):
2126 2126 """
2127 2127 Extend TextWrapper for width-awareness.
2128 2128
2129 2129 Neither the number of 'bytes' in any encoding nor the number of
2130 2130 'characters' is appropriate for calculating terminal columns for a given string.
2131 2131
2132 2132 The original TextWrapper implementation uses the built-in 'len()'
2133 2133 directly, so it must be overridden to use the width information of each character.
2134 2134
2135 2135 In addition, characters classified as having 'ambiguous' width are
2136 2136 treated as wide in East Asian locales, but as narrow in others.
2137 2137
2138 2138 This requires a per-user decision to determine the width of such characters.
2139 2139 """
2140 2140 def _cutdown(self, ucstr, space_left):
2141 2141 l = 0
2142 2142 colwidth = encoding.ucolwidth
2143 2143 for i in xrange(len(ucstr)):
2144 2144 l += colwidth(ucstr[i])
2145 2145 if space_left < l:
2146 2146 return (ucstr[:i], ucstr[i:])
2147 2147 return ucstr, ''
2148 2148
2149 2149 # overriding of base class
2150 2150 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
2151 2151 space_left = max(width - cur_len, 1)
2152 2152
2153 2153 if self.break_long_words:
2154 2154 cut, res = self._cutdown(reversed_chunks[-1], space_left)
2155 2155 cur_line.append(cut)
2156 2156 reversed_chunks[-1] = res
2157 2157 elif not cur_line:
2158 2158 cur_line.append(reversed_chunks.pop())
2159 2159
2160 2160 # this overriding code is imported from TextWrapper of Python 2.6
2161 2161 # to calculate columns of string by 'encoding.ucolwidth()'
2162 2162 def _wrap_chunks(self, chunks):
2163 2163 colwidth = encoding.ucolwidth
2164 2164
2165 2165 lines = []
2166 2166 if self.width <= 0:
2167 2167 raise ValueError("invalid width %r (must be > 0)" % self.width)
2168 2168
2169 2169 # Arrange in reverse order so items can be efficiently popped
2170 2170 # from a stack of chunks.
2171 2171 chunks.reverse()
2172 2172
2173 2173 while chunks:
2174 2174
2175 2175 # Start the list of chunks that will make up the current line.
2176 2176 # cur_len is just the length of all the chunks in cur_line.
2177 2177 cur_line = []
2178 2178 cur_len = 0
2179 2179
2180 2180 # Figure out which static string will prefix this line.
2181 2181 if lines:
2182 2182 indent = self.subsequent_indent
2183 2183 else:
2184 2184 indent = self.initial_indent
2185 2185
2186 2186 # Maximum width for this line.
2187 2187 width = self.width - len(indent)
2188 2188
2189 2189 # First chunk on line is whitespace -- drop it, unless this
2190 2190 # is the very beginning of the text (i.e. no lines started yet).
2191 2191 if self.drop_whitespace and chunks[-1].strip() == '' and lines:
2192 2192 del chunks[-1]
2193 2193
2194 2194 while chunks:
2195 2195 l = colwidth(chunks[-1])
2196 2196
2197 2197 # Can at least squeeze this chunk onto the current line.
2198 2198 if cur_len + l <= width:
2199 2199 cur_line.append(chunks.pop())
2200 2200 cur_len += l
2201 2201
2202 2202 # Nope, this line is full.
2203 2203 else:
2204 2204 break
2205 2205
2206 2206 # The current line is full, and the next chunk is too big to
2207 2207 # fit on *any* line (not just this one).
2208 2208 if chunks and colwidth(chunks[-1]) > width:
2209 2209 self._handle_long_word(chunks, cur_line, cur_len, width)
2210 2210
2211 2211 # If the last chunk on this line is all whitespace, drop it.
2212 2212 if (self.drop_whitespace and
2213 2213 cur_line and cur_line[-1].strip() == ''):
2214 2214 del cur_line[-1]
2215 2215
2216 2216 # Convert current line back to a string and store it in list
2217 2217 # of all lines (return value).
2218 2218 if cur_line:
2219 2219 lines.append(indent + ''.join(cur_line))
2220 2220
2221 2221 return lines
2222 2222
2223 2223 global MBTextWrapper
2224 2224 MBTextWrapper = tw
2225 2225 return tw(**kwargs)
2226 2226
2227 2227 def wrap(line, width, initindent='', hangindent=''):
2228 2228 maxindent = max(len(hangindent), len(initindent))
2229 2229 if width <= maxindent:
2230 2230 # adjust for weird terminal size
2231 2231 width = max(78, maxindent + 1)
2232 2232 line = line.decode(encoding.encoding, encoding.encodingmode)
2233 2233 initindent = initindent.decode(encoding.encoding, encoding.encodingmode)
2234 2234 hangindent = hangindent.decode(encoding.encoding, encoding.encodingmode)
2235 2235 wrapper = MBTextWrapper(width=width,
2236 2236 initial_indent=initindent,
2237 2237 subsequent_indent=hangindent)
2238 2238 return wrapper.fill(line).encode(encoding.encoding)
2239 2239
2240 2240 if (pyplatform.python_implementation() == 'CPython' and
2241 2241 sys.version_info < (3, 0)):
2242 2242 # There is an issue in CPython that some IO methods do not handle EINTR
2243 2243 # correctly. The following table shows what CPython version (and functions)
2244 2244 # are affected (buggy: has the EINTR bug, okay: otherwise):
2245 2245 #
2246 2246 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2247 2247 # --------------------------------------------------
2248 2248 # fp.__iter__ | buggy | buggy | okay
2249 2249 # fp.read* | buggy | okay [1] | okay
2250 2250 #
2251 2251 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2252 2252 #
2253 2253 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2254 2254 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2255 2255 #
2256 2256 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2257 2257 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2258 2258 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2259 2259 # fp.__iter__ but not other fp.read* methods.
2260 2260 #
2261 2261 # On modern systems like Linux, the "read" syscall cannot be interrupted
2262 2262 # when reading "fast" files like on-disk files. So the EINTR issue only
2263 2263 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2264 2264 # files approximately as "fast" files and use the fast (unsafe) code path,
2265 2265 # to minimize the performance impact.
2266 2266 if sys.version_info >= (2, 7, 4):
2267 2267 # fp.readline deals with EINTR correctly, use it as a workaround.
2268 2268 def _safeiterfile(fp):
2269 2269 return iter(fp.readline, '')
2270 2270 else:
2271 2271 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2272 2272 # note: this may block longer than necessary because of bufsize.
2273 2273 def _safeiterfile(fp, bufsize=4096):
2274 2274 fd = fp.fileno()
2275 2275 line = ''
2276 2276 while True:
2277 2277 try:
2278 2278 buf = os.read(fd, bufsize)
2279 2279 except OSError as ex:
2280 2280 # os.read only raises EINTR before any data is read
2281 2281 if ex.errno == errno.EINTR:
2282 2282 continue
2283 2283 else:
2284 2284 raise
2285 2285 line += buf
2286 2286 if '\n' in buf:
2287 2287 splitted = line.splitlines(True)
2288 2288 line = ''
2289 2289 for l in splitted:
2290 2290 if l[-1] == '\n':
2291 2291 yield l
2292 2292 else:
2293 2293 line = l
2294 2294 if not buf:
2295 2295 break
2296 2296 if line:
2297 2297 yield line
2298 2298
2299 2299 def iterfile(fp):
2300 2300 fastpath = True
2301 2301 if type(fp) is file:
2302 2302 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2303 2303 if fastpath:
2304 2304 return fp
2305 2305 else:
2306 2306 return _safeiterfile(fp)
2307 2307 else:
2308 2308 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2309 2309 def iterfile(fp):
2310 2310 return fp
2311 2311
2312 2312 def iterlines(iterator):
2313 2313 for chunk in iterator:
2314 2314 for line in chunk.splitlines():
2315 2315 yield line
2316 2316
2317 2317 def expandpath(path):
2318 2318 return os.path.expanduser(os.path.expandvars(path))
2319 2319
2320 2320 def hgcmd():
2321 2321 """Return the command used to execute current hg
2322 2322
2323 2323 This is different from hgexecutable() because on Windows we want
2324 2324 to avoid things opening new shell windows like batch files, so we
2325 2325 get either the python call or current executable.
2326 2326 """
2327 2327 if mainfrozen():
2328 2328 if getattr(sys, 'frozen', None) == 'macosx_app':
2329 2329 # Env variable set by py2app
2330 2330 return [encoding.environ['EXECUTABLEPATH']]
2331 2331 else:
2332 2332 return [pycompat.sysexecutable]
2333 2333 return gethgcmd()
2334 2334
2335 2335 def rundetached(args, condfn):
2336 2336 """Execute the argument list in a detached process.
2337 2337
2338 2338 condfn is a callable which is called repeatedly and should return
2339 2339 True once the child process is known to have started successfully.
2340 2340 At this point, the child process PID is returned. If the child
2341 2341 process fails to start or finishes before condfn() evaluates to
2342 2342 True, return -1.
2343 2343 """
2344 2344 # Windows case is easier because the child process is either
2345 2345 # successfully starting and validating the condition or exiting
2346 2346 # on failure. We just poll on its PID. On Unix, if the child
2347 2347 # process fails to start, it will be left in a zombie state until
2348 2348 # the parent waits on it, which we cannot do since we expect a
2349 2349 # long-running process on success. Instead we listen for SIGCHLD telling
2350 2350 # us our child process terminated.
2351 2351 terminated = set()
2352 2352 def handler(signum, frame):
2353 2353 terminated.add(os.wait())
2354 2354 prevhandler = None
2355 2355 SIGCHLD = getattr(signal, 'SIGCHLD', None)
2356 2356 if SIGCHLD is not None:
2357 2357 prevhandler = signal.signal(SIGCHLD, handler)
2358 2358 try:
2359 2359 pid = spawndetached(args)
2360 2360 while not condfn():
2361 2361 if ((pid in terminated or not testpid(pid))
2362 2362 and not condfn()):
2363 2363 return -1
2364 2364 time.sleep(0.1)
2365 2365 return pid
2366 2366 finally:
2367 2367 if prevhandler is not None:
2368 2368 signal.signal(signal.SIGCHLD, prevhandler)
2369 2369
2370 2370 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2371 2371 """Return the result of interpolating items in the mapping into string s.
2372 2372
2373 2373 prefix is a single character string, or a two character string with
2374 2374 a backslash as the first character if the prefix needs to be escaped in
2375 2375 a regular expression.
2376 2376
2377 2377 fn is an optional function that will be applied to the replacement text
2378 2378 just before replacement.
2379 2379
2380 2380 escape_prefix is an optional flag that allows the prefix to be
2381 2381 escaped by doubling it.
2382 2382 """
2383 2383 fn = fn or (lambda s: s)
2384 2384 patterns = '|'.join(mapping.keys())
2385 2385 if escape_prefix:
2386 2386 patterns += '|' + prefix
2387 2387 if len(prefix) > 1:
2388 2388 prefix_char = prefix[1:]
2389 2389 else:
2390 2390 prefix_char = prefix
2391 2391 mapping[prefix_char] = prefix_char
2392 2392 r = remod.compile(r'%s(%s)' % (prefix, patterns))
2393 2393 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2394 2394
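# A minimal sketch (hypothetical mapping; '$' must be backslash-escaped
# because it is a regular expression metacharacter):
#
#   interpolate(r'\$', {'user': 'alice', 'host': 'dev1'},
#               'ssh://$user@$host/repo') -> 'ssh://alice@dev1/repo'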
2395 2395 def getport(port):
2396 2396 """Return the port for a given network service.
2397 2397
2398 2398 If port is an integer, it's returned as is. If it's a string, it's
2399 2399 looked up using socket.getservbyname(). If there's no matching
2400 2400 service, error.Abort is raised.
2401 2401 """
2402 2402 try:
2403 2403 return int(port)
2404 2404 except ValueError:
2405 2405 pass
2406 2406
2407 2407 try:
2408 2408 return socket.getservbyname(port)
2409 2409 except socket.error:
2410 2410 raise Abort(_("no port number associated with service '%s'") % port)
2411 2411
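# For example (the name lookup depends on the local services database):
#
#   getport(8000)   -> 8000
#   getport('8000') -> 8000
#   getport('http') -> 80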
2412 2412 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
2413 2413 '0': False, 'no': False, 'false': False, 'off': False,
2414 2414 'never': False}
2415 2415
2416 2416 def parsebool(s):
2417 2417 """Parse s into a boolean.
2418 2418
2419 2419 If s is not a valid boolean, returns None.
2420 2420 """
2421 2421 return _booleans.get(s.lower(), None)
2422 2422
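# For example:
#
#   parsebool('on')    -> True
#   parsebool('Never') -> False
#   parsebool('maybe') -> None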
2423 2423 _hextochr = dict((a + b, chr(int(a + b, 16)))
2424 2424 for a in string.hexdigits for b in string.hexdigits)
2425 2425
2426 2426 class url(object):
2427 2427 r"""Reliable URL parser.
2428 2428
2429 2429 This parses URLs and provides attributes for the following
2430 2430 components:
2431 2431
2432 2432 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2433 2433
2434 2434 Missing components are set to None. The only exception is
2435 2435 fragment, which is set to '' if present but empty.
2436 2436
2437 2437 If parsefragment is False, fragment is included in query. If
2438 2438 parsequery is False, query is included in path. If both are
2439 2439 False, both fragment and query are included in path.
2440 2440
2441 2441 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2442 2442
2443 2443 Note that for backward compatibility reasons, bundle URLs do not
2444 2444 take host names. That means 'bundle://../' has a path of '../'.
2445 2445
2446 2446 Examples:
2447 2447
2448 2448 >>> url('http://www.ietf.org/rfc/rfc2396.txt')
2449 2449 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2450 2450 >>> url('ssh://[::1]:2200//home/joe/repo')
2451 2451 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2452 2452 >>> url('file:///home/joe/repo')
2453 2453 <url scheme: 'file', path: '/home/joe/repo'>
2454 2454 >>> url('file:///c:/temp/foo/')
2455 2455 <url scheme: 'file', path: 'c:/temp/foo/'>
2456 2456 >>> url('bundle:foo')
2457 2457 <url scheme: 'bundle', path: 'foo'>
2458 2458 >>> url('bundle://../foo')
2459 2459 <url scheme: 'bundle', path: '../foo'>
2460 2460 >>> url(r'c:\foo\bar')
2461 2461 <url path: 'c:\\foo\\bar'>
2462 2462 >>> url(r'\\blah\blah\blah')
2463 2463 <url path: '\\\\blah\\blah\\blah'>
2464 2464 >>> url(r'\\blah\blah\blah#baz')
2465 2465 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2466 2466 >>> url(r'file:///C:\users\me')
2467 2467 <url scheme: 'file', path: 'C:\\users\\me'>
2468 2468
2469 2469 Authentication credentials:
2470 2470
2471 2471 >>> url('ssh://joe:xyz@x/repo')
2472 2472 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2473 2473 >>> url('ssh://joe@x/repo')
2474 2474 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2475 2475
2476 2476 Query strings and fragments:
2477 2477
2478 2478 >>> url('http://host/a?b#c')
2479 2479 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2480 2480 >>> url('http://host/a?b#c', parsequery=False, parsefragment=False)
2481 2481 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2482 2482
2483 2483 Empty path:
2484 2484
2485 2485 >>> url('')
2486 2486 <url path: ''>
2487 2487 >>> url('#a')
2488 2488 <url path: '', fragment: 'a'>
2489 2489 >>> url('http://host/')
2490 2490 <url scheme: 'http', host: 'host', path: ''>
2491 2491 >>> url('http://host/#a')
2492 2492 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2493 2493
2494 2494 Only scheme:
2495 2495
2496 2496 >>> url('http:')
2497 2497 <url scheme: 'http'>
2498 2498 """
2499 2499
2500 2500 _safechars = "!~*'()+"
2501 2501 _safepchars = "/!~*'()+:\\"
2502 2502 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2503 2503
2504 2504 def __init__(self, path, parsequery=True, parsefragment=True):
2505 2505 # We slowly chomp away at path until we have only the path left
2506 2506 self.scheme = self.user = self.passwd = self.host = None
2507 2507 self.port = self.path = self.query = self.fragment = None
2508 2508 self._localpath = True
2509 2509 self._hostport = ''
2510 2510 self._origpath = path
2511 2511
2512 2512 if parsefragment and '#' in path:
2513 2513 path, self.fragment = path.split('#', 1)
2514 2514
2515 2515 # special case for Windows drive letters and UNC paths
2516 2516 if hasdriveletter(path) or path.startswith('\\\\'):
2517 2517 self.path = path
2518 2518 return
2519 2519
2520 2520 # For compatibility reasons, we can't handle bundle paths as
2521 2521 # normal URLS
2522 2522 if path.startswith('bundle:'):
2523 2523 self.scheme = 'bundle'
2524 2524 path = path[7:]
2525 2525 if path.startswith('//'):
2526 2526 path = path[2:]
2527 2527 self.path = path
2528 2528 return
2529 2529
2530 2530 if self._matchscheme(path):
2531 2531 parts = path.split(':', 1)
2532 2532 if parts[0]:
2533 2533 self.scheme, path = parts
2534 2534 self._localpath = False
2535 2535
2536 2536 if not path:
2537 2537 path = None
2538 2538 if self._localpath:
2539 2539 self.path = ''
2540 2540 return
2541 2541 else:
2542 2542 if self._localpath:
2543 2543 self.path = path
2544 2544 return
2545 2545
2546 2546 if parsequery and '?' in path:
2547 2547 path, self.query = path.split('?', 1)
2548 2548 if not path:
2549 2549 path = None
2550 2550 if not self.query:
2551 2551 self.query = None
2552 2552
2553 2553 # // is required to specify a host/authority
2554 2554 if path and path.startswith('//'):
2555 2555 parts = path[2:].split('/', 1)
2556 2556 if len(parts) > 1:
2557 2557 self.host, path = parts
2558 2558 else:
2559 2559 self.host = parts[0]
2560 2560 path = None
2561 2561 if not self.host:
2562 2562 self.host = None
2563 2563 # path of file:///d is /d
2564 2564 # path of file:///d:/ is d:/, not /d:/
2565 2565 if path and not hasdriveletter(path):
2566 2566 path = '/' + path
2567 2567
2568 2568 if self.host and '@' in self.host:
2569 2569 self.user, self.host = self.host.rsplit('@', 1)
2570 2570 if ':' in self.user:
2571 2571 self.user, self.passwd = self.user.split(':', 1)
2572 2572 if not self.host:
2573 2573 self.host = None
2574 2574
2575 2575 # Don't split on colons in IPv6 addresses without ports
2576 2576 if (self.host and ':' in self.host and
2577 2577 not (self.host.startswith('[') and self.host.endswith(']'))):
2578 2578 self._hostport = self.host
2579 2579 self.host, self.port = self.host.rsplit(':', 1)
2580 2580 if not self.host:
2581 2581 self.host = None
2582 2582
2583 2583 if (self.host and self.scheme == 'file' and
2584 2584 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2585 2585 raise Abort(_('file:// URLs can only refer to localhost'))
2586 2586
2587 2587 self.path = path
2588 2588
2589 2589 # leave the query string escaped
2590 2590 for a in ('user', 'passwd', 'host', 'port',
2591 2591 'path', 'fragment'):
2592 2592 v = getattr(self, a)
2593 2593 if v is not None:
2594 2594 setattr(self, a, pycompat.urlunquote(v))
2595 2595
2596 2596 def __repr__(self):
2597 2597 attrs = []
2598 2598 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2599 2599 'query', 'fragment'):
2600 2600 v = getattr(self, a)
2601 2601 if v is not None:
2602 2602 attrs.append('%s: %r' % (a, v))
2603 2603 return '<url %s>' % ', '.join(attrs)
2604 2604
2605 2605 def __str__(self):
2606 2606 r"""Join the URL's components back into a URL string.
2607 2607
2608 2608 Examples:
2609 2609
2610 2610 >>> str(url('http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2611 2611 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2612 2612 >>> str(url('http://user:pw@host:80/?foo=bar&baz=42'))
2613 2613 'http://user:pw@host:80/?foo=bar&baz=42'
2614 2614 >>> str(url('http://user:pw@host:80/?foo=bar%3dbaz'))
2615 2615 'http://user:pw@host:80/?foo=bar%3dbaz'
2616 2616 >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
2617 2617 'ssh://user:pw@[::1]:2200//home/joe#'
2618 2618 >>> str(url('http://localhost:80//'))
2619 2619 'http://localhost:80//'
2620 2620 >>> str(url('http://localhost:80/'))
2621 2621 'http://localhost:80/'
2622 2622 >>> str(url('http://localhost:80'))
2623 2623 'http://localhost:80/'
2624 2624 >>> str(url('bundle:foo'))
2625 2625 'bundle:foo'
2626 2626 >>> str(url('bundle://../foo'))
2627 2627 'bundle:../foo'
2628 2628 >>> str(url('path'))
2629 2629 'path'
2630 2630 >>> str(url('file:///tmp/foo/bar'))
2631 2631 'file:///tmp/foo/bar'
2632 2632 >>> str(url('file:///c:/tmp/foo/bar'))
2633 2633 'file:///c:/tmp/foo/bar'
2634 2634 >>> print url(r'bundle:foo\bar')
2635 2635 bundle:foo\bar
2636 2636 >>> print url(r'file:///D:\data\hg')
2637 2637 file:///D:\data\hg
2638 2638 """
2639 2639 if self._localpath:
2640 2640 s = self.path
2641 2641 if self.scheme == 'bundle':
2642 2642 s = 'bundle:' + s
2643 2643 if self.fragment:
2644 2644 s += '#' + self.fragment
2645 2645 return s
2646 2646
2647 2647 s = self.scheme + ':'
2648 2648 if self.user or self.passwd or self.host:
2649 2649 s += '//'
2650 2650 elif self.scheme and (not self.path or self.path.startswith('/')
2651 2651 or hasdriveletter(self.path)):
2652 2652 s += '//'
2653 2653 if hasdriveletter(self.path):
2654 2654 s += '/'
2655 2655 if self.user:
2656 2656 s += urlreq.quote(self.user, safe=self._safechars)
2657 2657 if self.passwd:
2658 2658 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2659 2659 if self.user or self.passwd:
2660 2660 s += '@'
2661 2661 if self.host:
2662 2662 if not (self.host.startswith('[') and self.host.endswith(']')):
2663 2663 s += urlreq.quote(self.host)
2664 2664 else:
2665 2665 s += self.host
2666 2666 if self.port:
2667 2667 s += ':' + urlreq.quote(self.port)
2668 2668 if self.host:
2669 2669 s += '/'
2670 2670 if self.path:
2671 2671 # TODO: similar to the query string, we should not unescape the
2672 2672 # path when we store it, the path might contain '%2f' = '/',
2673 2673 # which we should *not* escape.
2674 2674 s += urlreq.quote(self.path, safe=self._safepchars)
2675 2675 if self.query:
2676 2676 # we store the query in escaped form.
2677 2677 s += '?' + self.query
2678 2678 if self.fragment is not None:
2679 2679 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2680 2680 return s
2681 2681
2682 2682 def authinfo(self):
2683 2683 user, passwd = self.user, self.passwd
2684 2684 try:
2685 2685 self.user, self.passwd = None, None
2686 2686 s = str(self)
2687 2687 finally:
2688 2688 self.user, self.passwd = user, passwd
2689 2689 if not self.user:
2690 2690 return (s, None)
2691 2691 # authinfo[1] is passed to urllib2 password manager, and its
2692 2692 # URIs must not contain credentials. The host is passed in the
2693 2693 # URIs list because Python < 2.4.3 uses only that to search for
2694 2694 # a password.
2695 2695 return (s, (None, (s, self.host),
2696 2696 self.user, self.passwd or ''))
2697 2697
2698 2698 def isabs(self):
2699 2699 if self.scheme and self.scheme != 'file':
2700 2700 return True # remote URL
2701 2701 if hasdriveletter(self.path):
2702 2702 return True # absolute for our purposes - can't be joined()
2703 2703 if self.path.startswith(r'\\'):
2704 2704 return True # Windows UNC path
2705 2705 if self.path.startswith('/'):
2706 2706 return True # POSIX-style
2707 2707 return False
2708 2708
2709 2709 def localpath(self):
2710 2710 if self.scheme == 'file' or self.scheme == 'bundle':
2711 2711 path = self.path or '/'
2712 2712 # For Windows, we need to promote hosts containing drive
2713 2713 # letters to paths with drive letters.
2714 2714 if hasdriveletter(self._hostport):
2715 2715 path = self._hostport + '/' + self.path
2716 2716 elif (self.host is not None and self.path
2717 2717 and not hasdriveletter(path)):
2718 2718 path = '/' + path
2719 2719 return path
2720 2720 return self._origpath
2721 2721
2722 2722 def islocal(self):
2723 2723 '''whether localpath will return something that posixfile can open'''
2724 2724 return (not self.scheme or self.scheme == 'file'
2725 2725 or self.scheme == 'bundle')
2726 2726
2727 2727 def hasscheme(path):
2728 2728 return bool(url(path).scheme)
2729 2729
2730 2730 def hasdriveletter(path):
2731 2731 return path and path[1:2] == ':' and path[0:1].isalpha()
2732 2732
2733 2733 def urllocalpath(path):
2734 2734 return url(path, parsequery=False, parsefragment=False).localpath()
2735 2735
2736 2736 def hidepassword(u):
2737 2737 '''hide user credential in a url string'''
2738 2738 u = url(u)
2739 2739 if u.passwd:
2740 2740 u.passwd = '***'
2741 2741 return str(u)
2742 2742
2743 2743 def removeauth(u):
2744 2744 '''remove all authentication information from a url string'''
2745 2745 u = url(u)
2746 2746 u.user = u.passwd = None
2747 2747 return str(u)
2748 2748
2749 2749 def isatty(fp):
2750 2750 try:
2751 2751 return fp.isatty()
2752 2752 except AttributeError:
2753 2753 return False
2754 2754
2755 2755 timecount = unitcountfn(
2756 2756 (1, 1e3, _('%.0f s')),
2757 2757 (100, 1, _('%.1f s')),
2758 2758 (10, 1, _('%.2f s')),
2759 2759 (1, 1, _('%.3f s')),
2760 2760 (100, 0.001, _('%.1f ms')),
2761 2761 (10, 0.001, _('%.2f ms')),
2762 2762 (1, 0.001, _('%.3f ms')),
2763 2763 (100, 0.000001, _('%.1f us')),
2764 2764 (10, 0.000001, _('%.2f us')),
2765 2765 (1, 0.000001, _('%.3f us')),
2766 2766 (100, 0.000000001, _('%.1f ns')),
2767 2767 (10, 0.000000001, _('%.2f ns')),
2768 2768 (1, 0.000000001, _('%.3f ns')),
2769 2769 )
2770 2770
2771 2771 _timenesting = [0]
2772 2772
2773 2773 def timed(func):
2774 2774 '''Report the execution time of a function call to stderr.
2775 2775
2776 2776 During development, use as a decorator when you need to measure
2777 2777 the cost of a function, e.g. as follows:
2778 2778
2779 2779 @util.timed
2780 2780 def foo(a, b, c):
2781 2781 pass
2782 2782 '''
2783 2783
2784 2784 def wrapper(*args, **kwargs):
2785 2785 start = time.time()
2786 2786 indent = 2
2787 2787 _timenesting[0] += indent
2788 2788 try:
2789 2789 return func(*args, **kwargs)
2790 2790 finally:
2791 2791 elapsed = time.time() - start
2792 2792 _timenesting[0] -= indent
2793 2793 stderr.write('%s%s: %s\n' %
2794 2794 (' ' * _timenesting[0], func.__name__,
2795 2795 timecount(elapsed)))
2796 2796 return wrapper
2797 2797
2798 2798 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
2799 2799 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
2800 2800
2801 2801 def sizetoint(s):
2802 2802 '''Convert a space specifier to a byte count.
2803 2803
2804 2804 >>> sizetoint('30')
2805 2805 30
2806 2806 >>> sizetoint('2.2kb')
2807 2807 2252
2808 2808 >>> sizetoint('6M')
2809 2809 6291456
2810 2810 '''
2811 2811 t = s.strip().lower()
2812 2812 try:
2813 2813 for k, u in _sizeunits:
2814 2814 if t.endswith(k):
2815 2815 return int(float(t[:-len(k)]) * u)
2816 2816 return int(t)
2817 2817 except ValueError:
2818 2818 raise error.ParseError(_("couldn't parse size: %s") % s)
2819 2819
2820 2820 class hooks(object):
2821 2821 '''A collection of hook functions that can be used to extend a
2822 2822 function's behavior. Hooks are called in lexicographic order,
2823 2823 based on the names of their sources.'''
2824 2824
2825 2825 def __init__(self):
2826 2826 self._hooks = []
2827 2827
2828 2828 def add(self, source, hook):
2829 2829 self._hooks.append((source, hook))
2830 2830
2831 2831 def __call__(self, *args):
2832 2832 self._hooks.sort(key=lambda x: x[0])
2833 2833 results = []
2834 2834 for source, hook in self._hooks:
2835 2835 results.append(hook(*args))
2836 2836 return results
2837 2837
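# A minimal sketch (hypothetical hooks; note that call order follows the
# lexicographic order of the source names, not registration order):
#
#   h = hooks()
#   h.add('zzz-ext', lambda x: x + 1)
#   h.add('aaa-ext', lambda x: x * 2)
#   h(3) -> [6, 4]   # 'aaa-ext' runs first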
2838 2838 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%s'):
2839 2839 '''Yields lines for a nicely formatted stacktrace.
2840 2840 Skips the 'skip' last entries.
2841 2841 Each file+linenumber is formatted according to fileline.
2842 2842 Each line is formatted according to line.
2843 2843 If line is None, it yields:
2844 2844 length of longest filepath+line number,
2845 2845 filepath+linenumber,
2846 2846 function
2847 2847
2848 2848 Not to be used in production code but very convenient while developing.
2849 2849 '''
2850 2850 entries = [(fileline % (fn, ln), func)
2851 2851 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]]
2852 2852 if entries:
2853 2853 fnmax = max(len(entry[0]) for entry in entries)
2854 2854 for fnln, func in entries:
2855 2855 if line is None:
2856 2856 yield (fnmax, fnln, func)
2857 2857 else:
2858 2858 yield line % (fnmax, fnln, func)
2859 2859
2860 2860 def debugstacktrace(msg='stacktrace', skip=0, f=stderr, otherf=stdout):
2861 2861 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
2862 2862 Skips the 'skip' last entries. By default it will flush stdout first.
2863 2863 It can be used everywhere and intentionally does not require an ui object.
2864 2864 Not to be used in production code but very convenient while developing.
2865 2865 '''
2866 2866 if otherf:
2867 2867 otherf.flush()
2868 2868 f.write('%s at:\n' % msg)
2869 2869 for line in getstackframes(skip + 1):
2870 2870 f.write(line)
2871 2871 f.flush()
2872 2872
2873 2873 class dirs(object):
2874 2874 '''a multiset of directory names from a dirstate or manifest'''
2875 2875
2876 2876 def __init__(self, map, skip=None):
2877 2877 self._dirs = {}
2878 2878 addpath = self.addpath
2879 2879 if safehasattr(map, 'iteritems') and skip is not None:
2880 2880 for f, s in map.iteritems():
2881 2881 if s[0] != skip:
2882 2882 addpath(f)
2883 2883 else:
2884 2884 for f in map:
2885 2885 addpath(f)
2886 2886
2887 2887 def addpath(self, path):
2888 2888 dirs = self._dirs
2889 2889 for base in finddirs(path):
2890 2890 if base in dirs:
2891 2891 dirs[base] += 1
2892 2892 return
2893 2893 dirs[base] = 1
2894 2894
2895 2895 def delpath(self, path):
2896 2896 dirs = self._dirs
2897 2897 for base in finddirs(path):
2898 2898 if dirs[base] > 1:
2899 2899 dirs[base] -= 1
2900 2900 return
2901 2901 del dirs[base]
2902 2902
2903 2903 def __iter__(self):
2904 2904 return self._dirs.iterkeys()
2905 2905
2906 2906 def __contains__(self, d):
2907 2907 return d in self._dirs
2908 2908
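# A minimal sketch of the multiset behavior (hypothetical paths):
#
#   d = dirs(['a/b/c', 'a/b/d'])
#   'a/b' in d -> True   # refcount 2
#   d.delpath('a/b/c')
#   'a/b' in d -> True   # refcount now 1
#   d.delpath('a/b/d')
#   'a/b' in d -> False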
2909 2909 if safehasattr(parsers, 'dirs'):
2910 2910 dirs = parsers.dirs
2911 2911
2912 2912 def finddirs(path):
2913 2913 pos = path.rfind('/')
2914 2914 while pos != -1:
2915 2915 yield path[:pos]
2916 2916 pos = path.rfind('/', 0, pos)
2917 2917
2918 2918 class ctxmanager(object):
2919 2919 '''A context manager for use in 'with' blocks to allow multiple
2920 2920 contexts to be entered at once. This is both safer and more
2921 2921 flexible than contextlib.nested.
2922 2922
2923 2923 Once Mercurial supports Python 2.7+, this will become mostly
2924 2924 unnecessary.
2925 2925 '''
2926 2926
2927 2927 def __init__(self, *args):
2928 2928 '''Accepts a list of no-argument functions that return context
2929 2929 managers. These will be invoked when ``enter()`` is called.'''
2930 2930 self._pending = args
2931 2931 self._atexit = []
2932 2932
2933 2933 def __enter__(self):
2934 2934 return self
2935 2935
2936 2936 def enter(self):
2937 2937 '''Create and enter context managers in the order in which they were
2938 2938 passed to the constructor.'''
2939 2939 values = []
2940 2940 for func in self._pending:
2941 2941 obj = func()
2942 2942 values.append(obj.__enter__())
2943 2943 self._atexit.append(obj.__exit__)
2944 2944 del self._pending
2945 2945 return values
2946 2946
2947 2947 def atexit(self, func, *args, **kwargs):
2948 2948 '''Add a function to call when this context manager exits. The
2949 2949 ordering of multiple atexit calls is unspecified, save that
2950 2950 they will happen before any __exit__ functions.'''
2951 2951 def wrapper(exc_type, exc_val, exc_tb):
2952 2952 func(*args, **kwargs)
2953 2953 self._atexit.append(wrapper)
2954 2954 return func
2955 2955
2956 2956 def __exit__(self, exc_type, exc_val, exc_tb):
2957 2957 '''Context managers are exited in the reverse order from which
2958 2958 they were created.'''
2959 2959 received = exc_type is not None
2960 2960 suppressed = False
2961 2961 pending = None
2962 2962 self._atexit.reverse()
2963 2963 for exitfunc in self._atexit:
2964 2964 try:
2965 2965 if exitfunc(exc_type, exc_val, exc_tb):
2966 2966 suppressed = True
2967 2967 exc_type = None
2968 2968 exc_val = None
2969 2969 exc_tb = None
2970 2970 except BaseException:
2972 2972 exc_type, exc_val, exc_tb = pending = sys.exc_info()
2973 2973 del self._atexit
2974 2974 if pending:
2975 2975 raise exc_val
2976 2976 return received and suppressed
2977 2977
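# A minimal usage sketch (``repo.wlock``/``repo.lock`` stand in for any
# no-argument callables returning context managers; ``reportlocks`` is
# hypothetical):
#
#   with ctxmanager(repo.wlock, repo.lock) as c:
#       wlock, lock = c.enter()
#       c.atexit(reportlocks)   # runs before the locks are released
#       # ... work under both locks ...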
2978 2978 # compression code
2979 2979
2980 2980 SERVERROLE = 'server'
2981 2981 CLIENTROLE = 'client'
2982 2982
2983 2983 compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
2984 2984 (u'name', u'serverpriority',
2985 2985 u'clientpriority'))
2986 2986
2987 2987 class compressormanager(object):
2988 2988 """Holds registrations of various compression engines.
2989 2989
2990 2990 This class essentially abstracts the differences between compression
2991 2991 engines to allow new compression formats to be added easily, possibly from
2992 2992 extensions.
2993 2993
2994 2994 Compressors are registered against the global instance by calling its
2995 2995 ``register()`` method.
2996 2996 """
2997 2997 def __init__(self):
2998 2998 self._engines = {}
2999 2999 # Bundle spec human name to engine name.
3000 3000 self._bundlenames = {}
3001 3001 # Internal bundle identifier to engine name.
3002 3002 self._bundletypes = {}
3003 # Revlog header to engine name.
3004 self._revlogheaders = {}
3003 3005 # Wire proto identifier to engine name.
3004 3006 self._wiretypes = {}
3005 3007
3006 3008 def __getitem__(self, key):
3007 3009 return self._engines[key]
3008 3010
3009 3011 def __contains__(self, key):
3010 3012 return key in self._engines
3011 3013
3012 3014 def __iter__(self):
3013 3015 return iter(self._engines.keys())
3014 3016
3015 3017 def register(self, engine):
3016 3018 """Register a compression engine with the manager.
3017 3019
3018 3020 The argument must be a ``compressionengine`` instance.
3019 3021 """
3020 3022 if not isinstance(engine, compressionengine):
3021 3023 raise ValueError(_('argument must be a compressionengine'))
3022 3024
3023 3025 name = engine.name()
3024 3026
3025 3027 if name in self._engines:
3026 3028 raise error.Abort(_('compression engine %s already registered') %
3027 3029 name)
3028 3030
3029 3031 bundleinfo = engine.bundletype()
3030 3032 if bundleinfo:
3031 3033 bundlename, bundletype = bundleinfo
3032 3034
3033 3035 if bundlename in self._bundlenames:
3034 3036 raise error.Abort(_('bundle name %s already registered') %
3035 3037 bundlename)
3036 3038 if bundletype in self._bundletypes:
3037 3039 raise error.Abort(_('bundle type %s already registered by %s') %
3038 3040 (bundletype, self._bundletypes[bundletype]))
3039 3041
3040 3042 # Only register an external-facing name if one was declared.
3041 3043 if bundlename:
3042 3044 self._bundlenames[bundlename] = name
3043 3045
3044 3046 self._bundletypes[bundletype] = name
3045 3047
3046 3048 wiresupport = engine.wireprotosupport()
3047 3049 if wiresupport:
3048 3050 wiretype = wiresupport.name
3049 3051 if wiretype in self._wiretypes:
3050 3052 raise error.Abort(_('wire protocol compression %s already '
3051 3053 'registered by %s') %
3052 3054 (wiretype, self._wiretypes[wiretype]))
3053 3055
3054 3056 self._wiretypes[wiretype] = name
3055 3057
3058 revlogheader = engine.revlogheader()
3059 if revlogheader and revlogheader in self._revlogheaders:
3060 raise error.Abort(_('revlog header %s already registered by %s') %
3061 (revlogheader, self._revlogheaders[revlogheader]))
3062
3063 if revlogheader:
3064 self._revlogheaders[revlogheader] = name
3065
3056 3066 self._engines[name] = engine
3057 3067
3058 3068 @property
3059 3069 def supportedbundlenames(self):
3060 3070 return set(self._bundlenames.keys())
3061 3071
3062 3072 @property
3063 3073 def supportedbundletypes(self):
3064 3074 return set(self._bundletypes.keys())
3065 3075
3066 3076 def forbundlename(self, bundlename):
3067 3077 """Obtain a compression engine registered to a bundle name.
3068 3078
3069 3079 Will raise KeyError if the bundle type isn't registered.
3070 3080
3071 3081 Will abort if the engine is known but not available.
3072 3082 """
3073 3083 engine = self._engines[self._bundlenames[bundlename]]
3074 3084 if not engine.available():
3075 3085 raise error.Abort(_('compression engine %s could not be loaded') %
3076 3086 engine.name())
3077 3087 return engine
3078 3088
3079 3089 def forbundletype(self, bundletype):
3080 3090 """Obtain a compression engine registered to a bundle type.
3081 3091
3082 3092 Will raise KeyError if the bundle type isn't registered.
3083 3093
3084 3094 Will abort if the engine is known but not available.
3085 3095 """
3086 3096 engine = self._engines[self._bundletypes[bundletype]]
3087 3097 if not engine.available():
3088 3098 raise error.Abort(_('compression engine %s could not be loaded') %
3089 3099 engine.name())
3090 3100 return engine
3091 3101
3092 3102 def supportedwireengines(self, role, onlyavailable=True):
3093 3103 """Obtain compression engines that support the wire protocol.
3094 3104
3095 3105 Returns a list of engines in prioritized order, most desired first.
3096 3106
3097 3107 If ``onlyavailable`` is set, filter out engines that can't be
3098 3108 loaded.
3099 3109 """
3100 3110 assert role in (SERVERROLE, CLIENTROLE)
3101 3111
3102 3112 attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'
3103 3113
3104 3114 engines = [self._engines[e] for e in self._wiretypes.values()]
3105 3115 if onlyavailable:
3106 3116 engines = [e for e in engines if e.available()]
3107 3117
3108 3118 def getkey(e):
3109 3119 # Sort first by priority, highest first. In case of tie, sort
3110 3120 # alphabetically. This is arbitrary, but ensures output is
3111 3121 # stable.
3112 3122 w = e.wireprotosupport()
3113 3123 return -1 * getattr(w, attr), w.name
3114 3124
3115 3125 return list(sorted(engines, key=getkey))
3116 3126
3117 3127 def forwiretype(self, wiretype):
3118 3128 engine = self._engines[self._wiretypes[wiretype]]
3119 3129 if not engine.available():
3120 3130 raise error.Abort(_('compression engine %s could not be loaded') %
3121 3131 engine.name())
3122 3132 return engine
3123 3133
3134 def forrevlogheader(self, header):
3135 """Obtain a compression engine registered to a revlog header.
3136
3137 Will raise KeyError if the revlog header value isn't registered.
3138 """
3139 return self._engines[self._revlogheaders[header]]
3140
3124 3141 compengines = compressormanager()
3125 3142
3126 3143 class compressionengine(object):
3127 3144 """Base class for compression engines.
3128 3145
3129 3146 Compression engines must implement the interface defined by this class.
3130 3147 """
3131 3148 def name(self):
3132 3149 """Returns the name of the compression engine.
3133 3150
3134 3151 This is the key the engine is registered under.
3135 3152
3136 3153 This method must be implemented.
3137 3154 """
3138 3155 raise NotImplementedError()
3139 3156
3140 3157 def available(self):
3141 3158 """Whether the compression engine is available.
3142 3159
3143 3160 The intent of this method is to allow optional compression engines
3144 3161 that may not be available in all installations (such as engines relying
3145 3162 on C extensions that may not be present).
3146 3163 """
3147 3164 return True
3148 3165
3149 3166 def bundletype(self):
3150 3167 """Describes bundle identifiers for this engine.
3151 3168
3152 3169 If this compression engine isn't supported for bundles, returns None.
3153 3170
3154 3171 If this engine can be used for bundles, returns a 2-tuple of strings of
3155 3172 the user-facing "bundle spec" compression name and an internal
3156 3173 identifier used to denote the compression format within bundles. To
3157 3174 exclude the name from external usage, set the first element to ``None``.
3158 3175
3159 3176 If bundle compression is supported, the class must also implement
3160 3177 ``compressstream`` and ``decompressorreader``.
3161 3178 """
3162 3179 return None
3163 3180
3164 3181 def wireprotosupport(self):
3165 3182 """Declare support for this compression format on the wire protocol.
3166 3183
3167 3184 If this compression engine isn't supported for compressing wire
3168 3185 protocol payloads, returns None.
3169 3186
3170 3187 Otherwise, returns ``compenginewireprotosupport`` with the following
3171 3188 fields:
3172 3189
3173 3190 * String format identifier
3174 3191 * Integer priority for the server
3175 3192 * Integer priority for the client
3176 3193
3177 3194 The integer priorities are used to order the advertisement of format
3178 3195 support by server and client. The highest integer is advertised
3179 3196 first. Integers with non-positive values aren't advertised.
3180 3197
3181 3198 The priority values are somewhat arbitrary and only used for default
3182 3199 ordering. The relative order can be changed via config options.
3183 3200
3184 3201 If wire protocol compression is supported, the class must also implement
3185 3202 ``compressstream`` and ``decompressorreader``.
3186 3203 """
3187 3204 return None
3188 3205
3206 def revlogheader(self):
3207 """Header added to revlog chunks that identifies this engine.
3208
3209 If this engine can be used to compress revlogs, this method should
3210 return the bytes used to identify chunks compressed with this engine.
3211 Else, the method should return ``None`` to indicate it does not
3212 participate in revlog compression.
3213 """
3214 return None
3215
3189 3216 def compressstream(self, it, opts=None):
3190 3217 """Compress an iterator of chunks.
3191 3218
3192 3219 The method receives an iterator (ideally a generator) of chunks of
3193 3220 bytes to be compressed. It returns an iterator (ideally a generator)
3194 3221 of bytes of chunks representing the compressed output.
3195 3222
3196 3223 Optionally accepts an argument defining how to perform compression.
3197 3224 Each engine treats this argument differently.
3198 3225 """
3199 3226 raise NotImplementedError()
3200 3227
3201 3228 def decompressorreader(self, fh):
3202 3229 """Perform decompression on a file object.
3203 3230
3204 3231 Argument is an object with a ``read(size)`` method that returns
3205 3232 compressed data. Return value is an object with a ``read(size)`` that
3206 3233 returns uncompressed data.
3207 3234 """
3208 3235 raise NotImplementedError()
3209 3236
3210 3237 def revlogcompressor(self, opts=None):
3211 3238 """Obtain an object that can be used to compress revlog entries.
3212 3239
3213 3240 The object has a ``compress(data)`` method that compresses binary
3214 3241 data. This method returns compressed binary data or ``None`` if
3215 3242 the data could not be compressed (too small, not compressible, etc).
3216 3243 The returned data should have a header uniquely identifying this
3217 3244 compression format so decompression can be routed to this engine.
3245 This header should be identified by the ``revlogheader()`` return
3246 value.
3247
3248 The object has a ``decompress(data)`` method that decompresses
3249 data. The method will only be called if ``data`` begins with
3250 ``revlogheader()``. The method should return the raw, uncompressed
3251 data or raise a ``RevlogError``.
3218 3252
3219 3253 The object is reusable but is not thread safe.
3220 3254 """
3221 3255 raise NotImplementedError()
3222 3256
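# Illustrative sketch, not part of the original module: the minimal shape of
# a custom engine plugged into the API above. The engine name, bundle
# identifiers, wire protocol priorities, and revlog header byte ('example',
# 'EX', 5/5, '\x01') are invented for the demonstration; a real engine would
# pick unique values and perform actual compression.
class _exampleengine(compressionengine):
    def name(self):
        return 'example'

    def bundletype(self):
        # (user-facing "bundle spec" name, internal bundle identifier)
        return 'example', 'EX'

    def wireprotosupport(self):
        # Priorities of 5 mean this engine would be advertised after zstd
        # (50) and zlib (20); non-positive values aren't advertised at all.
        return compewireprotosupport('example', 5, 5)

    def revlogheader(self):
        # Byte prefix identifying revlog chunks compressed by this engine.
        return '\x01'

    def compressstream(self, it, opts=None):
        for chunk in it:
            yield chunk  # a real engine would transform the bytes here

    def decompressorreader(self, fh):
        return fh  # a real engine would wrap fh in a decompressing reader

    class exrevlogcompressor(object):
        def compress(self, data):
            # Returning None declines compression; the revlog then stores
            # the chunk uncompressed. A real compressor would emit output
            # beginning with the '\x01' header declared above.
            return None

    def revlogcompressor(self, opts=None):
        return self.exrevlogcompressor()

# Registration is deliberately left out of this sketch; it would be:
#   compengines.register(_exampleengine())
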
class _zlibengine(compressionengine):
    def name(self):
        return 'zlib'

    def bundletype(self):
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        opts = opts or {}

        z = zlib.compressobj(opts.get('level', -1))
        for chunk in it:
            data = z.compress(chunk)
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through the generator.
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = zlib.decompressobj()
            for chunk in filechunkiter(fh):
                while chunk:
                    # Limit output size to limit memory.
                    yield d.decompress(chunk, 2 ** 18)
                    chunk = d.unconsumed_tail

        return chunkbuffer(gen())

    class zlibrevlogcompressor(object):
        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                return None

            elif insize <= 1000000:
                compressed = zlib.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            else:
                z = zlib.compressobj()
                parts = []
                pos = 0
                while pos < insize:
                    pos2 = pos + 2**20
                    parts.append(z.compress(data[pos:pos2]))
                    pos = pos2
                parts.append(z.flush())

                if sum(map(len, parts)) < insize:
                    return ''.join(parts)
                return None

        def decompress(self, data):
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())

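# Illustrative sketch, not part of the original module: round-tripping a
# buffer through a registered engine's streaming APIs. The dict-style
# ``compengines['zlib']`` lookup is assumed from the registry defined
# earlier in this file; ``stringio`` is this module's BytesIO alias.
def _examplezlibroundtrip(data):
    engine = compengines['zlib']
    # compressstream consumes an iterator of chunks and yields compressed
    # chunks; a single-element iterator is the simplest case.
    compressed = ''.join(engine.compressstream(iter([data])))
    # decompressorreader wraps a file object and exposes read(size).
    reader = engine.decompressorreader(stringio(compressed))
    return reader.read(len(data)) == data
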
class _bz2engine(compressionengine):
    def name(self):
        return 'bz2'

    def bundletype(self):
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        opts = opts or {}
        z = bz2.BZ2Compressor(opts.get('level', 9))
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = bz2.BZ2Decompressor()
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_bz2engine())

class _truncatedbz2engine(compressionengine):
    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled
    # elsewhere.

    def decompressorreader(self, fh):
        def gen():
            # The input stream doesn't have the 'BZ' header. So add it back.
            d = bz2.BZ2Decompressor()
            d.decompress('BZ')
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_truncatedbz2engine())

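# Illustrative sketch, not part of the original module: the 'BZ' priming
# trick above in isolation. Feeding the two stripped magic bytes to a fresh
# decompressor first lets it accept the truncated stream as if the header
# were still present.
def _exampletruncatedbz2(truncated):
    d = bz2.BZ2Decompressor()
    d.decompress('BZ')  # restore the stripped magic bytes
    return d.decompress(truncated)
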
class _noopengine(compressionengine):
    def name(self):
        return 'none'

    def bundletype(self):
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't because,
    # unless you are on a fast network, uncompressed payloads can easily
    # saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        return it

    def decompressorreader(self, fh):
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())

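# Illustrative sketch, not part of the original module: resolving an engine
# from its bundle identifiers. The ``forbundletype`` helper is assumed from
# the registry defined earlier in this file.
def _examplebundlelookup():
    # 'UN' is the internal bundle identifier declared by _noopengine above.
    engine = compengines.forbundletype('UN')
    assert engine.name() == 'none'
    return engine
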
class _zstdengine(compressionengine):
    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
            return zstd
        except ImportError:
            return None

    def available(self):
        return bool(self._module)

    def bundletype(self):
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        opts = opts or {}
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = opts.get('level', 3)

        zstd = self._module
        z = zstd.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        zstd = self._module
        dctx = zstd.ZstdDecompressor()
        return chunkbuffer(dctx.read_from(fh))

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # Writing the content size adds a few bytes to the output.
            # However, it allows decompression to be more optimal since we
            # can pre-allocate a buffer to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level,
                                             write_content_size=True)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                return None

            elif insize <= 1000000:
                compressed = self._cctx.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None
            else:
                z = self._cctx.compressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._compinsize
                    chunk = z.compress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                chunks.append(z.flush())

                if sum(map(len, chunks)) < insize:
                    return ''.join(chunks)
                return None

        def decompress(self, data):
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._decompinsize
                    chunk = dobj.decompress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                # Frame should be exhausted, so no finish() API.

                return ''.join(chunks)
            except Exception as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))

compengines.register(_zstdengine())

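# Illustrative sketch, not part of the original module: how revlog code
# might drive the compressor objects above. Falls back to zlib when the
# optional zstd module is absent; the dict-style engine lookup is assumed
# from the registry defined earlier in this file. The startswith() check
# restates the ``revlogcompressor`` contract: compressed output must begin
# with the engine's identifying ``revlogheader()`` bytes.
def _examplerevlogcompress(data):
    # ``data`` must be non-empty; the compressors assert on empty input.
    engine = compengines['zstd']
    if not engine.available():
        engine = compengines['zlib']
    compressor = engine.revlogcompressor()
    compressed = compressor.compress(data)
    if compressed is None:
        # Declined (too small or incompressible); the revlog would store
        # the chunk uncompressed instead.
        return data
    assert compressed.startswith(engine.revlogheader())
    return compressed
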
# convenient shortcut
dst = debugstacktrace