def _splitlflines(data):
    """split data into '\\n'-terminated lines plus an unterminated tail

    Returns ``(lines, rest)``: ``lines`` is the list of complete lines found
    in ``data``, each keeping its trailing '\\n', and ``rest`` is whatever
    follows the last '\\n' ('' when nothing does).

    Only '\\n' ends a line. str.splitlines() is deliberately not used here:
    it also breaks on '\\r', which would cut a line at a bare carriage return.
    """
    pieces = data.split('\n')
    rest = pieces.pop()
    return [p + '\n' for p in pieces], rest

if (pyplatform.python_implementation() == 'CPython' and
    sys.version_info < (3, 0)):
    # There is an issue in CPython that some IO methods do not handle EINTR
    # correctly. The following table shows what CPython version (and functions)
    # are affected (buggy: has the EINTR bug, okay: otherwise):
    #
    #                | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
    #   --------------------------------------------------
    #    fp.__iter__ | buggy   | buggy           | okay
    #    fp.read*    | buggy   | okay [1]        | okay
    #
    # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
    #
    # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
    # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
    #
    # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
    # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
    # CPython 2, because CPython 2 maintains an internal readahead buffer for
    # fp.__iter__ but not other fp.read* methods.
    #
    # On modern systems like Linux, the "read" syscall cannot be interrupted
    # when reading "fast" files like on-disk files. So the EINTR issue only
    # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
    # files approximately as "fast" files and use the fast (unsafe) code path,
    # to minimize the performance impact.
    if sys.version_info >= (2, 7, 4):
        # fp.readline deals with EINTR correctly, use it as a workaround.
        def _safeiterfile(fp):
            """iterate over the lines of fp via fp.readline until it
            returns ''"""
            return iter(fp.readline, '')
    else:
        # fp.read* are broken too, manually deal with EINTR in a stupid way.
        # note: this may block longer than necessary because of bufsize.
        def _safeiterfile(fp, bufsize=4096):
            """yield the lines of fp, reading its descriptor with os.read

            Data is fetched in chunks of at most ``bufsize`` bytes and a read
            interrupted by a signal (EINTR) is simply retried. Lines end at
            '\\n' only; a final line without '\\n' is yielded as-is.

            Reads go straight to fp.fileno(), so fp's own buffering is not
            involved.
            """
            fd = fp.fileno()
            pending = ''
            while True:
                try:
                    buf = os.read(fd, bufsize)
                except OSError as ex:
                    # os.read only raises EINTR before any data is read
                    if ex.errno == errno.EINTR:
                        continue
                    raise
                if not buf:
                    # EOF
                    break
                pending += buf
                if '\n' in buf:
                    # at least one line got completed by this chunk; keep
                    # the unterminated tail for the next round
                    lines, pending = _splitlflines(pending)
                    for l in lines:
                        yield l
            if pending:
                yield pending

    def iterfile(fp):
        """like fp.__iter__ but does not have issues with EINTR

        Objects that are not plain ``file`` instances, and ``file`` objects
        backed by a regular (S_ISREG) file, are returned unchanged so callers
        keep the fast native iteration. Any other ``file`` (pipe, socket,
        tty, ...) is iterated through the slower EINTR-safe path.
        """
        fastpath = True
        if type(fp) is file:
            fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
        if fastpath:
            return fp
        return _safeiterfile(fp)
else:
    # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
    def iterfile(fp):
        """return fp itself; its native iteration is already EINTR-safe"""
        return fp