##// END OF EJS Templates
util: improve iterfile so it chooses code path wisely...
Jun Wu -
r30418:1156ec81 default
parent child Browse files
Show More
@@ -24,10 +24,12 b' import gc'
24 import hashlib
24 import hashlib
25 import imp
25 import imp
26 import os
26 import os
27 import platform as pyplatform
27 import re as remod
28 import re as remod
28 import shutil
29 import shutil
29 import signal
30 import signal
30 import socket
31 import socket
32 import stat
31 import string
33 import string
32 import subprocess
34 import subprocess
33 import sys
35 import sys
@@ -2208,10 +2210,77 b" def wrap(line, width, initindent='', han"
2208 subsequent_indent=hangindent)
2210 subsequent_indent=hangindent)
2209 return wrapper.fill(line).encode(encoding.encoding)
2211 return wrapper.fill(line).encode(encoding.encoding)
2210
2212
2211 def iterfile(fp):
2213 if (pyplatform.python_implementation() == 'CPython' and
2212 """like fp.__iter__ but does not have issues with EINTR. Python 2.7.12 is
2214 sys.version_info < (3, 0)):
2213 known to have such issues."""
2215 # There is an issue in CPython that some IO methods do not handle EINTR
2214 return iter(fp.readline, '')
2216 # correctly. The following table shows which CPython versions (and functions)
2217 # are affected (buggy: has the EINTR bug, okay: otherwise):
2218 #
2219 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2220 # --------------------------------------------------
2221 # fp.__iter__ | buggy | buggy | okay
2222 # fp.read* | buggy | okay [1] | okay
2223 #
2224 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2225 #
2226 # Here we work around the EINTR issue for fileobj.__iter__. Other methods
2227 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2228 #
2229 # Although we can work around the EINTR issue for fp.__iter__, it is slower:
2230 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2231 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2232 # fp.__iter__ but not other fp.read* methods.
2233 #
2234 # On modern systems like Linux, the "read" syscall cannot be interrupted
2235 # when reading "fast" files like on-disk files. So the EINTR issue only
2236 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2237 # files approximately as "fast" files and use the fast (unsafe) code path,
2238 # to minimize the performance impact.
2239 if sys.version_info >= (2, 7, 4):
2240 # fp.readline deals with EINTR correctly, use it as a workaround.
2241 def _safeiterfile(fp):
2242 return iter(fp.readline, '')
2243 else:
2244 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2245 # note: this may block longer than necessary because of bufsize.
2246 def _safeiterfile(fp, bufsize=4096):
2247 fd = fp.fileno()
2248 line = ''
2249 while True:
2250 try:
2251 buf = os.read(fd, bufsize)
2252 except OSError as ex:
2253 # os.read only raises EINTR before any data is read
2254 if ex.errno == errno.EINTR:
2255 continue
2256 else:
2257 raise
2258 line += buf
2259 if '\n' in buf:
2260 splitted = line.splitlines(True)
2261 line = ''
2262 for l in splitted:
2263 if l[-1] == '\n':
2264 yield l
2265 else:
2266 line = l
2267 if not buf:
2268 break
2269 if line:
2270 yield line
2271
2272 def iterfile(fp):
2273 fastpath = True
2274 if type(fp) is file:
2275 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2276 if fastpath:
2277 return fp
2278 else:
2279 return _safeiterfile(fp)
2280 else:
2281 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2282 def iterfile(fp):
2283 return fp
2215
2284
2216 def iterlines(iterator):
2285 def iterlines(iterator):
2217 for chunk in iterator:
2286 for chunk in iterator:
General Comments 0
You need to be logged in to leave comments. Login now