##// END OF EJS Templates
match: sort patterns before compiling them into a regex...
match: sort patterns before compiling them into a regex While investigating cripping performance for `hg cat` in some context, I discovered that, for large inputs, building a regex from out of order patterns result may result in a *much* slower regex and a much slower associated matcher's performance. So we are now sorting the patterns to help the regex engine. There is more to the story as we rely on regexp more than we should. See the next changeset for details. Benchmarks ========== In the following benchmark we are comparing the `hg cat` and `hg files` run time when matching against the full list of files in the repository. They are run: - without the rust extensions - with the standard python enfine (so without re2) sort vs non-sorted - Before this changeset (3f5137543773) --------------------------------------------------------- ###### hg files ############################################################### ### mercurial-2018-08-01-zstd-sparse-revlog sorted: 0.230092 seconds shuffled: 0.234235 seconds (+1.80%) ### pypy-2018-08-01-zstd-sparse-revlog sorted: 0.613567 seconds shuffled: 0.801880 seconds (+30.69%) ### mozilla-central-2018-08-01-zstd-sparse-revlog sorted: 62.474221 seconds shuffled: 1364.180218 seconds (+2083.59%) ### netbeans-2018-08-01-zstd-sparse-revlog sorted: 21.541828 seconds shuffled: 172.759857 seconds (+701.97%) ###### hg cat ################################################################# ### mercurial-2018-08-01-zstd-sparse-revlog sorted: 0.764407 seconds shuffled: 0.768924 seconds ### pypy-2018-08-01-zstd-sparse-revlog sorted: 2.065220 seconds shuffled: 2.276388 seconds (+10.22%) ### netbeans-2018-08-01-zstd-sparse-revlog sorted: 40.967983 seconds shuffled: 216.388709 seconds (+428.19%) ### mozilla-central-2018-08-01-zstd-sparse-revlog sorted: 105.228510 seconds shuffled: 1448.722784 seconds (+1276.74%) sort vs non-sorted - With this changeset ---------------------------------------- ###### hg files ############################################################### ### mercurial-2018-08-01-zstd-sparse-revlog all-list-pattern-sorted: 0.230069 all-list-pattern-shuffled: 0.231165 ### pypy-2018-08-01-zstd-sparse-revlog all-list-pattern-sorted: 0.616799 all-list-pattern-shuffled: 0.616393 ### netbeans-2018-08-01-zstd-sparse-revlog all-list-pattern-sorted: 21.586773 all-list-pattern-shuffled: 21.908197 ### mozilla-central-2018-08-01-zstd-sparse-revlog all-list-pattern-sorted: 61.279490 all-list-pattern-shuffled: 62.473549 ###### hg cat ################################################################# ### mercurial-2018-08-01-zstd-sparse-revlog sorted: 0.763883 seconds shuffled: 0.765848 seconds ### pypy-2018-08-01-zstd-sparse-revlog sorted: 2.070498 seconds shuffled: 2.069197 seconds ### netbeans-2018-08-01-zstd-sparse-revlog sorted: 41.392423 seconds shuffled: 41.648689 seconds ### mozilla-central-2018-08-01-zstd-sparse-revlog sorted: 103.315670 seconds shuffled: 104.369358 seconds

File last commit:

r49768:f254fc73 default
r51285:47686726 stable
Show More
wsgicgi.py
93 lines | 2.9 KiB | text/x-python | PythonLexer
# hgweb/wsgicgi.py - CGI->WSGI translator
#
# Copyright 2006 Eric Hopper <hopper@omnifarious.org>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
#
# This was originally copied from the public domain code at
# http://www.python.org/dev/peps/pep-0333/#the-server-gateway-side
import os
from ..pycompat import getattr
from .. import pycompat
from ..utils import procutil
from . import common
def launch(application):
procutil.setbinary(procutil.stdin)
procutil.setbinary(procutil.stdout)
environ = dict(os.environ.items()) # re-exports
environ.setdefault('PATH_INFO', '')
if environ.get('SERVER_SOFTWARE', '').startswith('Microsoft-IIS'):
# IIS includes script_name in PATH_INFO
scriptname = environ['SCRIPT_NAME']
if environ['PATH_INFO'].startswith(scriptname):
environ['PATH_INFO'] = environ['PATH_INFO'][len(scriptname) :]
stdin = procutil.stdin
if environ.get('HTTP_EXPECT', '').lower() == '100-continue':
stdin = common.continuereader(stdin, procutil.stdout.write)
environ['wsgi.input'] = stdin
environ['wsgi.errors'] = procutil.stderr
environ['wsgi.version'] = (1, 0)
environ['wsgi.multithread'] = False
environ['wsgi.multiprocess'] = True
environ['wsgi.run_once'] = True
if environ.get('HTTPS', 'off').lower() in ('on', '1', 'yes'):
environ['wsgi.url_scheme'] = 'https'
else:
environ['wsgi.url_scheme'] = 'http'
headers_set = []
headers_sent = []
out = procutil.stdout
def write(data):
if not headers_set:
raise AssertionError(b"write() before start_response()")
elif not headers_sent:
# Before the first output, send the stored headers
status, response_headers = headers_sent[:] = headers_set
out.write(b'Status: %s\r\n' % pycompat.bytesurl(status))
for hk, hv in response_headers:
out.write(
b'%s: %s\r\n'
% (pycompat.bytesurl(hk), pycompat.bytesurl(hv))
)
out.write(b'\r\n')
out.write(data)
out.flush()
def start_response(status, response_headers, exc_info=None):
if exc_info:
try:
if headers_sent:
# Re-raise original exception if headers sent
raise exc_info[0](exc_info[1], exc_info[2])
finally:
del exc_info # avoid dangling circular ref
elif headers_set:
raise AssertionError(b"Headers already set!")
headers_set[:] = [status, response_headers]
return write
content = application(environ, start_response)
try:
for chunk in content:
write(chunk)
if not headers_sent:
write(b'') # send headers now if body was empty
finally:
getattr(content, 'close', lambda: None)()