upstream/mercurial-mirror Files · mercurial/hgweb/wsgicgi.py

revlog: optionally cache the full text when adding revisions...

revlog: optionally cache the full text when adding revisions revlog instances can cache the full text of a single revision. Typically the most recently read revision is cached. When adding a delta group via addgroup() and _addrevision(), the full text isn't always computed: sometimes only the passed in delta is sufficient for adding a new revision to the revlog. When writing the changelog from a delta group, the just-added full text revision is always read immediately after it is written because the changegroup code needs to extract the set of files from the entry. In other words, revision() is *always* being called and caching the full text of the just-added revision is guaranteed to result in a cache hit, making the cache worthwhile. This patch adds support to _addrevision() for always building and caching the full text. This option is currently only active when processing changelog entries from a changegroup. While the total number of revision() calls is the same, the location matters: buildtext() calls into revision() on the base revision when building the full text of the just-added revision. Since the previous revision's _addrevision() built the full text and the previous revision is likely the base revision, this means that the base revision's full text is likely cached and can be used to compute the current full text from just a delta. No extra I/O required. The end result is the changelog isn't opened and read after adding every revision from a changegroup. On my 2013 MacBook Pro running OS X 10.10.5 from an SSD and Python 2.7, this patch impacted the time taken to apply ~262,000 changesets from a mozilla-central gzip bundle: before: ~43s after: ~32s ~25% reduction in changelog processing times. Not bad.

Mads Kiilerich - - Load All Authors

File last commit:

r18552:e8efcc8f stable


                r26243:83629142

default

Download file

             wsgicgi.py
        
                    83 lines
            
             | 2.7 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / mercurial / hgweb / wsgicgi.py
          
                    History
                
                 |
                  Annotation
                 | Raw
                 |Copy content
                 |Copy permalink

      # hgweb/wsgicgi.py - CGI->WSGI translator

      #

      # Copyright 2006 Eric Hopper <hopper@omnifarious.org>

      #

      # This software may be used and distributed according to the terms of the

      # GNU General Public License version 2 or any later version.

      #

      # This was originally copied from the public domain code at

      # http://www.python.org/dev/peps/pep-0333/#the-server-gateway-side

      import os, sys

      from mercurial import util

      from mercurial.hgweb import common

      def launch(application):
          """Run a WSGI application once, with this CGI process as the gateway.

          The WSGI environ is a copy of os.environ extended with the ``wsgi.*``
          keys. The request body is read from sys.stdin and the response is
          written to sys.stdout, beginning with a CGI ``Status:`` line.

          application is called as ``application(environ, start_response)`` and
          must return an iterable of body chunks. If that iterable has a
          ``close`` attribute, it is called after the response has been
          written, including when writing raised an exception.
          """
          # presumably switches both streams to binary mode -- see util.setbinary
          util.setbinary(sys.stdin)
          util.setbinary(sys.stdout)

          env = dict(os.environ.iteritems())
          env.setdefault('PATH_INFO', '')
          server = env.get('SERVER_SOFTWARE', '')
          if server.startswith('Microsoft-IIS'):
              # IIS puts SCRIPT_NAME at the front of PATH_INFO; strip it off
              prefix = env['SCRIPT_NAME']
              pathinfo = env['PATH_INFO']
              if pathinfo.startswith(prefix):
                  env['PATH_INFO'] = pathinfo[len(prefix):]

          expect = env.get('HTTP_EXPECT', '').lower()
          if expect == '100-continue':
              # hand the client's "Expect: 100-continue" to common.continuereader
              body = common.continuereader(sys.stdin, sys.stdout.write)
          else:
              body = sys.stdin

          secure = env.get('HTTPS', 'off').lower() in ('on', '1', 'yes')
          env.update({
              'wsgi.input': body,
              'wsgi.errors': sys.stderr,
              'wsgi.version': (1, 0),
              'wsgi.multithread': False,
              'wsgi.multiprocess': True,
              'wsgi.run_once': True,
              'wsgi.url_scheme': 'https' if secure else 'http',
          })

          # Both lists are either empty or hold [status, response_headers].
          # "pending" is filled by start_response(); "sent" is filled by write()
          # at the moment the header block actually goes out.
          pending = []
          sent = []
          stdout = sys.stdout

          def write(data):
              """Write data to stdout, emitting the stored headers first."""
              if not pending:
                  raise AssertionError("write() before start_response()")

              if not sent:
                  # first output: flush the header block ahead of the body
                  status, response_headers = pending
                  sent[:] = pending
                  stdout.write('Status: %s\r\n' % status)
                  for header in response_headers:
                      stdout.write('%s: %s\r\n' % header)
                  stdout.write('\r\n')

              stdout.write(data)
              stdout.flush()

          def start_response(status, response_headers, exc_info=None):
              """Store status and headers for the response; return write()."""
              if not exc_info:
                  if pending:
                      raise AssertionError("Headers already set!")
              else:
                  try:
                      if sent:
                          # Headers are already out, so the error cannot be
                          # turned into a response any more.
                          # NOTE(review): this builds a new exception of the
                          # original type from (value, traceback) rather than
                          # re-raising the original object -- confirm intent.
                          raise exc_info[0](exc_info[1], exc_info[2])
                  finally:
                      exc_info = None     # avoid dangling circular ref

              pending[:] = [status, response_headers]
              return write

          def noclose():
              return None

          result = application(env, start_response)
          try:
              for chunk in result:
                  write(chunk)
              if not sent:
                  write('')   # empty body: the headers still have to go out
          finally:
              closer = getattr(result, 'close', noclose)
              closer()

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

				# hgweb/wsgicgi.py - CGI->WSGI translator
				#
				# Copyright 2006 Eric Hopper <hopper@omnifarious.org>
				#
				# This software may be used and distributed according to the terms of the
				# GNU General Public License version 2 or any later version.
				#
				# This was originally copied from the public domain code at
				# http://www.python.org/dev/peps/pep-0333/#the-server-gateway-side

				import os, sys
				from mercurial import util
				from mercurial.hgweb import common

				def launch(application):
				    """Run a WSGI application once, with this CGI process as the gateway.

				    The WSGI environ is a copy of os.environ extended with the ``wsgi.*``
				    keys. The request body is read from sys.stdin and the response is
				    written to sys.stdout, beginning with a CGI ``Status:`` line.

				    application is called as ``application(environ, start_response)`` and
				    must return an iterable of body chunks. If that iterable has a
				    ``close`` attribute, it is called after the response has been
				    written, including when writing raised an exception.
				    """
				    # presumably switches both streams to binary mode -- see util.setbinary
				    util.setbinary(sys.stdin)
				    util.setbinary(sys.stdout)

				    env = dict(os.environ.iteritems())
				    env.setdefault('PATH_INFO', '')
				    server = env.get('SERVER_SOFTWARE', '')
				    if server.startswith('Microsoft-IIS'):
				        # IIS puts SCRIPT_NAME at the front of PATH_INFO; strip it off
				        prefix = env['SCRIPT_NAME']
				        pathinfo = env['PATH_INFO']
				        if pathinfo.startswith(prefix):
				            env['PATH_INFO'] = pathinfo[len(prefix):]

				    expect = env.get('HTTP_EXPECT', '').lower()
				    if expect == '100-continue':
				        # hand the client's "Expect: 100-continue" to common.continuereader
				        body = common.continuereader(sys.stdin, sys.stdout.write)
				    else:
				        body = sys.stdin

				    secure = env.get('HTTPS', 'off').lower() in ('on', '1', 'yes')
				    env.update({
				        'wsgi.input': body,
				        'wsgi.errors': sys.stderr,
				        'wsgi.version': (1, 0),
				        'wsgi.multithread': False,
				        'wsgi.multiprocess': True,
				        'wsgi.run_once': True,
				        'wsgi.url_scheme': 'https' if secure else 'http',
				    })

				    # Both lists are either empty or hold [status, response_headers].
				    # "pending" is filled by start_response(); "sent" is filled by write()
				    # at the moment the header block actually goes out.
				    pending = []
				    sent = []
				    stdout = sys.stdout

				    def write(data):
				        """Write data to stdout, emitting the stored headers first."""
				        if not pending:
				            raise AssertionError("write() before start_response()")

				        if not sent:
				            # first output: flush the header block ahead of the body
				            status, response_headers = pending
				            sent[:] = pending
				            stdout.write('Status: %s\r\n' % status)
				            for header in response_headers:
				                stdout.write('%s: %s\r\n' % header)
				            stdout.write('\r\n')

				        stdout.write(data)
				        stdout.flush()

				    def start_response(status, response_headers, exc_info=None):
				        """Store status and headers for the response; return write()."""
				        if not exc_info:
				            if pending:
				                raise AssertionError("Headers already set!")
				        else:
				            try:
				                if sent:
				                    # Headers are already out, so the error cannot be
				                    # turned into a response any more.
				                    # NOTE(review): this builds a new exception of the
				                    # original type from (value, traceback) rather than
				                    # re-raising the original object -- confirm intent.
				                    raise exc_info[0](exc_info[1], exc_info[2])
				            finally:
				                exc_info = None # avoid dangling circular ref

				        pending[:] = [status, response_headers]
				        return write

				    def noclose():
				        return None

				    result = application(env, start_response)
				    try:
				        for chunk in result:
				            write(chunk)
				        if not sent:
				            write('') # empty body: the headers still have to go out
				    finally:
				        closer = getattr(result, 'close', noclose)
				        closer()