upstream/mercurial-mirror Files · mercurial/hgweb/request.py

revlog: optionally cache the full text when adding revisions...

revlog: optionally cache the full text when adding revisions. revlog instances can cache the full text of a single revision. Typically the most recently read revision is cached. When adding a delta group via addgroup() and _addrevision(), the full text isn't always computed: sometimes only the passed-in delta is sufficient for adding a new revision to the revlog. When writing the changelog from a delta group, the just-added full text revision is always read immediately after it is written because the changegroup code needs to extract the set of files from the entry. In other words, revision() is *always* being called and caching the full text of the just-added revision is guaranteed to result in a cache hit, making the cache worthwhile. This patch adds support to _addrevision() for always building and caching the full text. This option is currently only active when processing changelog entries from a changegroup. While the total number of revision() calls is the same, the location matters: buildtext() calls into revision() on the base revision when building the full text of the just-added revision. Since the previous revision's _addrevision() built the full text and the previous revision is likely the base revision, this means that the base revision's full text is likely cached and can be used to compute the current full text from just a delta. No extra I/O required. The end result is the changelog isn't opened and read after adding every revision from a changegroup. On my 2013 MacBook Pro running OS X 10.10.5 from an SSD and Python 2.7, this patch impacted the time taken to apply ~262,000 changesets from a mozilla-central gzip bundle: before: ~43s; after: ~32s; a ~25% reduction in changelog processing times. Not bad.

timeless@mozdev.org - - Load All Authors

File last commit:

r26200:461e7b70 default


                r26243:83629142

default

Download file

             request.py
        
                    140 lines
            
             | 5.2 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / mercurial / hgweb / request.py
          
                    History
                
                 |
                  Annotation
                 | Raw
                 |Copy content
                 |Copy permalink

      # hgweb/request.py - An http request from either CGI or the standalone server.

      #

      # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>

      # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>

      #

      # This software may be used and distributed according to the terms of the

      # GNU General Public License version 2 or any later version.

      import socket, cgi, errno

      from mercurial import util

      from common import ErrorResponse, statusmessage, HTTP_NOT_MODIFIED

      # Abbreviated query-string keys understood by normalize().
      #
      # Each shortcut maps to a list of (parameter, value) pairs that replace
      # it. A value of None means "reuse the value list that was supplied for
      # the shortcut key itself"; any other value is used as-is.
      shortcuts = {
          'cl': [('cmd', ['changelog']), ('rev', None)],
          'sl': [('cmd', ['shortlog']), ('rev', None)],
          'cs': [('cmd', ['changeset']), ('node', None)],
          'f': [('cmd', ['file']), ('filenode', None)],
          'fl': [('cmd', ['filelog']), ('filenode', None)],
          'fd': [('cmd', ['filediff']), ('node', None)],
          'fa': [('cmd', ['annotate']), ('filenode', None)],
          'mf': [('cmd', ['manifest']), ('manifest', None)],
          'ca': [('cmd', ['archive']), ('node', None)],
          'tags': [('cmd', ['tags'])],
          'tip': [('cmd', ['changeset']), ('node', ['tip'])],
          'static': [('cmd', ['static']), ('file', None)],
      }

      def normalize(form):
          """Expand shortcut keys in ``form`` and strip its values.

          ``form`` maps parameter names to lists of strings. It is modified in
          place and also returned.
          """
          # Pass 1: replace every shortcut key that is present with the full
          # parameters it abbreviates.
          for abbrev in shortcuts:
              if abbrev not in form:
                  continue
              for param, preset in shortcuts[abbrev]:
                  # A preset of None means: reuse the values given for the
                  # shortcut key itself.
                  form[param] = form[abbrev] if preset is None else preset
              del form[abbrev]
          # Pass 2: trim surrounding whitespace from every value.
          for key, values in form.iteritems():
              form[key] = [item.strip() for item in values]
          return form

      class wsgirequest(object):
          """Higher-level API for a WSGI request.

          WSGI applications are invoked with 2 arguments. They are used to
          instantiate instances of this class, which provides higher-level APIs
          for obtaining request parameters, writing HTTP output, etc.
          """
          def __init__(self, wsgienv, start_response):
              """Wrap a WSGI environ dict and its ``start_response`` callable.

              Raises RuntimeError unless ``wsgi.version`` is a 1.x version.
              The request parameters are parsed with ``cgi.parse`` and run
              through ``normalize()`` to populate ``self.form``.
              """
              version = wsgienv['wsgi.version']
              if (version < (1, 0)) or (version >= (2, 0)):
                  raise RuntimeError("Unknown and unsupported WSGI version %d.%d"
                                     % version)
              self.inp = wsgienv['wsgi.input']
              self.err = wsgienv['wsgi.errors']
              self.threaded = wsgienv['wsgi.multithread']
              self.multiprocess = wsgienv['wsgi.multiprocess']
              self.run_once = wsgienv['wsgi.run_once']
              self.env = wsgienv
              self.form = normalize(cgi.parse(self.inp,
                                              self.env,
                                              keep_blank_values=1))
              # Cleared by respond() once the status line and headers are sent.
              self._start_response = start_response
              # Write callable returned by start_response; set by respond().
              self.server_write = None
              # Pending (name, value) response headers.
              self.headers = []

          def __iter__(self):
              """Return an empty iterator; output is sent through write()."""
              return iter([])

          def read(self, count=-1):
              """Read up to ``count`` bytes from the request body."""
              return self.inp.read(count)

          def drain(self):
              '''need to read all data from request, httplib is half-duplex'''
              length = int(self.env.get('CONTENT_LENGTH') or 0)
              # The chunks are discarded; the point is only to consume input.
              for _chunk in util.filechunkiter(self.inp, limit=length):
                  pass

          def respond(self, status, type, filename=None, body=None):
              """Send the status line and headers, then optionally ``body``.

              status: an ErrorResponse, an int status code, or a ready-made
                  status string.
              type: value for the Content-Type header.
              filename: when truthy, a Content-Disposition header naming its
                  last path component is added.
              body: when not None, Content-Length is added and the body is
                  written; afterwards the write callable is dropped.

              The status line and headers are only sent on the first call.
              Raises TypeError when a pending header value is not a ``str``.
              """
              if self._start_response is not None:
                  self.headers.append(('Content-Type', type))
                  if filename:
                      # Keep the final path component only, and backslash-escape
                      # backslashes and double quotes for the quoted string.
                      filename = (filename.split('/')[-1]
                                  .replace('\\', '\\\\').replace('"', '\\"'))
                      self.headers.append(('Content-Disposition',
                                           'inline; filename="%s"' % filename))
                  if body is not None:
                      self.headers.append(('Content-Length', str(len(body))))

                  for k, v in self.headers:
                      if not isinstance(v, str):
                          raise TypeError('header value must be string: %r' % (v,))

                  if isinstance(status, ErrorResponse):
                      self.headers.extend(status.headers)
                      if status.code == HTTP_NOT_MODIFIED:
                          # RFC 2616 Section 10.3.5: 304 Not Modified has cases where
                          # it MUST NOT include any headers other than these and no
                          # body
                          self.headers = [(k, v) for (k, v) in self.headers if
                                          k in ('Date', 'ETag', 'Expires',
                                                'Cache-Control', 'Vary')]
                      status = statusmessage(status.code, str(status))
                  elif status == 200:
                      status = '200 Script output follows'
                  elif isinstance(status, int):
                      status = statusmessage(status)

                  self.server_write = self._start_response(status, self.headers)
                  # Mark the response as started so headers are sent only once.
                  self._start_response = None
                  self.headers = []
              if body is not None:
                  self.write(body)
                  self.server_write = None

          def write(self, thing):
              """Send ``thing`` to the client; falsy values are skipped.

              A socket error whose errno is ECONNRESET is swallowed; any other
              socket error propagates.
              """
              if thing:
                  try:
                      self.server_write(thing)
                  except socket.error as inst:
                      # Use the errno attribute instead of indexing the
                      # exception: indexing is unavailable on Python 3 and
                      # fails on an exception created without arguments.
                      if inst.errno != errno.ECONNRESET:
                          raise

          def writelines(self, lines):
              """Write every item of ``lines`` in order via write()."""
              for line in lines:
                  self.write(line)

          def flush(self):
              """Do nothing; present for file-like compatibility."""
              return None

          def close(self):
              """Do nothing; present for file-like compatibility."""
              return None

      def wsgiapplication(app_maker):
          """Build a WSGI callable from a factory, for old CGI scripts.

          ``app_maker`` is called once, immediately; the returned function
          forwards every ``(env, respond)`` call to the application it made.
          A plain hgweb() or hgwebdir() can and should now be used as a WSGI
          application directly.
          """
          wsgiapp = app_maker()

          def run_wsgi(env, respond):
              # Delegate straight to the application built above.
              return wsgiapp(env, respond)

          return run_wsgi

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

				# hgweb/request.py - An http request from either CGI or the standalone server.
				#
				# Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
				# Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
				#
				# This software may be used and distributed according to the terms of the
				# GNU General Public License version 2 or any later version.

				import socket, cgi, errno
				from mercurial import util
				from common import ErrorResponse, statusmessage, HTTP_NOT_MODIFIED

				# Abbreviated query-string keys understood by normalize().
				#
				# Each shortcut maps to a list of (parameter, value) pairs that replace
				# it. A value of None means "reuse the value list that was supplied for
				# the shortcut key itself"; any other value is used as-is.
				shortcuts = {
				    'cl': [('cmd', ['changelog']), ('rev', None)],
				    'sl': [('cmd', ['shortlog']), ('rev', None)],
				    'cs': [('cmd', ['changeset']), ('node', None)],
				    'f': [('cmd', ['file']), ('filenode', None)],
				    'fl': [('cmd', ['filelog']), ('filenode', None)],
				    'fd': [('cmd', ['filediff']), ('node', None)],
				    'fa': [('cmd', ['annotate']), ('filenode', None)],
				    'mf': [('cmd', ['manifest']), ('manifest', None)],
				    'ca': [('cmd', ['archive']), ('node', None)],
				    'tags': [('cmd', ['tags'])],
				    'tip': [('cmd', ['changeset']), ('node', ['tip'])],
				    'static': [('cmd', ['static']), ('file', None)],
				}

				def normalize(form):
				    """Expand shortcut keys in ``form`` and strip its values.

				    ``form`` maps parameter names to lists of strings. It is modified in
				    place and also returned.
				    """
				    # Pass 1: replace every shortcut key that is present with the full
				    # parameters it abbreviates.
				    for abbrev in shortcuts:
				        if abbrev not in form:
				            continue
				        for param, preset in shortcuts[abbrev]:
				            # A preset of None means: reuse the values given for the
				            # shortcut key itself.
				            form[param] = form[abbrev] if preset is None else preset
				        del form[abbrev]
				    # Pass 2: trim surrounding whitespace from every value.
				    for key, values in form.iteritems():
				        form[key] = [item.strip() for item in values]
				    return form

				class wsgirequest(object):
				    """Higher-level API for a WSGI request.

				    WSGI applications are invoked with 2 arguments. They are used to
				    instantiate instances of this class, which provides higher-level APIs
				    for obtaining request parameters, writing HTTP output, etc.
				    """
				    def __init__(self, wsgienv, start_response):
				        """Wrap a WSGI environ dict and its ``start_response`` callable.

				        Raises RuntimeError unless ``wsgi.version`` is a 1.x version.
				        The request parameters are parsed with ``cgi.parse`` and run
				        through ``normalize()`` to populate ``self.form``.
				        """
				        version = wsgienv['wsgi.version']
				        if (version < (1, 0)) or (version >= (2, 0)):
				            raise RuntimeError("Unknown and unsupported WSGI version %d.%d"
				                               % version)
				        self.inp = wsgienv['wsgi.input']
				        self.err = wsgienv['wsgi.errors']
				        self.threaded = wsgienv['wsgi.multithread']
				        self.multiprocess = wsgienv['wsgi.multiprocess']
				        self.run_once = wsgienv['wsgi.run_once']
				        self.env = wsgienv
				        self.form = normalize(cgi.parse(self.inp,
				                                        self.env,
				                                        keep_blank_values=1))
				        # Cleared by respond() once the status line and headers are sent.
				        self._start_response = start_response
				        # Write callable returned by start_response; set by respond().
				        self.server_write = None
				        # Pending (name, value) response headers.
				        self.headers = []

				    def __iter__(self):
				        """Return an empty iterator; output is sent through write()."""
				        return iter([])

				    def read(self, count=-1):
				        """Read up to ``count`` bytes from the request body."""
				        return self.inp.read(count)

				    def drain(self):
				        '''need to read all data from request, httplib is half-duplex'''
				        length = int(self.env.get('CONTENT_LENGTH') or 0)
				        # The chunks are discarded; the point is only to consume input.
				        for _chunk in util.filechunkiter(self.inp, limit=length):
				            pass

				    def respond(self, status, type, filename=None, body=None):
				        """Send the status line and headers, then optionally ``body``.

				        status: an ErrorResponse, an int status code, or a ready-made
				            status string.
				        type: value for the Content-Type header.
				        filename: when truthy, a Content-Disposition header naming its
				            last path component is added.
				        body: when not None, Content-Length is added and the body is
				            written; afterwards the write callable is dropped.

				        The status line and headers are only sent on the first call.
				        Raises TypeError when a pending header value is not a ``str``.
				        """
				        if self._start_response is not None:
				            self.headers.append(('Content-Type', type))
				            if filename:
				                # Keep the final path component only, and backslash-escape
				                # backslashes and double quotes for the quoted string.
				                filename = (filename.split('/')[-1]
				                            .replace('\\', '\\\\').replace('"', '\\"'))
				                self.headers.append(('Content-Disposition',
				                                     'inline; filename="%s"' % filename))
				            if body is not None:
				                self.headers.append(('Content-Length', str(len(body))))

				            for k, v in self.headers:
				                if not isinstance(v, str):
				                    raise TypeError('header value must be string: %r' % (v,))

				            if isinstance(status, ErrorResponse):
				                self.headers.extend(status.headers)
				                if status.code == HTTP_NOT_MODIFIED:
				                    # RFC 2616 Section 10.3.5: 304 Not Modified has cases where
				                    # it MUST NOT include any headers other than these and no
				                    # body
				                    self.headers = [(k, v) for (k, v) in self.headers if
				                                    k in ('Date', 'ETag', 'Expires',
				                                          'Cache-Control', 'Vary')]
				                status = statusmessage(status.code, str(status))
				            elif status == 200:
				                status = '200 Script output follows'
				            elif isinstance(status, int):
				                status = statusmessage(status)

				            self.server_write = self._start_response(status, self.headers)
				            # Mark the response as started so headers are sent only once.
				            self._start_response = None
				            self.headers = []
				        if body is not None:
				            self.write(body)
				            self.server_write = None

				    def write(self, thing):
				        """Send ``thing`` to the client; falsy values are skipped.

				        A socket error whose errno is ECONNRESET is swallowed; any other
				        socket error propagates.
				        """
				        if thing:
				            try:
				                self.server_write(thing)
				            except socket.error as inst:
				                # Use the errno attribute instead of indexing the
				                # exception: indexing is unavailable on Python 3 and
				                # fails on an exception created without arguments.
				                if inst.errno != errno.ECONNRESET:
				                    raise

				    def writelines(self, lines):
				        """Write every item of ``lines`` in order via write()."""
				        for line in lines:
				            self.write(line)

				    def flush(self):
				        """Do nothing; present for file-like compatibility."""
				        return None

				    def close(self):
				        """Do nothing; present for file-like compatibility."""
				        return None

				def wsgiapplication(app_maker):
				    """Build a WSGI callable from a factory, for old CGI scripts.

				    ``app_maker`` is called once, immediately; the returned function
				    forwards every ``(env, respond)`` call to the application it made.
				    A plain hgweb() or hgwebdir() can and should now be used as a WSGI
				    application directly.
				    """
				    wsgiapp = app_maker()

				    def run_wsgi(env, respond):
				        # Delegate straight to the application built above.
				        return wsgiapp(env, respond)

				    return run_wsgi