##// END OF EJS Templates
revlog: optionally cache the full text when adding revisions...
revlog: optionally cache the full text when adding revisions revlog instances can cache the full text of a single revision. Typically the most recently read revision is cached. When adding a delta group via addgroup() and _addrevision(), the full text isn't always computed: sometimes only the passed in delta is sufficient for adding a new revision to the revlog. When writing the changelog from a delta group, the just-added full text revision is always read immediately after it is written because the changegroup code needs to extract the set of files from the entry. In other words, revision() is *always* being called and caching the full text of the just-added revision is guaranteed to result in a cache hit, making the cache worthwhile. This patch adds support to _addrevision() for always building and caching the full text. This option is currently only active when processing changelog entries from a changegroup. While the total number of revision() calls is the same, the location matters: buildtext() calls into revision() on the base revision when building the full text of the just-added revision. Since the previous revision's _addrevision() built the full text and the the previous revision is likely the base revision, this means that the base revision's full text is likely cached and can be used to compute the current full text from just a delta. No extra I/O required. The end result is the changelog isn't opened and read after adding every revision from a changegroup. On my 2013 MacBook Pro running OS X 10.10.5 from an SSD and Python 2.7, this patch impacted the time taken to apply ~262,000 changesets from a mozilla-central gzip bundle: before: ~43s after: ~32s ~25% reduction in changelog processing times. Not bad.

File last commit:

r26200:461e7b70 default
r26243:83629142 default
Show More
request.py
140 lines | 5.2 KiB | text/x-python | PythonLexer
# hgweb/request.py - An http request from either CGI or the standalone server.
#
# Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
# Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
import socket, cgi, errno
from mercurial import util
from common import ErrorResponse, statusmessage, HTTP_NOT_MODIFIED
shortcuts = {
'cl': [('cmd', ['changelog']), ('rev', None)],
'sl': [('cmd', ['shortlog']), ('rev', None)],
'cs': [('cmd', ['changeset']), ('node', None)],
'f': [('cmd', ['file']), ('filenode', None)],
'fl': [('cmd', ['filelog']), ('filenode', None)],
'fd': [('cmd', ['filediff']), ('node', None)],
'fa': [('cmd', ['annotate']), ('filenode', None)],
'mf': [('cmd', ['manifest']), ('manifest', None)],
'ca': [('cmd', ['archive']), ('node', None)],
'tags': [('cmd', ['tags'])],
'tip': [('cmd', ['changeset']), ('node', ['tip'])],
'static': [('cmd', ['static']), ('file', None)]
}
def normalize(form):
# first expand the shortcuts
for k in shortcuts.iterkeys():
if k in form:
for name, value in shortcuts[k]:
if value is None:
value = form[k]
form[name] = value
del form[k]
# And strip the values
for k, v in form.iteritems():
form[k] = [i.strip() for i in v]
return form
class wsgirequest(object):
"""Higher-level API for a WSGI request.
WSGI applications are invoked with 2 arguments. They are used to
instantiate instances of this class, which provides higher-level APIs
for obtaining request parameters, writing HTTP output, etc.
"""
def __init__(self, wsgienv, start_response):
version = wsgienv['wsgi.version']
if (version < (1, 0)) or (version >= (2, 0)):
raise RuntimeError("Unknown and unsupported WSGI version %d.%d"
% version)
self.inp = wsgienv['wsgi.input']
self.err = wsgienv['wsgi.errors']
self.threaded = wsgienv['wsgi.multithread']
self.multiprocess = wsgienv['wsgi.multiprocess']
self.run_once = wsgienv['wsgi.run_once']
self.env = wsgienv
self.form = normalize(cgi.parse(self.inp,
self.env,
keep_blank_values=1))
self._start_response = start_response
self.server_write = None
self.headers = []
def __iter__(self):
return iter([])
def read(self, count=-1):
return self.inp.read(count)
def drain(self):
'''need to read all data from request, httplib is half-duplex'''
length = int(self.env.get('CONTENT_LENGTH') or 0)
for s in util.filechunkiter(self.inp, limit=length):
pass
def respond(self, status, type, filename=None, body=None):
if self._start_response is not None:
self.headers.append(('Content-Type', type))
if filename:
filename = (filename.split('/')[-1]
.replace('\\', '\\\\').replace('"', '\\"'))
self.headers.append(('Content-Disposition',
'inline; filename="%s"' % filename))
if body is not None:
self.headers.append(('Content-Length', str(len(body))))
for k, v in self.headers:
if not isinstance(v, str):
raise TypeError('header value must be string: %r' % (v,))
if isinstance(status, ErrorResponse):
self.headers.extend(status.headers)
if status.code == HTTP_NOT_MODIFIED:
# RFC 2616 Section 10.3.5: 304 Not Modified has cases where
# it MUST NOT include any headers other than these and no
# body
self.headers = [(k, v) for (k, v) in self.headers if
k in ('Date', 'ETag', 'Expires',
'Cache-Control', 'Vary')]
status = statusmessage(status.code, str(status))
elif status == 200:
status = '200 Script output follows'
elif isinstance(status, int):
status = statusmessage(status)
self.server_write = self._start_response(status, self.headers)
self._start_response = None
self.headers = []
if body is not None:
self.write(body)
self.server_write = None
def write(self, thing):
if thing:
try:
self.server_write(thing)
except socket.error as inst:
if inst[0] != errno.ECONNRESET:
raise
def writelines(self, lines):
for line in lines:
self.write(line)
def flush(self):
return None
def close(self):
return None
def wsgiapplication(app_maker):
'''For compatibility with old CGI scripts. A plain hgweb() or hgwebdir()
can and should now be used as a WSGI application.'''
application = app_maker()
def run_wsgi(env, respond):
return application(env, respond)
return run_wsgi