##// END OF EJS Templates
bdiff: replace hash algorithm...
bdiff: replace hash algorithm This patch replaces lyhash with the hash algorithm used by diffutils. The algorithm has its origins in Git commit 2e9d1410, which is all the way back from 1992. The license header in the code at that revision is GPL v2. I have not performed an extensive analysis of the distribution (and therefore buckets) of hash output. However, `hg perfbdiff` gives some clear wins. I'd like to think that if it is good enough for diffutils it is good enough for us? From the mozilla-unified repository: $ perfbdiff -m 3041e4d59df2 ! wall 0.053271 comb 0.060000 user 0.060000 sys 0.000000 (best of 100) ! wall 0.035827 comb 0.040000 user 0.040000 sys 0.000000 (best of 100) $ perfbdiff 0e9928989e9c --alldata --count 100 ! wall 6.204277 comb 6.200000 user 6.200000 sys 0.000000 (best of 3) ! wall 4.309710 comb 4.300000 user 4.300000 sys 0.000000 (best of 3) From the hg repo: $ perfbdiff 35000 --alldata --count 1000 ! wall 0.660358 comb 0.660000 user 0.660000 sys 0.000000 (best of 15) ! wall 0.534092 comb 0.530000 user 0.530000 sys 0.000000 (best of 19) Looking at the generated assembly and statistical profiler output from the kernel level, I believe there is room to make this function even faster. Namely, we're still consuming data character by character instead of at the word level. This translates to more loop iterations and more instructions. At this juncture though, the real performance killer is that we're hashing every line. We should get a significant speedup if we change the algorithm to find the longest prefix, longest suffix, treat those as single "lines" and then only do the line splitting and hashing on the parts that are different. That will require a lot of C code, however. I'm optimistic this approach could result in a ~2x speedup.

File last commit:

r29668:09a5699c stable
r30318:e1d6aa0e default
Show More
transport.py
137 lines | 5.2 KiB | text/x-python | PythonLexer
# -*- coding: utf-8 -*-
# Copyright (C) 2007 Daniel Holth <dholth@fastmail.fm>
# This is a stripped-down version of the original bzr-svn transport.py,
# Copyright (C) 2006 Jelmer Vernooij <jelmer@samba.org>
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.
from __future__ import absolute_import
import svn.client
import svn.core
import svn.ra
# Module-level shorthands for the svn.core names used in this file.
Pool = svn.core.Pool
SubversionException = svn.core.SubversionException
from mercurial import (
util,
)
# Some older versions of the Python bindings need to be
# explicitly initialized. But what we want to do probably
# won't work worth a darn against those libraries anyway!
svn.ra.initialize()
# Module-level cache of svn.core.svn_config_get_config(None). It is filled
# in by SvnRaTransport.__init__ the first time a new session is opened and
# reused for every later transport.
svn_config = None
def _create_auth_baton(pool):
    """Build and return a Subversion authentication baton.

    ``pool`` is handed to every provider factory and to
    ``svn.core.svn_auth_open``, whose result is returned.
    """
    # The module already imports svn.client at top level; the local import
    # is retained exactly as in the original.
    import svn.client

    # Give the client context baton a suite of authentication providers.
    auth_providers = [
        svn.client.get_simple_provider(pool),
        svn.client.get_username_provider(pool),
        svn.client.get_ssl_client_cert_file_provider(pool),
        svn.client.get_ssl_client_cert_pw_file_provider(pool),
        svn.client.get_ssl_server_trust_file_provider(pool),
    ]

    # Platform-dependent authentication methods
    platform_lookup = getattr(
        svn.core, 'svn_auth_get_platform_specific_provider', None)
    if not platform_lookup:
        # Platform-specific lookup unavailable: fall back to the Windows
        # simple provider when the bindings expose it.
        if util.safehasattr(svn.client, 'get_windows_simple_provider'):
            auth_providers.append(
                svn.client.get_windows_simple_provider(pool))
    else:
        # Available in svn >= 1.6
        backends = ('gnome_keyring', 'keychain', 'kwallet', 'windows')
        kinds = ('simple', 'ssl_client_cert_pw', 'ssl_server_trust')
        for backend in backends:
            for kind in kinds:
                candidate = platform_lookup(backend, kind, pool)
                # Falsy results are skipped rather than registered.
                if candidate:
                    auth_providers.append(candidate)

    return svn.core.svn_auth_open(auth_providers, pool)
class NotBranchError(SubversionException):
    """Raised when an ra session cannot be opened for the given URL.

    SvnRaTransport raises it for the illegal-URL, bad-URL and
    local-repository-open-failed Subversion error codes.
    """
class SvnRaTransport(object):
    """Open an ra connection to a Subversion repository.

    Attributes set by the constructor:

    pool      -- the ``Pool()`` instance created for this transport
    svn_url   -- the URL passed to the constructor
    username  -- always initialised to ''
    password  -- always initialised to ''
    ra        -- the ra session (newly opened, or the reparented ``ra``)
    client    -- the client context; only set when a new session is opened
    """

    def __init__(self, url="", ra=None):
        """Open a new ra session on ``url`` or reparent ``ra`` onto it.

        url -- repository URL to connect to
        ra  -- optional existing ra session. It is reused through
               ``svn.ra.reparent`` when the bindings provide that function;
               otherwise it is ignored and a new session is opened.

        Raises NotBranchError(url) when opening the session fails with
        SVN_ERR_RA_ILLEGAL_URL, SVN_ERR_RA_LOCAL_REPOS_OPEN_FAILED or
        SVN_ERR_BAD_URL. Any other SubversionException propagates unchanged.
        """
        global svn_config

        self.pool = Pool()
        self.svn_url = url
        self.username = ''
        self.password = ''

        # Only Subversion 1.4 has reparent()
        if ra is not None and util.safehasattr(svn.ra, 'reparent'):
            self.ra = ra
            svn.ra.reparent(self.ra, self.svn_url.encode('utf8'))
            return

        self.client = svn.client.create_context(self.pool)
        # NOTE: the original code contained a disabled (``if False:``) block
        # that would have pushed self.username / self.password into the
        # baton via svn.core.svn_auth_set_parameter with
        # SVN_AUTH_PARAM_DEFAULT_USERNAME / SVN_AUTH_PARAM_DEFAULT_PASSWORD.
        # It never ran, so it is omitted here.
        self.client.auth_baton = _create_auth_baton(self.pool)

        # The Subversion configuration is loaded once and shared by all
        # transports through the module-level svn_config.
        if svn_config is None:
            svn_config = svn.core.svn_config_get_config(None)
        self.client.config = svn_config

        try:
            self.ra = svn.client.open_ra_session(
                self.svn_url,
                self.client, self.pool)
        except SubversionException as exc:
            if self._errcode(exc) in (
                    svn.core.SVN_ERR_RA_ILLEGAL_URL,
                    svn.core.SVN_ERR_RA_LOCAL_REPOS_OPEN_FAILED,
                    svn.core.SVN_ERR_BAD_URL):
                raise NotBranchError(url)
            # Unknown (or missing) error number: let the original
            # exception propagate untouched.
            raise

    @staticmethod
    def _errcode(exc):
        """Return the error number stored in ``exc.args[1]``, or None.

        Unpacking ``exc.args`` into exactly two names would raise
        ValueError for any other arity and hide the real exception, so the
        second item is read defensively instead.
        """
        args = exc.args
        if len(args) > 1:
            return args[1]
        return None

    class Reporter(object):
        """Wrapper around a (reporter, baton) pair.

        Each method forwards its arguments to the matching
        ``svn.ra.reporter2_invoke_*`` function together with the stored
        reporter and baton.
        """

        def __init__(self, reporter_data):
            """Unpack ``reporter_data`` into the reporter and its baton."""
            self._reporter, self._baton = reporter_data

        def set_path(self, path, revnum, start_empty, lock_token, pool=None):
            """Forward to svn.ra.reporter2_invoke_set_path."""
            svn.ra.reporter2_invoke_set_path(
                self._reporter, self._baton,
                path, revnum, start_empty, lock_token, pool)

        def delete_path(self, path, pool=None):
            """Forward to svn.ra.reporter2_invoke_delete_path."""
            svn.ra.reporter2_invoke_delete_path(
                self._reporter, self._baton, path, pool)

        def link_path(self, path, url, revision, start_empty, lock_token,
                      pool=None):
            """Forward to svn.ra.reporter2_invoke_link_path."""
            svn.ra.reporter2_invoke_link_path(
                self._reporter, self._baton,
                path, url, revision, start_empty, lock_token, pool)

        def finish_report(self, pool=None):
            """Forward to svn.ra.reporter2_invoke_finish_report."""
            svn.ra.reporter2_invoke_finish_report(
                self._reporter, self._baton, pool)

        def abort_report(self, pool=None):
            """Forward to svn.ra.reporter2_invoke_abort_report."""
            svn.ra.reporter2_invoke_abort_report(
                self._reporter, self._baton, pool)

    def do_update(self, revnum, path, *args, **kwargs):
        """Call svn.ra.do_update on this session and wrap the result.

        Extra positional and keyword arguments are passed straight through
        to ``svn.ra.do_update``; its return value is wrapped in a Reporter.
        """
        return self.Reporter(svn.ra.do_update(self.ra, revnum, path,
                                              *args, **kwargs))