##// END OF EJS Templates
findrenames: Optimise "addremove -s100" by matching files by their SHA1 hashes....
findrenames: Optimise "addremove -s100" by matching files by their SHA1 hashes. We speed up 'findrenames' for the use case when a user specifies they want a similarity of 100% by matching files by their exact SHA1 hash value. This reduces the number of comparisons required to find exact matches from O(n^2) to O(n). While it would be nice if we could just use Mercurial's pre-calculated SHA1 hash for existing files, this hash includes the file's ancestor information, making it unsuitable for our purposes. Instead, we calculate the hash of old content from scratch. The following benchmarks were taken on the current head of crew: addremove 100% similarity: rm -rf *; hg up -C; mv tests tests.new; hg --time addremove -s100 --dry-run before: real 176.350 secs (user 128.890+0.000 sys 47.430+0.000) after: real 2.130 secs (user 1.890+0.000 sys 0.240+0.000) addremove 75% similarity: rm -rf *; hg up -C; mv tests tests.new; \ for i in tests.new/*; do echo x >> $i; done; hg --time addremove -s75 --dry-run before: real 264.560 secs (user 215.130+0.000 sys 49.410+0.000) after: real 218.710 secs (user 172.790+0.000 sys 45.870+0.000)

File last commit:

r10282:08a0f04b default
r11060:e6df0177 default
Show More
transaction.py
166 lines | 4.6 KiB | text/x-python | PythonLexer
# transaction.py - simple journalling scheme for mercurial
#
# This transaction scheme is intended to gracefully handle program
# errors and interruptions. More serious failures like system crashes
# can be recovered with an fsck-like tool. As the whole repository is
# effectively log-structured, this should amount to simply truncating
# anything that isn't referenced in the changelog.
#
# Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from i18n import _
import os, errno
import error
def active(func):
    """Decorator that lets a transaction method run only while it is live.

    The returned wrapper reads ``self.count``.  When it is 0 the wrapper
    raises ``error.Abort`` without calling *func*; otherwise it forwards
    every positional and keyword argument to *func* and returns its result.
    """
    # Function-scope import keeps this block self-contained.
    import functools

    # wraps() keeps the decorated method's __name__ and docstring visible,
    # which the plain closure used to hide behind '_active'.
    @functools.wraps(func)
    def _active(self, *args, **kwds):
        if self.count == 0:
            raise error.Abort(_(
                'cannot use transaction when it is already committed/aborted'))
        return func(self, *args, **kwds)
    return _active
def _playback(journal, report, opener, entries, unlink=True):
    """Undo the writes described by *entries*, then delete the journal.

    journal -- path of the journal file; unlinked after all entries are
               processed
    report  -- callable taking one message string; used when a truncate
               fails
    opener  -- callable ``opener(name[, mode])`` returning a file object
               that provides ``truncate``, ``name`` and ``close``
    entries -- iterable of ``(name, offset, data)`` tuples; the third item
               is not used here
    unlink  -- when true (the default), entries whose offset is 0 are
               deleted instead of being truncated

    An IOError while truncating is reported and re-raised.  Errors while
    deleting are re-raised unless their errno is ENOENT.
    """
    for f, o, ignore in entries:
        if o or not unlink:
            try:
                fp = opener(f, 'a')
                try:
                    fp.truncate(o)
                finally:
                    # close explicitly instead of relying on garbage collection
                    fp.close()
            except IOError:
                report(_("failed to truncate %s\n") % f)
                raise
        else:
            try:
                # The opener is only used to resolve the real path; close
                # the handle before unlinking so no open descriptor remains.
                fp = opener(f)
                try:
                    fn = fp.name
                finally:
                    fp.close()
                os.unlink(fn)
            except (IOError, OSError) as inst:
                # a file that is already gone needs no undoing
                if inst.errno != errno.ENOENT:
                    raise
    os.unlink(journal)
class transaction(object):
    """Journal of ``(file, offset)`` pairs used to undo interrupted writes.

    Every tracked file is recorded in memory (``entries`` plus the ``map``
    index) and appended to the journal file as ``name NUL offset`` lines.
    Aborting hands the in-memory entries to ``_playback``; closing the
    outermost nesting level deletes the journal.
    """

    def __init__(self, report, opener, journal, after=None, createmode=None):
        """Open the journal file and start with a nesting count of 1.

        report     -- callable taking one message string
        opener     -- passed through to ``_playback`` on abort
        journal    -- path of the journal file; opened for writing here
        after      -- optional callable invoked by the final ``close``
        createmode -- optional mode; when given, the journal is chmod'ed to
                      ``createmode & 0o666``
        """
        self.count = 1
        self.report = report
        self.opener = opener
        self.after = after
        self.entries = []
        self.map = {}
        self._queue = []
        # Only publish the journal path once the file is really open:
        # __del__ keys off self.journal, and aborting an object that has no
        # self.file yet would fail inside the destructor.
        self.journal = None
        self.file = open(journal, "w")
        self.journal = journal
        if createmode is not None:
            os.chmod(self.journal, createmode & 0o666)

    def __del__(self):
        # self.journal is reset to None by close() and _abort(), so this
        # only fires for a transaction that was never finished.
        if self.journal:
            self._abort()

    @active
    def startgroup(self):
        """Start collecting subsequent add() calls in a new pending group."""
        self._queue.append([])

    @active
    def endgroup(self):
        """Commit the most recent pending group to entries and the journal.

        Files that are already tracked are skipped, matching what add()
        does outside a group; the remaining ones are indexed in ``map`` so
        that find() and replace() can see them.
        """
        q = self._queue.pop()
        fresh = []
        for entry in q:
            name = entry[0]
            if name in self.map:
                continue
            self.entries.append(entry)
            self.map[name] = len(self.entries) - 1
            fresh.append(entry)
        d = ''.join(['%s\0%d\n' % (x[0], x[1]) for x in fresh])
        self.file.write(d)
        self.file.flush()

    @active
    def add(self, file, offset, data=None):
        """Track *file* at *offset*, unless it is already tracked.

        While a group is open the entry is only queued; otherwise it is
        stored, indexed and written to the journal immediately.
        """
        if file in self.map:
            return

        if self._queue:
            self._queue[-1].append((file, offset, data))
            return

        self.entries.append((file, offset, data))
        self.map[file] = len(self.entries) - 1
        # add enough data to the journal to do the truncate
        self.file.write("%s\0%d\n" % (file, offset))
        self.file.flush()

    @active
    def find(self, file):
        """Return the ``(file, offset, data)`` entry for *file*, or None."""
        if file in self.map:
            return self.entries[self.map[file]]
        return None

    @active
    def replace(self, file, offset, data=None):
        '''
        replace can only replace already committed entries
        that are not pending in the queue
        '''
        if file not in self.map:
            raise KeyError(file)
        index = self.map[file]
        self.entries[index] = (file, offset, data)
        self.file.write("%s\0%d\n" % (file, offset))
        self.file.flush()

    @active
    def nest(self):
        """Increase the nesting count and return this same transaction."""
        self.count += 1
        return self

    def running(self):
        """Return True while the nesting count is above zero."""
        return self.count > 0

    @active
    def close(self):
        '''commit the transaction'''
        self.count -= 1
        if self.count != 0:
            # an outer nesting level is still open
            return
        self.file.close()
        self.entries = []
        if self.after:
            self.after()
        if os.path.isfile(self.journal):
            os.unlink(self.journal)
        self.journal = None

    @active
    def abort(self):
        '''abort the transaction (generally called on error, or when the
        transaction is not explicitly committed before going out of
        scope)'''
        self._abort()

    def _abort(self):
        """Close the journal and play back the recorded entries.

        With no entries the journal is simply removed.  Otherwise the
        entries are passed to ``_playback`` with ``unlink=False``; any
        failure there is reported rather than raised.  ``self.journal`` is
        always reset to None afterwards.
        """
        self.count = 0
        self.file.close()

        try:
            if not self.entries:
                if self.journal:
                    os.unlink(self.journal)
                return

            self.report(_("transaction abort!\n"))

            try:
                _playback(self.journal, self.report, self.opener,
                          self.entries, False)
                self.report(_("rollback completed\n"))
            except:
                # Deliberately broad: whatever went wrong, tell the user
                # how to recover instead of raising out of the abort path.
                self.report(_("rollback failed - please run hg recover\n"))
        finally:
            self.journal = None
def rollback(opener, file, report):
    """Read the journal at path *file* and undo every entry listed in it.

    opener -- passed through to ``_playback``
    file   -- path of the journal; each line holds a name and an integer
              offset separated by a NUL character
    report -- passed through to ``_playback``

    A line that does not split into exactly two NUL-separated fields, or
    whose offset is not an integer, raises ValueError.
    """
    # Read everything and close the handle up front: _playback unlinks this
    # same path when it finishes.
    fp = open(file)
    try:
        lines = fp.readlines()
    finally:
        fp.close()

    entries = []
    for l in lines:
        f, o = l.split('\0')
        entries.append((f, int(o), None))

    _playback(file, report, opener, entries)