##// END OF EJS Templates
localrepo.commit: normalize commit message even for rawcommit....
localrepo.commit: normalize commit message even for rawcommit.

This normalization consists of:
- stripping trailing whitespace
- always using "\n" as the line separator

I think the main reason rawcommit was skipping this normalization was an attempt to preserve hashes during an hg->hg conversion. While this is a nice goal, it's not particularly interesting in practice. Since SHA-1 is so strong, the only safe way to do it is to have absolutely identical revisions.

But:
- if the original revision was created with a recent version of hg, the commit message will be the same, with or without that normalization
- if it was created with an ancient version of hg that didn't do any normalization, even if the commit message is identical, the file list in the changelog is likely to be different (e.g. no removed files), and there were some old issues with e.g. extra file merging, which will end up changing the hash anyway
- in any case, if one *really* has to preserve hashes, it's easier (and faster) to fake a partial conversion using something like:

    hg clone -U -r rev orig-repo new-repo
    hg -R new-repo log --template '#node# #node#\n' > new-repo/.hg/shamap

Additionally, we've had some reports of problems arising from this lack of normalization - e.g. issue871, and a user that was wondering why hg export/hg import was not preserving hashes when there was nothing unusual going on (it was just import doing the normalization that had been skipped).

This also means that it's even more unlikely to get identical revisions when going $VCS->hg->$VCS.

File last commit:

r6212:e75aab65 default
r6254:3667b6e4 default
Show More
changelog.py
192 lines | 6.3 KiB | text/x-python | PythonLexer
mpm@selenic.com
changelog: adjust imports, comment
r1095 # changelog.py - changelog class for mercurial
mpm@selenic.com
Break apart hg.py...
r1089 #
Thomas Arendsen Hein
Updated copyright notices and add "and others" to "hg version"
r4635 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
mpm@selenic.com
Break apart hg.py...
r1089 #
# This software may be used and distributed according to the terms
# of the GNU General Public License, incorporated herein by reference.
Joel Rosdahl
Expand import * to allow Pyflakes to find problems
r6211 from node import bin, hex, nullid
from revlog import revlog
Joel Rosdahl
Remove unused imports
r6212 import util
mpm@selenic.com
Break apart hg.py...
r1089
Benoit Boissinot
[extendedchangelog] encode/decode function...
r3232 def _string_escape(text):
"""
>>> d = {'nl': chr(10), 'bs': chr(92), 'cr': chr(13), 'nul': chr(0)}
>>> s = "ab%(nl)scd%(bs)s%(bs)sn%(nul)sab%(cr)scd%(bs)s%(nl)s" % d
>>> s
'ab\\ncd\\\\\\\\n\\x00ab\\rcd\\\\\\n'
>>> res = _string_escape(s)
Matt Mackall
changelog: inline trivial call for extra data unescaping
r5745 >>> s == res.decode('string_escape')
Benoit Boissinot
[extendedchangelog] encode/decode function...
r3232 True
"""
# subset of the string_escape codec
text = text.replace('\\', '\\\\').replace('\n', '\\n').replace('\r', '\\r')
return text.replace('\0', '\\0')
Matt Mackall
restructure changelog file appending...
class appender:
    '''the changelog index must be updated last on disk, so we use this
    class to delay writes to it

    An appender looks like a file whose contents are the real file fp
    followed by the strings accumulated in the list buf.  Reads may span
    both parts; writes only ever go to the in-memory list.
    '''
    def __init__(self, fp, buf):
        '''wrap the open file fp; buf is the list that collects the
        delayed writes (it is kept by reference and modified in place)'''
        self.data = buf
        self.fp = fp
        # virtual position, starting wherever the real file currently is
        self.offset = fp.tell()
        # size of the on-disk part; it never changes while we are delaying
        self.size = util.fstat(fp).st_size

    def end(self):
        '''return the virtual size: real file plus buffered data'''
        return self.size + len("".join(self.data))

    def tell(self):
        '''return the current virtual offset'''
        return self.offset

    def flush(self):
        '''nothing to flush, buffered data is deliberately kept in memory'''
        pass

    def close(self):
        '''close the underlying real file; the buffer is left untouched'''
        self.fp.close()

    def seek(self, offset, whence=0):
        '''virtual file offset spans real file and data'''
        if whence == 0:
            self.offset = offset
        elif whence == 1:
            self.offset += offset
        elif whence == 2:
            self.offset = self.end() + offset
        # the real file only needs repositioning while we are inside it
        if self.offset < self.size:
            self.fp.seek(self.offset)

    def read(self, count=-1):
        '''only trick here is reads that span real file and data

        Return up to count characters starting at the current virtual
        offset.  A negative count (the default) reads everything up to
        the virtual end.
        '''
        ret = ""
        if self.offset < self.size:
            s = self.fp.read(count)
            ret = s
            self.offset += len(s)
            if count > 0:
                # what is still wanted from the buffered part
                count -= len(s)
        if count != 0:
            doff = self.offset - self.size
            # collapse the buffer into a single string so it can be sliced;
            # done in place because the list is shared with our creator
            self.data.insert(0, "".join(self.data))
            del self.data[1:]
            if count < 0:
                # read-to-end: slicing with doff + count would cut the
                # tail off (or return nothing) for a negative count
                s = self.data[0][doff:]
            else:
                s = self.data[0][doff:doff + count]
            self.offset += len(s)
            ret += s
        return ret

    def write(self, s):
        '''append s to the in-memory buffer and advance the offset'''
        # str() so that a copy is stored rather than e.g. a buffer object
        self.data.append(str(s))
        self.offset += len(s)
mpm@selenic.com
Break apart hg.py...
class changelog(revlog):
    """revlog of changeset entries, stored in "00changelog.i"

    On top of the plain revlog this class knows how to encode and decode
    changeset entries (see read() for the format) and how to delay index
    writes so that they only become visible once finalize() is called.
    """
    def __init__(self, opener):
        """open the changelog index through opener"""
        revlog.__init__(self, opener, "00changelog.i")

    def delayupdate(self):
        "delay visibility of index updates to other readers"
        self._realopener = self.opener
        # from now on every open goes through _delayopener
        self.opener = self._delayopener
        # revision count when the delay started; _delayopener treats zero
        # as an initial clone
        self._delaycount = self.count()
        # in-memory index data (used when diverting to memory)
        self._delaybuf = []
        # name of the real index file (set when diverting to a ".a" file)
        self._delayname = None

    def finalize(self, tr):
        "finalize index updates"
        self.opener = self._realopener
        # move redirected index data back into place
        if self._delayname:
            util.rename(self._delayname + ".a", self._delayname)
        elif self._delaybuf:
            fp = self.opener(self.indexfile, 'a')
            fp.write("".join(self._delaybuf))
            fp.close()
            del self._delaybuf
        # split when we're done
        self.checkinlinesize(tr)

    def _delayopener(self, name, mode='r'):
        """opener used while updates are delayed

        Files other than the index are opened normally.  Opens of the
        index are diverted either to a sibling file named name + ".a"
        (when the changelog was empty at delayupdate() time) or to an
        appender that buffers writes in self._delaybuf.
        """
        fp = self._realopener(name, mode)
        # only divert the index
        if not name == self.indexfile:
            return fp
        # if we're doing an initial clone, divert to another file
        if self._delaycount == 0:
            # NOTE(review): fp is only used for its name here and is not
            # closed explicitly - confirm that this is intended
            self._delayname = fp.name
            return self._realopener(name + ".a", mode)
        # otherwise, divert to memory
        return appender(fp, self._delaybuf)

    def checkinlinesize(self, tr, fp=None):
        """run revlog.checkinlinesize unless updates are being delayed"""
        if self.opener == self._delayopener:
            return
        return revlog.checkinlinesize(self, tr, fp)

    def decode_extra(self, text):
        """decode the extra field of an entry into a dict

        text holds NUL-separated, string_escape-encoded "key:value" items;
        empty items are skipped.
        """
        extra = {}
        for l in text.split('\0'):
            if l:
                # split on the first colon only, values may contain colons
                k, v = l.decode('string_escape').split(':', 1)
                extra[k] = v
        return extra

    def encode_extra(self, d):
        """encode the dict d as NUL-separated escaped "key:value" items"""
        # keys must be sorted to produce a deterministic changelog entry
        keys = d.keys()
        keys.sort()
        items = [_string_escape('%s:%s' % (k, d[k])) for k in keys]
        return "\0".join(items)

    def read(self, node):
        r"""
        return the changeset stored for node as the tuple
        (manifest, user, (time, timezone), files, desc, extra)

        An empty revision text yields a null entry whose only metadata is
        the 'default' branch.  ValueError is raised by the header lookup
        if the text has no blank line separating header and description.

        format used:
        nodeid\n        : manifest node in ascii
        user\n          : user, no \n or \r allowed
        time tz extra\n : date (time is int or float, timezone is int)
                        : extra is metadatas, encoded and separated by '\0'
                        : older versions ignore it
        files\n\n       : files modified by the cset, no \n or \r allowed
        (.*)            : comment (free text, ideally utf-8)

        changelog v0 doesn't use extra
        """
        text = self.revision(node)
        if not text:
            return (nullid, "", (0, 0), [], "", {'branch': 'default'})
        # the first blank line separates the header from the description
        last = text.index("\n\n")
        desc = util.tolocal(text[last + 2:])
        l = text[:last].split('\n')
        manifest = bin(l[0])
        user = util.tolocal(l[1])

        extra_data = l[2].split(' ', 2)
        if len(extra_data) != 3:
            # no extra field present
            time = float(extra_data.pop(0))
            try:
                # various tools did silly things with the time zone field.
                timezone = int(extra_data[0])
            except (ValueError, IndexError):
                # missing or unparsable time zone: fall back to 0
                timezone = 0
            extra = {}
        else:
            time, timezone, extra = extra_data
            time, timezone = float(time), int(timezone)
            extra = self.decode_extra(extra)
        if not extra.get('branch'):
            extra['branch'] = 'default'
        files = l[3:]
        return (manifest, user, (time, timezone), files, desc, extra)

    def add(self, manifest, list, desc, transaction, p1=None, p2=None,
            user=None, date=None, extra={}):
        """add a changeset entry and return the result of addrevision

        manifest    : binary manifest node, stored hex-encoded
        list        : list of file names touched by the changeset; it is
                      stored sorted, the caller's list is left untouched
        desc        : description (converted with util.fromlocal)
        transaction : transaction passed through to addrevision
        p1, p2      : parents passed through to addrevision
        user        : user name (converted with util.fromlocal)
        date        : date handed to util.parsedate; when false the
                      result of util.makedate() is used instead
        extra       : dict of extra metadata; a 'branch' of "default" or
                      "" is not stored.  The dict itself is never modified.
        """
        user, desc = util.fromlocal(user), util.fromlocal(desc)

        if date:
            parseddate = "%d %d" % util.parsedate(date)
        else:
            parseddate = "%d %d" % util.makedate()
        if extra and extra.get("branch") in ("default", ""):
            # drop the implicit branch from a copy so that neither the
            # caller's dict nor the shared default argument is modified
            extra = extra.copy()
            del extra["branch"]
        if extra:
            extra = self.encode_extra(extra)
            parseddate = "%s %s" % (parseddate, extra)
        # sort a copy: the stored order is the same, but the caller's list
        # keeps its original order
        files = list[:]
        files.sort()
        l = [hex(manifest), user, parseddate] + files + ["", desc]
        text = "\n".join(l)
        return self.addrevision(text, transaction, self.count(), p1, p2)