# HG changeset patch
# User Gregory Szorc
# Date 2016-03-06 22:30:25
# Node ID 837f1c437d5832e8efe4b7211a7ea96d1cf3e047
# Parent f57f7500a095fe4dfb31a84b4c39dbe606271955
changelog: lazily parse date/extra field

This is probably the most complicated patch in the parsing refactor. Because
the date and extras are encoded in the same field, we stuff the entire field
into a dedicated variable and add a property for accessing the sub-components
of each.

There is some duplicated code here. But the code is relatively simple, so it
shouldn't be a big deal.

We see revset performance wins across the board:

author(mpm)                     0.896565 0.876713 0.822961
desc(bug)                       0.887169 0.895514 0.847054
date(2015)                      0.878797 0.820987 0.811613
extra(rebase_source)            0.865446 0.823811 0.797756
author(mpm) or author(greg)     1.801832 1.784160 1.668172
author(mpm) or desc(bug)        1.812438 1.822756 1.677608
date(2015) or branch(default)   0.968276 0.910981 0.896032
author(mpm) or desc(bug) or date(2015) or extra(rebase_source)
                                3.656193 3.516788 3.265024

We see a speed-up on revsets accessing date and extras because the new parsing
code only parses what you access. Even though they are stored in the same text
field, we avoid parsing dates when accessing extras and vice-versa.

But strangely revsets accessing both date and extras appeared to speed up as
well! I'm not sure if this is due to refactoring the parsing code or due to an
optimization in revsets. You can't argue with the results!

diff --git a/mercurial/changelog.py b/mercurial/changelog.py --- a/mercurial/changelog.py +++ b/mercurial/changelog.py @@ -151,9 +151,8 @@ class changelogrevision(object): """ __slots__ = ( - 'date', + '_rawdateextra', '_rawdesc', - 'extra', 'files', '_rawmanifest', '_rawuser', @@ -194,22 +193,10 @@ class changelogrevision(object): nl2 = text.index('\n', nl1 + 1) self._rawuser = text[nl1 + 1:nl2] - l = text[:doublenl].split('\n') + nl3 = text.index('\n', nl2 + 1) + self._rawdateextra = text[nl2 + 1:nl3] - tdata = l[2].split(' ', 2) - if len(tdata) != 3: - time = float(tdata[0]) - try: - # various tools did silly things with the time zone field. - timezone = int(tdata[1]) - except ValueError: - timezone = 0 - self.extra = _defaultextra - else: - time, timezone = float(tdata[0]), int(tdata[1]) - self.extra = decodeextra(tdata[2]) - - self.date = (time, timezone) + l = text[:doublenl].split('\n') self.files = l[3:] return self @@ -223,6 +210,38 @@ class changelogrevision(object): return encoding.tolocal(self._rawuser) @property + def _rawdate(self): + return self._rawdateextra.split(' ', 2)[0:2] + + @property + def _rawextra(self): + fields = self._rawdateextra.split(' ', 2) + if len(fields) != 3: + return None + + return fields[2] + + @property + def date(self): + raw = self._rawdate + time = float(raw[0]) + # Various tools did silly things with the timezone. + try: + timezone = int(raw[1]) + except ValueError: + timezone = 0 + + return time, timezone + + @property + def extra(self): + raw = self._rawextra + if raw is None: + return _defaultextra + + return decodeextra(raw) + + @property def description(self): return encoding.tolocal(self._rawdesc)