# HG changeset patch # User Pierre-Yves David # Date 2023-05-30 16:43:59 # Node ID e06d1a779eb6995bbb06dfb11da6fed3c0415a59 # Parent 11562d72cb7b7254594d21339925b5e72ef7d3cd store: stop relying on a `revlog_type` property We want to know if a file is related to a revlog, but the rest is dealt with differently already, so we simplify things further. as a bonus, this cleanup This provides a small but noticeable speedup. The number below use `hg perf::stream-locked-section` to measure the time spend in the locked section of the streaming clone. Number are run on various repository and compare different steps.: 1) the effect of this patchs, 2) the effect of the cleanup series, 2) current state compared to because large refactoring. All benchmarks are run on linux with Python 3.10.7. ### Effect of this patch # mercurial-2018-08-01-zstd-sparse-revlog # benchmark.name = perf-stream-locked-section before: 0.030246 seconds after: 0.029274 seconds (-3.21%) # pypy-2018-08-01-zstd-sparse-revlog before: 0.545012 seconds after: 0.520872 seconds (-4.43%) # netbeans-2018-08-01-zstd-sparse-revlog before: 2.719939 seconds after: 2.626791 seconds (-3.42%) # mozilla-central-2018-08-01-zstd-sparse-revlog before: 6.304179 seconds after: 6.096700 seconds (-3.29%) # mozilla-try-2019-02-18-zstd-sparse-revlog before: 14.142687 seconds after: 13.640779 seconds (-3.55%) ### Effect of this series A small but sizeable speedup # mercurial-2018-08-01-zstd-sparse-revlog before: 0.031122 seconds after: 0.029274 seconds (-5.94%) # pypy-2018-08-01-zstd-sparse-revlog before: 0.589970 seconds after: 0.520872 seconds (-11.71%) # netbeans-2018-08-01-zstd-sparse-revlog before: 2.980300 seconds after: 2.626791 seconds (-11.86%) # mozilla-central-2018-08-01-zstd-sparse-revlog before: 6.863204 seconds after: 6.096700 seconds (-11.17%) # mozilla-try-2019-02-18-zstd-sparse-revlog before: 14.921393 seconds after: 13.640779 seconds (-8.58%) ### Current state compared to the pre-refactoring state The refactoring introduced multiple string manipulation and dictionary creation that seems to induce a signifiant slowdown Slowdown # mercurial-2018-08-01-zstd-sparse-revlog 6.4.3: 0.019459 seconds after: 0.029274 seconds (+50.44%) ## pypy-2018-08-01-zstd-sparse-revlog 6.4.3: 0.290715 seconds after: 0.520872 seconds (+79.17%) # netbeans-2018-08-01-zstd-sparse-revlog 6.4.3: 1.403447 seconds after: 2.626791 seconds (+87.17%) # mozilla-central-2018-08-01-zstd-sparse-revlog 6.4.3: 3.163549 seconds after: 6.096700 seconds (+92.72%) # mozilla-try-2019-02-18-zstd-sparse-revlog 6.4.3: 6.702184 seconds after: 13.640779 seconds (+103.53%) diff --git a/mercurial/store.py b/mercurial/store.py --- a/mercurial/store.py +++ b/mercurial/store.py @@ -385,8 +385,8 @@ def _calcmode(vfs): b'requires', ] -REVLOG_FILES_MAIN_EXT = (b'.i',) -REVLOG_FILES_OTHER_EXT = ( +REVLOG_FILES_EXT = ( + b'.i', b'.idx', b'.d', b'.dat', @@ -415,22 +415,16 @@ EXCLUDED = re.compile(br'.*undo\.[^/]+\. def is_revlog(f, kind, st): if kind != stat.S_IFREG: - return None - return revlog_type(f) + return False + if f.endswith(REVLOG_FILES_EXT): + return True + return False -def revlog_type(f): - # XXX we need to filter `undo.` created by the transaction here, however - # being naive about it also filter revlog for `undo.*` files, leading to - # issue6542. So we no longer use EXCLUDED. - if f.endswith(REVLOG_FILES_MAIN_EXT): - return FILEFLAGS_REVLOG_MAIN - elif f.endswith(REVLOG_FILES_OTHER_EXT): - t = FILETYPE_FILELOG_OTHER - if f.endswith(REVLOG_FILES_VOLATILE_EXT): - t |= FILEFLAGS_VOLATILE - return t - return None +def is_revlog_file(f): + if f.endswith(REVLOG_FILES_EXT): + return True + return False # the file is part of changelog data @@ -758,10 +752,9 @@ class basicstore: p = visit.pop() for f, kind, st in readdir(p, stat=True): fp = p + b'/' + f - rl_type = is_revlog(f, kind, st) - if rl_type is not None: + if is_revlog(f, kind, st): n = util.pconvert(fp[striplen:]) - l.append((decodedir(n), (rl_type, st.st_size))) + l.append((decodedir(n), st.st_size)) elif kind == stat.S_IFDIR and recurse: visit.append(fp) @@ -794,20 +787,16 @@ class basicstore: ] for base_dir, rl_type, strip_filename in dirs: files = self._walk(base_dir, True, undecodable=undecodable) - files = (f for f in files if f[1][0] is not None) for revlog, details in _gather_revlog(files): - file_details = {} revlog_target_id = revlog.split(b'/', 1)[1] if strip_filename and b'/' in revlog: revlog_target_id = revlog_target_id.rsplit(b'/', 1)[0] revlog_target_id += b'/' - for ext, (t, size) in sorted(details.items()): - file_details[ext] = size yield RevlogStoreEntry( path_prefix=revlog, revlog_type=rl_type, target_id=revlog_target_id, - details=file_details, + details=details, ) def top_entries( @@ -831,17 +820,17 @@ class basicstore: changelogs = collections.defaultdict(dict) manifestlogs = collections.defaultdict(dict) - for u, (t, s) in files: + for u, s in files: if u.startswith(b'00changelog'): name, ext = _split_revlog_ext(u) - changelogs[name][ext] = (t, s) + changelogs[name][ext] = s elif u.startswith(b'00manifest'): name, ext = _split_revlog_ext(u) - manifestlogs[name][ext] = (t, s) + manifestlogs[name][ext] = s else: yield SimpleStoreEntry( entry_path=u, - is_volatile=bool(t & FILEFLAGS_VOLATILE), + is_volatile=False, file_size=s, ) # yield manifest before changelog @@ -853,14 +842,11 @@ class basicstore: assert len(changelogs) <= 1 for data, revlog_type in top_rl: for revlog, details in sorted(data.items()): - file_details = {} - for ext, (t, size) in details.items(): - file_details[ext] = size yield RevlogStoreEntry( path_prefix=revlog, revlog_type=revlog_type, target_id=b'', - details=file_details, + details=details, ) def walk( @@ -1083,7 +1069,7 @@ class _fncachevfs(vfsmod.proxyvfs): if ( mode not in (b'r', b'rb') and (path.startswith(b'data/') or path.startswith(b'meta/')) - and revlog_type(path) is not None + and is_revlog_file(path) ): # do not trigger a fncache load when adding a file that already is # known to exist. @@ -1136,14 +1122,12 @@ class fncachestore(basicstore): def data_entries( self, matcher=None, undecodable=None ) -> Generator[BaseStoreEntry, None, None]: - files = ((f, revlog_type(f)) for f in self.fncache) # Note: all files in fncache should be revlog related, However the # fncache might contains such file added by previous version of # Mercurial. - files = (f for f in files if f[1] is not None) + files = ((f, None) for f in self.fncache if is_revlog_file(f)) by_revlog = _gather_revlog(files) for revlog, details in by_revlog: - file_details = {} if revlog.startswith(b'data/'): rl_type = FILEFLAGS_FILELOG revlog_target_id = revlog.split(b'/', 1)[1] @@ -1155,13 +1139,11 @@ class fncachestore(basicstore): else: # unreachable assert False, revlog - for ext in details: - file_details[ext] = None entry = RevlogStoreEntry( path_prefix=revlog, revlog_type=rl_type, target_id=revlog_target_id, - details=file_details, + details=details, ) if _match_tracked_entry(entry, matcher): yield entry diff --git a/mercurial/upgrade_utils/engine.py b/mercurial/upgrade_utils/engine.py --- a/mercurial/upgrade_utils/engine.py +++ b/mercurial/upgrade_utils/engine.py @@ -370,7 +370,7 @@ def _files_to_copy_post_revlog_clone(src are cloned""" for path, kind, st in sorted(srcrepo.store.vfs.readdir(b'', stat=True)): # don't copy revlogs as they are already cloned - if store.revlog_type(path) is not None: + if store.is_revlog_file(path): continue # Skip transaction related files. if path.startswith(b'undo'):