# HG changeset patch # User Pierre-Yves David # Date 2021-04-06 08:38:03 # Node ID 6085b7f1536dbb3e9375907235e6a7a56af6004f # Parent fe34c75f62ab302757ae18973ce7736d5f1c00eb store: also return some information about the type of file `walk` found We start returning a 4th piece of information in the `store.walk` return tuple: the type of the file. This will make it easier for callers to determine which kind of file they are looking at. This should especially help with the `upgrade-repo` code that has to do a lot of fragile index file name comparisons. Differential Revision: https://phab.mercurial-scm.org/D10315 diff --git a/hgext/largefiles/lfutil.py b/hgext/largefiles/lfutil.py --- a/hgext/largefiles/lfutil.py +++ b/hgext/largefiles/lfutil.py @@ -514,7 +514,7 @@ def unixpath(path): def islfilesrepo(repo): '''Return true if the repo is a largefile repo.''' if b'largefiles' in repo.requirements and any( - shortnameslash in f[0] for f in repo.store.datafiles() + shortnameslash in f[1] for f in repo.store.datafiles() ): return True diff --git a/hgext/largefiles/reposetup.py b/hgext/largefiles/reposetup.py --- a/hgext/largefiles/reposetup.py +++ b/hgext/largefiles/reposetup.py @@ -445,7 +445,7 @@ def reposetup(ui, repo): def checkrequireslfiles(ui, repo, **kwargs): if b'largefiles' not in repo.requirements and any( - lfutil.shortname + b'/' in f[0] for f in repo.store.datafiles() + lfutil.shortname + b'/' in f[1] for f in repo.store.datafiles() ): repo.requirements.add(b'largefiles') scmutil.writereporequirements(repo) diff --git a/hgext/narrow/narrowcommands.py b/hgext/narrow/narrowcommands.py --- a/hgext/narrow/narrowcommands.py +++ b/hgext/narrow/narrowcommands.py @@ -276,7 +276,7 @@ def _narrow( repair.strip(ui, unfi, tostrip, topic=b'narrow', backup=backup) todelete = [] - for f, f2, size in repo.store.datafiles(): + for t, f, f2, size in repo.store.datafiles(): if f.startswith(b'data/'): file = f[5:-2] if not newmatch(file): diff --git 
a/hgext/remotefilelog/contentstore.py b/hgext/remotefilelog/contentstore.py --- a/hgext/remotefilelog/contentstore.py +++ b/hgext/remotefilelog/contentstore.py @@ -365,7 +365,7 @@ class manifestrevlogstore(object): ledger.markdataentry(self, treename, node) ledger.markhistoryentry(self, treename, node) - for path, encoded, size in self._store.datafiles(): + for t, path, encoded, size in self._store.datafiles(): if path[:5] != b'meta/' or path[-2:] != b'.i': continue diff --git a/hgext/remotefilelog/remotefilelogserver.py b/hgext/remotefilelog/remotefilelogserver.py --- a/hgext/remotefilelog/remotefilelogserver.py +++ b/hgext/remotefilelog/remotefilelogserver.py @@ -164,24 +164,26 @@ def onetimesetup(ui): b'.d' ): n = util.pconvert(fp[striplen:]) - yield (store.decodedir(n), n, st.st_size) + d = store.decodedir(n) + t = store.FILETYPE_OTHER + yield (t, d, n, st.st_size) if kind == stat.S_IFDIR: visit.append(fp) if scmutil.istreemanifest(repo): - for (u, e, s) in repo.store.datafiles(): + for (t, u, e, s) in repo.store.datafiles(): if u.startswith(b'meta/') and ( u.endswith(b'.i') or u.endswith(b'.d') ): - yield (u, e, s) + yield (t, u, e, s) # Return .d and .i files that do not match the shallow pattern match = state.match if match and not match.always(): - for (u, e, s) in repo.store.datafiles(): + for (t, u, e, s) in repo.store.datafiles(): f = u[5:-2] # trim data/... and .i/.d if not state.match(f): - yield (u, e, s) + yield (t, u, e, s) for x in repo.store.topfiles(): if state.noflatmf and x[0][:11] == b'00manifest.': diff --git a/mercurial/repair.py b/mercurial/repair.py --- a/mercurial/repair.py +++ b/mercurial/repair.py @@ -428,7 +428,7 @@ def manifestrevlogs(repo): if scmutil.istreemanifest(repo): # This logic is safe if treemanifest isn't enabled, but also # pointless, so we skip it if treemanifest isn't enabled. 
- for unencoded, encoded, size in repo.store.datafiles(): + for t, unencoded, encoded, size in repo.store.datafiles(): if unencoded.startswith(b'meta/') and unencoded.endswith( b'00manifest.i' ): diff --git a/mercurial/store.py b/mercurial/store.py --- a/mercurial/store.py +++ b/mercurial/store.py @@ -387,13 +387,44 @@ def _calcmode(vfs): b'requires', ] -REVLOG_FILES_EXT = (b'.i', b'.d', b'.n', b'.nd') +REVLOG_FILES_MAIN_EXT = (b'.i', b'i.tmpcensored') +REVLOG_FILES_OTHER_EXT = (b'.d', b'.n', b'.nd', b'd.tmpcensored') + + +def is_revlog(f, kind, st): + if kind != stat.S_IFREG: + return None + return revlog_type(f) + + +def revlog_type(f): + if f.endswith(REVLOG_FILES_MAIN_EXT): + return FILEFLAGS_REVLOG_MAIN + elif f.endswith(REVLOG_FILES_OTHER_EXT): + return FILETYPE_FILELOG_OTHER -def isrevlog(f, kind, st): - if kind != stat.S_IFREG: - return False - return f.endswith(REVLOG_FILES_EXT) +# the file is part of changelog data +FILEFLAGS_CHANGELOG = 1 << 13 +# the file is part of manifest data +FILEFLAGS_MANIFESTLOG = 1 << 12 +# the file is part of filelog data +FILEFLAGS_FILELOG = 1 << 11 +# file that are not directly part of a revlog +FILEFLAGS_OTHER = 1 << 10 + +# the main entry point for a revlog +FILEFLAGS_REVLOG_MAIN = 1 << 1 +# a secondary file for a revlog +FILEFLAGS_REVLOG_OTHER = 1 << 0 + +FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN +FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER +FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN +FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER +FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN +FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER +FILETYPE_OTHER = FILEFLAGS_OTHER class basicstore(object): @@ -425,9 +456,10 @@ class basicstore(object): p = visit.pop() for f, kind, st in readdir(p, stat=True): fp = p + b'/' + f - if isrevlog(f, kind, st): + rl_type = is_revlog(f, kind, st) + 
if rl_type is not None: n = util.pconvert(fp[striplen:]) - l.append((decodedir(n), n, st.st_size)) + l.append((rl_type, decodedir(n), n, st.st_size)) elif kind == stat.S_IFDIR and recurse: visit.append(fp) l.sort() @@ -445,16 +477,25 @@ class basicstore(object): return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch) def datafiles(self, matcher=None): - return self._walk(b'data', True) + self._walk(b'meta', True) + files = self._walk(b'data', True) + self._walk(b'meta', True) + for (t, u, e, s) in files: + yield (FILEFLAGS_FILELOG | t, u, e, s) def topfiles(self): # yield manifest before changelog - return reversed(self._walk(b'', False)) + files = reversed(self._walk(b'', False)) + for (t, u, e, s) in files: + if u.startswith(b'00changelog'): + yield (FILEFLAGS_CHANGELOG | t, u, e, s) + elif u.startswith(b'00manifest'): + yield (FILEFLAGS_MANIFESTLOG | t, u, e, s) + else: + yield (FILETYPE_OTHER | t, u, e, s) def walk(self, matcher=None): """return file related to data storage (ie: revlogs) - yields (unencoded, encoded, size) + yields (file_type, unencoded, encoded, size) if a matcher is passed, storage files of only those tracked paths are passed with matches the matcher @@ -500,14 +541,14 @@ class encodedstore(basicstore): self.opener = self.vfs def datafiles(self, matcher=None): - for a, b, size in super(encodedstore, self).datafiles(): + for t, a, b, size in super(encodedstore, self).datafiles(): try: a = decodefilename(a) except KeyError: a = None if a is not None and not _matchtrackedpath(a, matcher): continue - yield a, b, size + yield t, a, b, size def join(self, f): return self.path + b'/' + encodefilename(f) @@ -696,7 +737,9 @@ class fncachestore(basicstore): continue ef = self.encode(f) try: - yield f, ef, self.getsize(ef) + t = revlog_type(f) + t |= FILEFLAGS_FILELOG + yield t, f, ef, self.getsize(ef) except OSError as err: if err.errno != errno.ENOENT: raise diff --git a/mercurial/streamclone.py b/mercurial/streamclone.py --- 
a/mercurial/streamclone.py +++ b/mercurial/streamclone.py @@ -243,7 +243,7 @@ def generatev1(repo): # Get consistent snapshot of repo, lock during scan. with repo.lock(): repo.ui.debug(b'scanning\n') - for name, ename, size in _walkstreamfiles(repo): + for file_type, name, ename, size in _walkstreamfiles(repo): if size: entries.append((name, size)) total_bytes += size @@ -616,7 +616,7 @@ def generatev2(repo, includes, excludes, matcher = narrowspec.match(repo.root, includes, excludes) repo.ui.debug(b'scanning\n') - for name, ename, size in _walkstreamfiles(repo, matcher): + for rl_type, name, ename, size in _walkstreamfiles(repo, matcher): if size: entries.append((_srcstore, name, _fileappend, size)) totalfilesize += size diff --git a/mercurial/upgrade_utils/engine.py b/mercurial/upgrade_utils/engine.py --- a/mercurial/upgrade_utils/engine.py +++ b/mercurial/upgrade_utils/engine.py @@ -192,7 +192,7 @@ def _clonerevlogs( # Perform a pass to collect metadata. This validates we can open all # source files and allows a unified progress bar to be displayed. 
- for unencoded, encoded, size in alldatafiles: + for revlog_type, unencoded, encoded, size in alldatafiles: if not unencoded.endswith(b'.i'): continue diff --git a/mercurial/verify.py b/mercurial/verify.py --- a/mercurial/verify.py +++ b/mercurial/verify.py @@ -416,7 +416,7 @@ class verifier(object): storefiles = set() subdirs = set() revlogv1 = self.revlogv1 - for f, f2, size in repo.store.datafiles(): + for t, f, f2, size in repo.store.datafiles(): if not f: self._err(None, _(b"cannot decode filename '%s'") % f2) elif (size > 0 or not revlogv1) and f.startswith(b'meta/'): @@ -480,7 +480,7 @@ class verifier(object): ui.status(_(b"checking files\n")) storefiles = set() - for f, f2, size in repo.store.datafiles(): + for rl_type, f, f2, size in repo.store.datafiles(): if not f: self._err(None, _(b"cannot decode filename '%s'") % f2) elif (size > 0 or not revlogv1) and f.startswith(b'data/'): diff --git a/mercurial/wireprotov2server.py b/mercurial/wireprotov2server.py --- a/mercurial/wireprotov2server.py +++ b/mercurial/wireprotov2server.py @@ -1582,7 +1582,8 @@ def rawstorefiledata(repo, proto, files, # TODO this is a bunch of storage layer interface abstractions because # it assumes revlogs. - for name, encodedname, size in topfiles: + for rl_type, name, encodedname, size in topfiles: + # XXX use the `rl_type` for that if b'changelog' in files and name.startswith(b'00changelog'): pass elif b'manifestlog' in files and name.startswith(b'00manifest'): diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t --- a/tests/test-persistent-nodemap.t +++ b/tests/test-persistent-nodemap.t @@ -754,15 +754,15 @@ The persistent nodemap should exist afte $ hg clone -U --stream --config ui.ssh="\"$PYTHON\" \"$TESTDIR/dummyssh\"" ssh://user@dummy/test-repo stream-clone --debug | egrep '00(changelog|manifest)' adding [s] 00manifest.n (70 bytes) - adding [s] 00manifest.i (313 KB) adding [s] 00manifest.d (452 KB) (no-zstd !) 
adding [s] 00manifest.d (491 KB) (zstd !) adding [s] 00manifest-*.nd (118 KB) (glob) adding [s] 00changelog.n (70 bytes) - adding [s] 00changelog.i (313 KB) adding [s] 00changelog.d (360 KB) (no-zstd !) adding [s] 00changelog.d (368 KB) (zstd !) adding [s] 00changelog-*.nd (118 KB) (glob) + adding [s] 00manifest.i (313 KB) + adding [s] 00changelog.i (313 KB) $ ls -1 stream-clone/.hg/store/ | egrep '00(changelog|manifest)(\.n|-.*\.nd)' 00changelog-*.nd (glob) 00changelog.n