# Copyright 2007 Bryan O'Sullivan # Copyright 2007 Alexis S. L. Carvalho # # This software may be used and distributed according to the terms of the # GNU General Public License version 2 or any later version. import posixpath import typing from typing import ( Iterator, Mapping, MutableMapping, Optional, Set, Tuple, overload, ) from mercurial.i18n import _ from mercurial import ( error, pycompat, ) from . import common if typing.TYPE_CHECKING: from mercurial import ( ui as uimod, ) SKIPREV = common.SKIPREV def rpairs(path: bytes) -> Iterator[Tuple[bytes, bytes]]: """Yield tuples with path split at '/', starting with the full path. No leading, trailing or double '/', please. >>> for x in rpairs(b'foo/bar/baz'): print(x) ('foo/bar/baz', '') ('foo/bar', 'baz') ('foo', 'bar/baz') ('.', 'foo/bar/baz') """ i = len(path) while i != -1: yield path[:i], path[i + 1 :] i = path.rfind(b'/', 0, i) yield b'.', path if typing.TYPE_CHECKING: @overload def normalize(path: bytes) -> bytes: pass @overload def normalize(path: None) -> None: pass def normalize(path): """We use posixpath.normpath to support cross-platform path format. However, it doesn't handle None input. So we wrap it up.""" if path is None: return None return posixpath.normpath(path) class filemapper: """Map and filter filenames when importing. A name can be mapped to itself, a new name, or None (omit from new repository).""" rename: MutableMapping[bytes, bytes] targetprefixes: Optional[Set[bytes]] def __init__(self, ui: "uimod.ui", path: Optional[bytes] = None) -> None: self.ui = ui self.include = {} self.exclude = {} self.rename = {} self.targetprefixes = None if path: if self.parse(path): raise error.Abort(_(b'errors in filemap')) def parse(self, path: Optional[bytes]) -> int: errs = 0 def check(name: bytes, mapping, listname: bytes): if not name: self.ui.warn( _(b'%s:%d: path to %s is missing\n') % (lex.infile, lex.lineno, listname) ) return 1 if name in mapping: self.ui.warn( _(b'%s:%d: %r already in %s list\n') % (lex.infile, lex.lineno, name, listname) ) return 1 if name.startswith(b'/') or name.endswith(b'/') or b'//' in name: self.ui.warn( _(b'%s:%d: superfluous / in %s %r\n') % (lex.infile, lex.lineno, listname, pycompat.bytestr(name)) ) return 1 return 0 lex = common.shlexer( filepath=path, wordchars=b'!@#$%^&*()-=+[]{}|;:,./<>?' ) cmd = lex.get_token() while cmd: if cmd == b'include': name = normalize(lex.get_token()) errs += check(name, self.exclude, b'exclude') self.include[name] = name elif cmd == b'exclude': name = normalize(lex.get_token()) errs += check(name, self.include, b'include') errs += check(name, self.rename, b'rename') self.exclude[name] = name elif cmd == b'rename': src = normalize(lex.get_token()) dest = normalize(lex.get_token()) errs += check(src, self.exclude, b'exclude') self.rename[src] = dest elif cmd == b'source': errs += self.parse(normalize(lex.get_token())) else: self.ui.warn( _(b'%s:%d: unknown directive %r\n') % (lex.infile, lex.lineno, pycompat.bytestr(cmd)) ) errs += 1 cmd = lex.get_token() return errs def lookup( self, name: bytes, mapping: Mapping[bytes, bytes] ) -> Tuple[bytes, bytes, bytes]: name = normalize(name) for pre, suf in rpairs(name): try: return mapping[pre], pre, suf except KeyError: pass return b'', name, b'' def istargetfile(self, filename: bytes) -> bool: """Return true if the given target filename is covered as a destination of the filemap. This is useful for identifying what parts of the target repo belong to the source repo and what parts don't.""" if self.targetprefixes is None: self.targetprefixes = set() for before, after in self.rename.items(): self.targetprefixes.add(after) # If "." is a target, then all target files are considered from the # source. if not self.targetprefixes or b'.' in self.targetprefixes: return True filename = normalize(filename) for pre, suf in rpairs(filename): # This check is imperfect since it doesn't account for the # include/exclude list, but it should work in filemaps that don't # apply include/exclude to the same source directories they are # renaming. if pre in self.targetprefixes: return True return False def __call__(self, name: bytes) -> Optional[bytes]: if self.include: inc = self.lookup(name, self.include)[0] else: inc = name if self.exclude: exc = self.lookup(name, self.exclude)[0] else: exc = b'' if (not self.include and exc) or (len(inc) <= len(exc)): return None newpre, pre, suf = self.lookup(name, self.rename) if newpre: if newpre == b'.': return suf if suf: if newpre.endswith(b'/'): return newpre + suf return newpre + b'/' + suf return newpre return name def active(self) -> bool: return bool(self.include or self.exclude or self.rename) # This class does two additional things compared to a regular source: # # - Filter and rename files. This is mostly wrapped by the filemapper # class above. We hide the original filename in the revision that is # returned by getchanges to be able to find things later in getfile. # # - Return only revisions that matter for the files we're interested in. # This involves rewriting the parents of the original revision to # create a graph that is restricted to those revisions. # # This set of revisions includes not only revisions that directly # touch files we're interested in, but also merges that merge two # or more interesting revisions. class filemap_source(common.converter_source): def __init__( self, ui: "uimod.ui", baseconverter, filemap: Optional[bytes] ) -> None: super(filemap_source, self).__init__(ui, baseconverter.repotype) self.base = baseconverter self.filemapper = filemapper(ui, filemap) self.commits = {} # if a revision rev has parent p in the original revision graph, then # rev will have parent self.parentmap[p] in the restricted graph. self.parentmap = {} # self.wantedancestors[rev] is the set of all ancestors of rev that # are in the restricted graph. self.wantedancestors = {} self.convertedorder = None self._rebuilt = False self.origparents = {} self.children = {} self.seenchildren = {} # experimental config: convert.ignoreancestorcheck self.ignoreancestorcheck = self.ui.configbool( b'convert', b'ignoreancestorcheck' ) def before(self) -> None: self.base.before() def after(self) -> None: self.base.after() def setrevmap(self, revmap): # rebuild our state to make things restartable # # To avoid calling getcommit for every revision that has already # been converted, we rebuild only the parentmap, delaying the # rebuild of wantedancestors until we need it (i.e. until a # merge). # # We assume the order argument lists the revisions in # topological order, so that we can infer which revisions were # wanted by previous runs. self._rebuilt = not revmap seen = {SKIPREV: SKIPREV} dummyset = set() converted = [] for rev in revmap.order: mapped = revmap[rev] wanted = mapped not in seen if wanted: seen[mapped] = rev self.parentmap[rev] = rev else: self.parentmap[rev] = seen[mapped] self.wantedancestors[rev] = dummyset arg = seen[mapped] if arg == SKIPREV: arg = None converted.append((rev, wanted, arg)) self.convertedorder = converted return self.base.setrevmap(revmap) def rebuild(self) -> bool: if self._rebuilt: return True self._rebuilt = True self.parentmap.clear() self.wantedancestors.clear() self.seenchildren.clear() for rev, wanted, arg in self.convertedorder: if rev not in self.origparents: try: self.origparents[rev] = self.getcommit(rev).parents except error.RepoLookupError: self.ui.debug(b"unknown revmap source: %s\n" % rev) continue if arg is not None: self.children[arg] = self.children.get(arg, 0) + 1 for rev, wanted, arg in self.convertedorder: try: parents = self.origparents[rev] except KeyError: continue # unknown revmap source if wanted: self.mark_wanted(rev, parents) else: self.mark_not_wanted(rev, arg) self._discard(arg, *parents) return True def getheads(self): return self.base.getheads() def getcommit(self, rev: bytes): # We want to save a reference to the commit objects to be able # to rewrite their parents later on. c = self.commits[rev] = self.base.getcommit(rev) for p in c.parents: self.children[p] = self.children.get(p, 0) + 1 return c def numcommits(self): return self.base.numcommits() def _cachedcommit(self, rev): if rev in self.commits: return self.commits[rev] return self.base.getcommit(rev) def _discard(self, *revs) -> None: for r in revs: if r is None: continue self.seenchildren[r] = self.seenchildren.get(r, 0) + 1 if self.seenchildren[r] == self.children[r]: self.wantedancestors.pop(r, None) self.parentmap.pop(r, None) del self.seenchildren[r] if self._rebuilt: del self.children[r] def wanted(self, rev, i) -> bool: # Return True if we're directly interested in rev. # # i is an index selecting one of the parents of rev (if rev # has no parents, i is None). getchangedfiles will give us # the list of files that are different in rev and in the parent # indicated by i. If we're interested in any of these files, # we're interested in rev. try: files = self.base.getchangedfiles(rev, i) except NotImplementedError: raise error.Abort(_(b"source repository doesn't support --filemap")) for f in files: if self.filemapper(f): return True # The include directive is documented to include nothing else (though # valid branch closes are included). if self.filemapper.include: return False # Allow empty commits in the source revision through. The getchanges() # method doesn't even bother calling this if it determines that the # close marker is significant (i.e. all of the branch ancestors weren't # eliminated). Therefore if there *is* a close marker, getchanges() # doesn't consider it significant, and this revision should be dropped. return not files and b'close' not in self.commits[rev].extra def mark_not_wanted(self, rev, p) -> None: # Mark rev as not interesting and update data structures. if p is None: # A root revision. Use SKIPREV to indicate that it doesn't # map to any revision in the restricted graph. Put SKIPREV # in the set of wanted ancestors to simplify code elsewhere self.parentmap[rev] = SKIPREV self.wantedancestors[rev] = {SKIPREV} return # Reuse the data from our parent. self.parentmap[rev] = self.parentmap[p] self.wantedancestors[rev] = self.wantedancestors[p] def mark_wanted(self, rev, parents) -> None: # Mark rev ss wanted and update data structures. # rev will be in the restricted graph, so children of rev in # the original graph should still have rev as a parent in the # restricted graph. self.parentmap[rev] = rev # The set of wanted ancestors of rev is the union of the sets # of wanted ancestors of its parents. Plus rev itself. wrev = set() for p in parents: if p in self.wantedancestors: wrev.update(self.wantedancestors[p]) else: self.ui.warn( _(b'warning: %s parent %s is missing\n') % (rev, p) ) wrev.add(rev) self.wantedancestors[rev] = wrev def getchanges(self, rev, full): parents = self.commits[rev].parents if len(parents) > 1 and not self.ignoreancestorcheck: self.rebuild() # To decide whether we're interested in rev we: # # - calculate what parents rev will have if it turns out we're # interested in it. If it's going to have more than 1 parent, # we're interested in it. # # - otherwise, we'll compare it with the single parent we found. # If any of the files we're interested in is different in the # the two revisions, we're interested in rev. # A parent p is interesting if its mapped version (self.parentmap[p]): # - is not SKIPREV # - is still not in the list of parents (we don't want duplicates) # - is not an ancestor of the mapped versions of the other parents or # there is no parent in the same branch than the current revision. mparents = [] knownparents = set() branch = self.commits[rev].branch hasbranchparent = False for i, p1 in enumerate(parents): mp1 = self.parentmap[p1] if mp1 == SKIPREV or mp1 in knownparents: continue isancestor = not self.ignoreancestorcheck and any( p2 for p2 in parents if p1 != p2 and mp1 != self.parentmap[p2] and mp1 in self.wantedancestors[p2] ) if not isancestor and not hasbranchparent and len(parents) > 1: # This could be expensive, avoid unnecessary calls. if self._cachedcommit(p1).branch == branch: hasbranchparent = True mparents.append((p1, mp1, i, isancestor)) knownparents.add(mp1) # Discard parents ancestors of other parents if there is a # non-ancestor one on the same branch than current revision. if hasbranchparent: mparents = [p for p in mparents if not p[3]] wp = None if mparents: wp = max(p[2] for p in mparents) mparents = [p[1] for p in mparents] elif parents: wp = 0 self.origparents[rev] = parents closed = False if b'close' in self.commits[rev].extra: # A branch closing revision is only useful if one of its # parents belong to the branch being closed pbranches = [self._cachedcommit(p).branch for p in mparents] if branch in pbranches: closed = True if len(mparents) < 2 and not closed and not self.wanted(rev, wp): # We don't want this revision. # Update our state and tell the convert process to map this # revision to the same revision its parent as mapped to. p = None if parents: p = parents[wp] self.mark_not_wanted(rev, p) self.convertedorder.append((rev, False, p)) self._discard(*parents) return self.parentmap[rev] # We want this revision. # Rewrite the parents of the commit object self.commits[rev].parents = mparents self.mark_wanted(rev, parents) self.convertedorder.append((rev, True, None)) self._discard(*parents) # Get the real changes and do the filtering/mapping. To be # able to get the files later on in getfile, we hide the # original filename in the rev part of the return value. changes, copies, cleanp2 = self.base.getchanges(rev, full) files = {} ncleanp2 = set(cleanp2) for f, r in changes: newf = self.filemapper(f) if newf and (newf != f or newf not in files): files[newf] = (f, r) if newf != f: ncleanp2.discard(f) files = sorted(files.items()) ncopies = {} for c in copies: newc = self.filemapper(c) if newc: newsource = self.filemapper(copies[c]) if newsource: ncopies[newc] = newsource return files, ncopies, ncleanp2 def targetfilebelongstosource(self, targetfilename: bytes) -> bool: return self.filemapper.istargetfile(targetfilename) def getfile(self, name, rev): realname, realrev = rev return self.base.getfile(realname, realrev) def gettags(self): return self.base.gettags() def hasnativeorder(self) -> bool: return self.base.hasnativeorder() def lookuprev(self, rev): return self.base.lookuprev(rev) def getbookmarks(self): return self.base.getbookmarks() def converted(self, rev, sinkrev): self.base.converted(rev, sinkrev)