# sparse.py - functionality for sparse checkouts # # Copyright 2014 Facebook, Inc. # # This software may be used and distributed according to the terms of the # GNU General Public License version 2 or any later version. from __future__ import absolute_import import hashlib import os from .i18n import _ from .node import ( hex, nullid, ) from . import ( error, match as matchmod, merge as mergemod, pathutil, pycompat, scmutil, util, ) # Whether sparse features are enabled. This variable is intended to be # temporary to facilitate porting sparse to core. It should eventually be # a per-repo option, possibly a repo requirement. enabled = False def parseconfig(ui, raw, action): """Parse sparse config file content. action is the command which is trigerring this read, can be narrow, sparse Returns a tuple of includes, excludes, and profiles. """ includes = set() excludes = set() profiles = set() current = None havesection = False for line in raw.split('\n'): line = line.strip() if not line or line.startswith('#'): # empty or comment line, skip continue elif line.startswith('%include '): line = line[9:].strip() if line: profiles.add(line) elif line == '[include]': if havesection and current != includes: # TODO pass filename into this API so we can report it. raise error.Abort(_('%(action)s config cannot have includes ' 'after excludes') % {'action': action}) havesection = True current = includes continue elif line == '[exclude]': havesection = True current = excludes elif line: if current is None: raise error.Abort(_('%(action)s config entry outside of ' 'section: %(line)s') % {'action': action, 'line': line}, hint=_('add an [include] or [exclude] line ' 'to declare the entry type')) if line.strip().startswith('/'): ui.warn(_('warning: %(action)s profile cannot use' ' paths starting with /, ignoring %(line)s\n') % {'action': action, 'line': line}) continue current.add(line) return includes, excludes, profiles # Exists as separate function to facilitate monkeypatching. 
def readprofile(repo, profile, changeid):
    """Resolve the raw content of a sparse profile file.

    Looks up ``profile`` at ``changeid`` via ``repo.filectx()`` and returns
    that file context's data.
    """
    # TODO add some kind of cache here because this incurs a manifest
    # resolve and can be slow.
    return repo.filectx(profile, changeid=changeid).data()


def patternsforrev(repo, rev):
    """Obtain sparse checkout patterns for the given rev.

    Returns a tuple of sets representing includes, excludes, and profiles.
    All three are empty when sparse is not enabled or when the ``sparse``
    config file is missing/empty.

    Profiles referenced by the config (and, transitively, by other profiles)
    are read from ``rev`` and merged into the result. The returned profiles
    set contains every profile name that was visited, including ones that
    could not be found in ``rev``.

    Raises ``error.Abort`` if a sparse config exists and ``rev`` is None.
    """
    # Feature isn't enabled. No-op.
    if not enabled:
        return set(), set(), set()

    raw = repo.vfs.tryread('sparse')
    if not raw:
        return set(), set(), set()

    if rev is None:
        raise error.Abort(_('cannot parse sparse patterns from working '
                            'directory'))

    includes, excludes, profiles = parseconfig(repo.ui, raw, 'sparse')
    ctx = repo[rev]

    if profiles:
        # Worklist traversal: ``profiles`` is the pending set, ``visited``
        # guards against processing a profile twice (and against cycles).
        visited = set()
        while profiles:
            profile = profiles.pop()
            if profile in visited:
                continue

            visited.add(profile)

            try:
                raw = readprofile(repo, profile, rev)
            except error.ManifestLookupError:
                msg = (
                    "warning: sparse profile '%s' not found "
                    "in rev %s - ignoring it\n" % (profile, ctx))
                # experimental config: sparse.missingwarning
                if repo.ui.configbool(
                        'sparse', 'missingwarning'):
                    repo.ui.warn(msg)
                else:
                    repo.ui.debug(msg)
                continue

            pincludes, pexcludes, subprofs = parseconfig(repo.ui, raw,
                                                         'sparse')
            includes.update(pincludes)
            excludes.update(pexcludes)
            profiles.update(subprofs)

        profiles = visited

    # Whenever there is at least one include rule, '.hg*' is added as well.
    if includes:
        includes.add('.hg*')

    return includes, excludes, profiles


def activeconfig(repo):
    """Determine the active sparse config rules.

    Rules are constructed by reading the current sparse config and bringing
    in referenced profiles from parents of the working directory.

    Returns a tuple of (includes, excludes, profiles) sets, each being the
    union over all non-null dirstate parents.
    """
    revs = [repo.changelog.rev(node) for node in
            repo.dirstate.parents() if node != nullid]

    allincludes = set()
    allexcludes = set()
    allprofiles = set()

    for rev in revs:
        includes, excludes, profiles = patternsforrev(repo, rev)
        allincludes |= includes
        allexcludes |= excludes
        allprofiles |= profiles

    return allincludes, allexcludes, allprofiles


def configsignature(repo, includetemp=True):
    """Obtain the signature string for the current sparse configuration.

    This is used to construct a cache key for matchers.

    The signature is the hex SHA-1 of the ``sparse`` file followed by a
    space and, when ``includetemp`` is true, the hex SHA-1 of the
    ``tempsparse`` file (otherwise the literal '0'). Computed values are
    stored in ``repo._sparsesignaturecache``.
    """
    cache = repo._sparsesignaturecache

    signature = cache.get('signature')

    if includetemp:
        tempsignature = cache.get('tempsignature')
    else:
        tempsignature = '0'

    # Recompute when either piece that we need is missing from the cache.
    if signature is None or (includetemp and tempsignature is None):
        signature = hex(hashlib.sha1(repo.vfs.tryread('sparse')).digest())
        cache['signature'] = signature

        if includetemp:
            raw = repo.vfs.tryread('tempsparse')
            tempsignature = hex(hashlib.sha1(raw).digest())
            cache['tempsignature'] = tempsignature

    return '%s %s' % (signature, tempsignature)


def writeconfig(repo, includes, excludes, profiles):
    """Write the sparse config file given a sparse configuration.

    Profiles are written first as ``%include`` lines, then the ``[include]``
    and ``[exclude]`` sections (each only if non-empty), all in sorted
    order. The signature cache on the repo is cleared afterwards.
    """
    with repo.vfs('sparse', 'wb') as fh:
        for p in sorted(profiles):
            fh.write('%%include %s\n' % p)

        if includes:
            fh.write('[include]\n')
            for i in sorted(includes):
                fh.write(i)
                fh.write('\n')

        if excludes:
            fh.write('[exclude]\n')
            for e in sorted(excludes):
                fh.write(e)
                fh.write('\n')

    repo._sparsesignaturecache.clear()


def readtemporaryincludes(repo):
    """Return the set of entries stored in the ``tempsparse`` file.

    Returns an empty set when the file is missing or empty.
    """
    raw = repo.vfs.tryread('tempsparse')
    if not raw:
        return set()

    return set(raw.split('\n'))


def writetemporaryincludes(repo, includes):
    """Write ``includes`` (sorted, newline-joined) to ``tempsparse``.

    The signature cache on the repo is cleared afterwards.
    """
    repo.vfs.write('tempsparse', '\n'.join(sorted(includes)))
    repo._sparsesignaturecache.clear()


def addtemporaryincludes(repo, additional):
    """Add the entries in ``additional`` to the temporary includes."""
    includes = readtemporaryincludes(repo)
    includes.update(additional)
    writetemporaryincludes(repo, includes)


def prunetemporaryincludes(repo):
    """Remove temporarily included files and delete ``tempsparse``.

    Does nothing when sparse is not enabled, when there is no ``tempsparse``
    file, or when the working directory status reports any modified, added,
    removed or deleted file.
    """
    if not enabled or not repo.vfs.exists('tempsparse'):
        return

    s = repo.status()
    if s.modified or s.added or s.removed or s.deleted:
        # Still have pending changes. Don't bother trying to prune.
        return

    # Matcher without the temporary includes: anything it rejects is only
    # present because of ``tempsparse``.
    sparsematch = matcher(repo, includetemp=False)
    dirstate = repo.dirstate
    actions = []
    dropped = []
    tempincludes = readtemporaryincludes(repo)
    for file in tempincludes:
        if file in dirstate and not sparsematch(file):
            message = _('dropping temporarily included sparse files')
            actions.append((file, None, message))
            dropped.append(file)

    typeactions = mergemod.emptyactions()
    typeactions['r'] = actions
    mergemod.applyupdates(repo, typeactions, repo[None], repo['.'], False)

    # Fix dirstate
    for file in dropped:
        dirstate.drop(file)

    repo.vfs.unlink('tempsparse')
    repo._sparsesignaturecache.clear()
    msg = _('cleaned up %d temporarily added file(s) from the '
            'sparse checkout\n')
    # NOTE(review): the count is the number of entries read from
    # ``tempsparse``, not the number of files actually dropped above.
    repo.ui.status(msg % len(tempincludes))


def forceincludematcher(matcher, includes):
    """Returns a matcher that returns true for any of the forced includes
    before testing against the actual matcher."""
    kindpats = [('path', include, '') for include in includes]
    includematcher = matchmod.includematcher('', kindpats)
    return matchmod.unionmatcher([includematcher, matcher])


def matcher(repo, revs=None, includetemp=True):
    """Obtain a matcher for sparse working directories for the given revs.

    If multiple revisions are specified, the matcher is the union of all
    revs. When ``revs`` is empty or ``[None]``, the non-null dirstate
    parents are used.

    ``includetemp`` indicates whether to use the temporary sparse profile.

    Results are cached in ``repo._sparsematchercache`` keyed on the config
    signature and the revs.
    """
    # If sparse isn't enabled, sparse matcher matches everything.
    if not enabled:
        return matchmod.always()

    if not revs or revs == [None]:
        revs = [repo.changelog.rev(node)
                for node in repo.dirstate.parents() if node != nullid]

    signature = configsignature(repo, includetemp=includetemp)

    key = '%s %s' % (signature, ' '.join(map(pycompat.bytestr, revs)))

    result = repo._sparsematchercache.get(key)
    if result:
        return result

    matchers = []
    for rev in revs:
        try:
            includes, excludes, profiles = patternsforrev(repo, rev)

            if includes or excludes:
                revmatcher = matchmod.match(repo.root, '', [],
                                            include=includes,
                                            exclude=excludes,
                                            default='relpath')
                matchers.append(revmatcher)
        except IOError:
            # NOTE(review): IOError is deliberately ignored here, so a rev
            # whose patterns cannot be read contributes no matcher.
            pass

    if not matchers:
        result = matchmod.always()
    elif len(matchers) == 1:
        result = matchers[0]
    else:
        result = matchmod.unionmatcher(matchers)

    if includetemp:
        tempincludes = readtemporaryincludes(repo)
        result = forceincludematcher(result, tempincludes)

    repo._sparsematchercache[key] = result

    return result


def filterupdatesactions(repo, wctx, mctx, branchmerge, actions):
    """Filter updates to only lay out files that match the sparse rules.

    ``actions`` maps a file name to a ``(type, args, msg)`` tuple. It is
    returned unchanged when sparse is not enabled or when the matcher for
    the working context's parents matches everything. Otherwise a new dict
    of pruned actions is returned.

    Files outside the sparse checkout that are needed for a merge are
    recorded as temporary includes and laid out in the working copy.
    """
    if not enabled:
        return actions

    oldrevs = [pctx.rev() for pctx in wctx.parents()]
    oldsparsematch = matcher(repo, oldrevs)

    if oldsparsematch.always():
        return actions

    files = set()
    prunedactions = {}

    if branchmerge:
        # If we're merging, use the wctx filter, since we're merging into
        # the wctx.
        sparsematch = matcher(repo, [wctx.p1().rev()])
    else:
        # If we're updating, use the target context's filter, since we're
        # moving to the target context.
        sparsematch = matcher(repo, [mctx.rev()])

    temporaryfiles = []
    for file, action in actions.iteritems():
        atype, args, msg = action
        files.add(file)
        if sparsematch(file):
            prunedactions[file] = action
        elif atype == 'm':
            temporaryfiles.append(file)
            prunedactions[file] = action
        elif branchmerge:
            if atype != 'k':
                temporaryfiles.append(file)
                prunedactions[file] = action
        elif atype == 'f':
            prunedactions[file] = action
        elif file in wctx:
            prunedactions[file] = ('r', args, msg)

        if branchmerge and atype == mergemod.ACTION_MERGE:
            f1, f2, fa, move, anc = args
            if not sparsematch(f1):
                temporaryfiles.append(f1)

    if temporaryfiles:
        repo.ui.status(_('temporarily included %d file(s) in the sparse '
                         'checkout for merging\n') % len(temporaryfiles))
        addtemporaryincludes(repo, temporaryfiles)

        # Add the new files to the working copy so they can be merged, etc
        actions = []
        message = 'temporarily adding to sparse checkout'
        wctxmanifest = repo[None].manifest()
        for file in temporaryfiles:
            if file in wctxmanifest:
                fctx = repo[None][file]
                actions.append((file, (fctx.flags(), False), message))

        typeactions = mergemod.emptyactions()
        typeactions['g'] = actions
        mergemod.applyupdates(repo, typeactions, repo[None], repo['.'],
                              False)

        dirstate = repo.dirstate
        for file, flags, msg in actions:
            dirstate.normal(file)

    profiles = activeconfig(repo)[2]
    changedprofiles = profiles & files
    # If an active profile changed during the update, refresh the checkout.
    # Don't do this during a branch merge, since all incoming changes should
    # have been handled by the temporary includes above.
    if changedprofiles and not branchmerge:
        mf = mctx.manifest()
        for file in mf:
            old = oldsparsematch(file)
            new = sparsematch(file)
            if not old and new:
                flags = mf.flags(file)
                prunedactions[file] = ('g', (flags, False), '')
            elif old and not new:
                prunedactions[file] = ('r', [], '')

    return prunedactions


def refreshwdir(repo, origstatus, origsparsematch, force=False):
    """Refreshes working directory by taking sparse config into account.

    The old status and sparse matcher is compared against the current
    sparse matcher.

    Will abort if a file with pending changes is being excluded
    or included unless ``force`` is True.

    Returns a tuple of three lists of file names: (added, dropped, lookup).
    ``lookup`` holds newly included files that already existed on disk.
    """
    # Verify there are no pending changes
    pending = set()
    pending.update(origstatus.modified)
    pending.update(origstatus.added)
    pending.update(origstatus.removed)
    sparsematch = matcher(repo)
    abort = False

    for f in pending:
        if not sparsematch(f):
            repo.ui.warn(_("pending changes to '%s'\n") % f)
            abort = not force

    if abort:
        raise error.Abort(_('could not update sparseness due to pending '
                            'changes'))

    # Calculate actions
    dirstate = repo.dirstate
    ctx = repo['.']
    added = []
    lookup = []
    dropped = []
    mf = ctx.manifest()
    files = set(mf)

    actions = {}

    for file in files:
        old = origsparsematch(file)
        new = sparsematch(file)
        # Add files that are newly included, or that don't exist in
        # the dirstate yet.
        if (new and not old) or (old and new and file not in dirstate):
            fl = mf.flags(file)
            if repo.wvfs.exists(file):
                actions[file] = ('e', (fl,), '')
                lookup.append(file)
            else:
                actions[file] = ('g', (fl, False), '')
                added.append(file)
        # Drop files that are newly excluded, or that still exist in
        # the dirstate.
        elif (old and not new) or (not old and not new and file in dirstate):
            dropped.append(file)
            if file not in pending:
                actions[file] = ('r', [], '')

    # Verify there are no pending changes in newly included files
    abort = False
    for file in lookup:
        repo.ui.warn(_("pending changes to '%s'\n") % file)
        abort = not force
    if abort:
        raise error.Abort(_('cannot change sparseness due to pending '
                            'changes (delete the files or use '
                            '--force to bring them back dirty)'))

    # Check for files that were only in the dirstate.
    for file, state in dirstate.iteritems():
        if file not in files:
            old = origsparsematch(file)
            new = sparsematch(file)
            if old and not new:
                dropped.append(file)

    # Apply changes to disk
    typeactions = mergemod.emptyactions()
    for f, (m, args, msg) in actions.iteritems():
        typeactions[m].append((f, args, msg))

    mergemod.applyupdates(repo, typeactions, repo[None], repo['.'], False)

    # Fix dirstate
    for file in added:
        dirstate.normal(file)

    for file in dropped:
        dirstate.drop(file)

    for file in lookup:
        # File exists on disk, and we're bringing it back in an unknown state.
        dirstate.normallookup(file)

    return added, dropped, lookup


def aftercommit(repo, node):
    """Perform actions after a working directory commit.

    If the commit touched any profile used by the committed revision, the
    working directory is refreshed (with ``force=True``). Temporary
    includes are then pruned.
    """
    # This function is called unconditionally, even if sparse isn't
    # enabled.
    ctx = repo[node]

    profiles = patternsforrev(repo, ctx.rev())[2]

    # profiles will only have data if sparse is enabled.
    if profiles & set(ctx.files()):
        origstatus = repo.status()
        origsparsematch = matcher(repo)
        refreshwdir(repo, origstatus, origsparsematch, force=True)

    prunetemporaryincludes(repo)


def _updateconfigandrefreshwdir(repo, includes, excludes, profiles,
                                force=False, removing=False):
    """Update the sparse config and working directory state.

    Writes the new config, then refreshes the working directory and returns
    the result of ``refreshwdir()``. The 'exp-sparse' requirement is added
    if missing, or removed when it is present and ``removing`` is true.

    If writing the config or refreshing fails, the previous requirements
    and config are restored and the exception is re-raised.
    """
    raw = repo.vfs.tryread('sparse')
    oldincludes, oldexcludes, oldprofiles = parseconfig(repo.ui, raw,
                                                        'sparse')

    oldstatus = repo.status()
    oldmatch = matcher(repo)
    oldrequires = set(repo.requirements)

    # TODO remove this try..except once the matcher integrates better
    # with dirstate. We currently have to write the updated config
    # because that will invalidate the matcher cache and force a
    # re-read. We ideally want to update the cached matcher on the
    # repo instance then flush the new config to disk once wdir is
    # updated. But this requires massive rework to matcher() and its
    # consumers.

    if 'exp-sparse' in oldrequires and removing:
        repo.requirements.discard('exp-sparse')
        scmutil.writerequires(repo.vfs, repo.requirements)
    elif 'exp-sparse' not in oldrequires:
        repo.requirements.add('exp-sparse')
        scmutil.writerequires(repo.vfs, repo.requirements)

    try:
        writeconfig(repo, includes, excludes, profiles)
        return refreshwdir(repo, oldstatus, oldmatch, force=force)
    except Exception:
        # Roll back both the requirements and the config before re-raising.
        if repo.requirements != oldrequires:
            repo.requirements.clear()
            repo.requirements |= oldrequires
            scmutil.writerequires(repo.vfs, repo.requirements)
        writeconfig(repo, oldincludes, oldexcludes, oldprofiles)
        raise


def clearrules(repo, force=False):
    """Clears include/exclude rules from the sparse config.

    The remaining sparse config only has profiles, if defined. The working
    directory is refreshed, as needed.
    """
    with repo.wlock():
        raw = repo.vfs.tryread('sparse')
        includes, excludes, profiles = parseconfig(repo.ui, raw, 'sparse')

        # Nothing to clear.
        if not includes and not excludes:
            return

        _updateconfigandrefreshwdir(repo, set(), set(), profiles, force=force)


def importfromfiles(repo, opts, paths, force=False):
    """Import sparse config rules from files.

    The updated sparse config is written out and the working directory
    is refreshed, as needed.

    ``paths`` is an iterable of file names to read rules from; ``opts`` is
    passed through to the output formatter.
    """
    with repo.wlock():
        # read current configuration
        raw = repo.vfs.tryread('sparse')
        includes, excludes, profiles = parseconfig(repo.ui, raw, 'sparse')
        aincludes, aexcludes, aprofiles = activeconfig(repo)

        # Import rules on top; only take in rules that are not yet
        # part of the active rules.
        changed = False
        for p in paths:
            with util.posixfile(util.expandpath(p), mode='rb') as fh:
                raw = fh.read()

            iincludes, iexcludes, iprofiles = parseconfig(repo.ui, raw,
                                                          'sparse')
            oldsize = len(includes) + len(excludes) + len(profiles)
            includes.update(iincludes - aincludes)
            excludes.update(iexcludes - aexcludes)
            profiles.update(iprofiles - aprofiles)
            if len(includes) + len(excludes) + len(profiles) > oldsize:
                changed = True

        profilecount = includecount = excludecount = 0
        fcounts = (0, 0, 0)

        if changed:
            profilecount = len(profiles - aprofiles)
            includecount = len(includes - aincludes)
            excludecount = len(excludes - aexcludes)

            fcounts = map(len, _updateconfigandrefreshwdir(
                repo, includes, excludes, profiles, force=force))

        printchanges(repo.ui, opts, profilecount, includecount, excludecount,
                     *fcounts)


def updateconfig(repo, pats, opts, include=False, exclude=False, reset=False,
                 delete=False, enableprofile=False, disableprofile=False,
                 force=False, usereporootpaths=False):
    """Perform a sparse config update.

    Only one of the actions may be performed.

    The new config is written out and a working directory refresh is
    performed.

    Unless ``usereporootpaths`` is true, patterns of a cwd-relative kind
    (or with no kind) are rewritten to be relative to the repo root.

    Raises ``error.Abort`` if any pattern is an absolute path.
    """
    with repo.wlock():
        raw = repo.vfs.tryread('sparse')
        oldinclude, oldexclude, oldprofiles = parseconfig(repo.ui, raw,
                                                          'sparse')

        if reset:
            newinclude = set()
            newexclude = set()
            newprofiles = set()
        else:
            newinclude = set(oldinclude)
            newexclude = set(oldexclude)
            newprofiles = set(oldprofiles)

        if any(os.path.isabs(pat) for pat in pats):
            raise error.Abort(_('paths cannot be absolute'))

        if not usereporootpaths:
            # let's treat paths as relative to cwd
            root, cwd = repo.root, repo.getcwd()
            abspats = []
            for kindpat in pats:
                kind, pat = matchmod._patsplit(kindpat, None)
                if kind in matchmod.cwdrelativepatternkinds or kind is None:
                    # Keep the explicit 'kind:' prefix, if there was one.
                    prefix = kind + ':' if kind else ''
                    abspats.append(
                        prefix + pathutil.canonpath(root, cwd, pat))
                else:
                    abspats.append(kindpat)
            pats = abspats

        if include:
            newinclude.update(pats)
        elif exclude:
            newexclude.update(pats)
        elif enableprofile:
            newprofiles.update(pats)
        elif disableprofile:
            newprofiles.difference_update(pats)
        elif delete:
            newinclude.difference_update(pats)
            newexclude.difference_update(pats)

        # Net change per category: entries added minus entries removed.
        profilecount = (len(newprofiles - oldprofiles) -
                        len(oldprofiles - newprofiles))
        includecount = (len(newinclude - oldinclude) -
                        len(oldinclude - newinclude))
        excludecount = (len(newexclude - oldexclude) -
                        len(oldexclude - newexclude))

        fcounts = map(len, _updateconfigandrefreshwdir(
            repo, newinclude, newexclude, newprofiles, force=force,
            removing=reset))

        printchanges(repo.ui, opts, profilecount, includecount,
                     excludecount, *fcounts)


def printchanges(ui, opts, profilecount=0, includecount=0, excludecount=0,
                 added=0, dropped=0, conflicting=0):
    """Print output summarizing sparse config changes.

    All fields are written conditionally on ``ui.verbose``. The file counts
    are skipped entirely when the formatter is plain.
    """
    with ui.formatter('sparse', opts) as fm:
        fm.startitem()
        fm.condwrite(ui.verbose, 'profiles_added', _('Profiles changed: %d\n'),
                     profilecount)
        fm.condwrite(ui.verbose, 'include_rules_added',
                     _('Include rules changed: %d\n'), includecount)
        fm.condwrite(ui.verbose, 'exclude_rules_added',
                     _('Exclude rules changed: %d\n'), excludecount)

        # In 'plain' verbose mode, mergemod.applyupdates already outputs what
        # files are added or removed outside of the templating formatter
        # framework. No point in repeating ourselves in that case.
        if not fm.isplain():
            fm.condwrite(ui.verbose, 'files_added', _('Files added: %d\n'),
                         added)
            fm.condwrite(ui.verbose, 'files_dropped',
                         _('Files dropped: %d\n'), dropped)
            fm.condwrite(ui.verbose, 'files_conflicting',
                         _('Files conflicting: %d\n'), conflicting)