#!/usr/bin/env python r""" mglob - enhanced file list expansion module Use as stand-alone utility (for xargs, `backticks` etc.), or a globbing library for own python programs. Globbing the sys.argv is something that almost every Windows script has to perform manually, and this module is here to help with that task. Also Unix users will benefit from enhanced modes such as recursion, exclusion, directory omission... Unlike glob.glob, directories are not included in the glob unless specified with 'dir:' 'expand' is the function to use in python programs. Typical use to expand argv (esp. in windows):: try: import mglob files = mglob.expand(sys.argv[1:]) except ImportError: print "mglob not found; try 'easy_install mglob' for extra features" files = sys.argv[1:] Note that for unix, shell expands *normal* wildcards (*.cpp, etc.) in argv. Therefore, you might want to use quotes with normal wildcards to prevent this expansion, in order for mglob to see the wildcards and get the wanted behaviour. Not quoting the wildcards is harmless and typically has equivalent results, though. Author: Ville Vainio <vivainio@gmail.com> License: MIT Open Source license """ #Assigned in variable for "usage" printing convenience" globsyntax = """\ This program allows specifying filenames with "mglob" mechanism. Supported syntax in globs (wilcard matching patterns):: *.cpp ?ellowo* - obvious. Differs from normal glob in that dirs are not included. Unix users might want to write this as: "*.cpp" "?ellowo*" rec:/usr/share=*.txt,*.doc - get all *.txt and *.doc under /usr/share, recursively rec:/usr/share - All files under /usr/share, recursively rec:*.py - All .py files under current working dir, recursively foo - File or dir foo !*.bak readme* - readme*, exclude files ending with .bak !.svn/ !.hg/ !*_Data/ rec:. - Skip .svn, .hg, foo_Data dirs (and their subdirs) in recurse. Trailing / is the key, \ does not work! Use !.*/ for all hidden. dir:foo - the directory foo if it exists (not files in foo) dir:* - all directories in current folder foo.py bar.* !h* rec:*.py - Obvious. !h* exclusion only applies for rec:*.py. foo.py is *not* included twice. @filelist.txt - All files listed in 'filelist.txt' file, on separate lines. "cont:class \wak:" rec:*.py - Match files containing regexp. Applies to subsequent files. note quotes because of whitespace. """ __version__ = "0.2" import os,glob,fnmatch,sys,re def expand(flist,exp_dirs = False): """ Expand the glob(s) in flist. flist may be either a whitespace-separated list of globs/files or an array of globs/files. if exp_dirs is true, directory names in glob are expanded to the files contained in them - otherwise, directory names are returned as is. """ if isinstance(flist, basestring): import shlex flist = shlex.split(flist) done_set = set() denied_set = set() cont_set = set() cur_rejected_dirs = set() def recfind(p, pats = ["*"]): denied_dirs = [os.path.dirname(d) for d in denied_set if d.endswith("/")] for (dp,dnames,fnames) in os.walk(p): # see if we should ignore the whole directory dp_norm = dp.replace("\\","/") + "/" deny = False # do not traverse under already rejected dirs for d in cur_rejected_dirs: if dp.startswith(d): deny = True break if deny: continue #print "dp",dp bname = os.path.basename(dp) for deny_pat in denied_dirs: if fnmatch.fnmatch( bname, deny_pat): deny = True cur_rejected_dirs.add(dp) break if deny: continue for f in fnames: matched = False for p in pats: if fnmatch.fnmatch(f,p): matched = True break if matched: yield os.path.join(dp,f) def once_filter(seq): for it in seq: p = os.path.abspath(it) if p in done_set: continue done_set.add(p) deny = False for deny_pat in denied_set: if fnmatch.fnmatch(os.path.basename(p), deny_pat): deny = True break if cont_set: try: cont = open(p).read() except IOError: # deny continue for pat in cont_set: if not re.search(pat,cont, re.IGNORECASE): deny = True break if not deny: yield it return res = [] for ent in flist: ent = os.path.expanduser(os.path.expandvars(ent)) if ent.lower().startswith('rec:'): fields = ent[4:].split('=') if len(fields) == 2: pth, patlist = fields elif len(fields) == 1: if os.path.isdir(fields[0]): # single arg is dir pth, patlist = fields[0], '*' else: # single arg is pattern pth, patlist = '.', fields[0] elif len(fields) == 0: pth, pathlist = '.','*' pats = patlist.split(',') res.extend(once_filter(recfind(pth, pats))) # filelist elif ent.startswith('@') and os.path.isfile(ent[1:]): res.extend(once_filter(open(ent[1:]).read().splitlines())) # exclusion elif ent.startswith('!'): denied_set.add(ent[1:]) # glob only dirs elif ent.lower().startswith('dir:'): res.extend(once_filter(filter(os.path.isdir,glob.glob(ent[4:])))) elif ent.lower().startswith('cont:'): cont_set.add(ent[5:]) # get all files in the specified dir elif os.path.isdir(ent) and exp_dirs: res.extend(once_filter(filter(os.path.isfile,glob.glob(ent + os.sep+"*")))) # glob only files elif '*' in ent or '?' in ent: res.extend(once_filter(filter(os.path.isfile,glob.glob(ent)))) else: res.extend(once_filter([ent])) return res def test(): assert ( expand("*.py ~/.ipython/*.py rec:/usr/share/doc-base") == expand( ['*.py', '~/.ipython/*.py', 'rec:/usr/share/doc-base'] ) ) def main(): if len(sys.argv) < 2: print globsyntax return print "\n".join(expand(sys.argv[1:])), def mglob_f(self, arg): from IPython.utils.text import SList if arg.strip(): return SList(expand(arg)) print "Please specify pattern!" print globsyntax def init_ipython(ip): """ register %mglob for IPython """ mglob_f.__doc__ = globsyntax ip.define_magic("mglob",mglob_f) # test() if __name__ == "__main__": main()