_mglob.py
227 lines
| 7.1 KiB
| text/x-python
|
PythonLexer
ville
|
r988 | r""" mglob - enhanced file list expansion module | ||
Bernardo B. Marques
|
r4872 | Use as stand-alone utility (for xargs, `backticks` etc.), | ||
or a globbing library for own python programs. Globbing the sys.argv is something | ||||
ville
|
r988 | that almost every Windows script has to perform manually, and this module is here | ||
Bernardo B. Marques
|
r4872 | to help with that task. Also Unix users will benefit from enhanced modes | ||
ville
|
r988 | such as recursion, exclusion, directory omission... | ||
Bernardo B. Marques
|
r4872 | Unlike glob.glob, directories are not included in the glob unless specified | ||
ville
|
r988 | with 'dir:' | ||
'expand' is the function to use in python programs. Typical use | ||||
to expand argv (esp. in windows):: | ||||
try: | ||||
Bernardo B. Marques
|
r4872 | import mglob | ||
ville
|
r988 | files = mglob.expand(sys.argv[1:]) | ||
except ImportError: | ||||
print "mglob not found; try 'easy_install mglob' for extra features" | ||||
Bernardo B. Marques
|
r4872 | files = sys.argv[1:] | ||
ville
|
r988 | |||
Note that for unix, shell expands *normal* wildcards (*.cpp, etc.) in argv. | ||||
Bernardo B. Marques
|
r4872 | Therefore, you might want to use quotes with normal wildcards to prevent this | ||
ville
|
r988 | expansion, in order for mglob to see the wildcards and get the wanted behaviour. | ||
Not quoting the wildcards is harmless and typically has equivalent results, though. | ||||
Author: Ville Vainio <vivainio@gmail.com> | ||||
License: MIT Open Source license | ||||
""" | ||||
# Assigned to a variable so it can also be printed as the "usage" text.
globsyntax = """\ | ||||
Ville M. Vainio
|
r1143 | This program allows specifying filenames with "mglob" mechanism. | ||
Supported syntax in globs (wilcard matching patterns):: | ||||
Bernardo B. Marques
|
r4872 | |||
*.cpp ?ellowo* | ||||
Ville M. Vainio
|
r1143 | - obvious. Differs from normal glob in that dirs are not included. | ||
Unix users might want to write this as: "*.cpp" "?ellowo*" | ||||
Bernardo B. Marques
|
r4872 | rec:/usr/share=*.txt,*.doc | ||
- get all *.txt and *.doc under /usr/share, | ||||
Ville M. Vainio
|
r1143 | recursively | ||
rec:/usr/share | ||||
- All files under /usr/share, recursively | ||||
rec:*.py | ||||
- All .py files under current working dir, recursively | ||||
Bernardo B. Marques
|
r4872 | foo | ||
Ville M. Vainio
|
r1143 | - File or dir foo | ||
Bernardo B. Marques
|
r4872 | !*.bak readme* | ||
Ville M. Vainio
|
r1143 | - readme*, exclude files ending with .bak | ||
!.svn/ !.hg/ !*_Data/ rec:. | ||||
- Skip .svn, .hg, foo_Data dirs (and their subdirs) in recurse. | ||||
Ville M. Vainio
|
r1341 | Trailing / is the key, \ does not work! Use !.*/ for all hidden. | ||
Bernardo B. Marques
|
r4872 | dir:foo | ||
Ville M. Vainio
|
r1143 | - the directory foo if it exists (not files in foo) | ||
Bernardo B. Marques
|
r4872 | dir:* | ||
Ville M. Vainio
|
r1143 | - all directories in current folder | ||
foo.py bar.* !h* rec:*.py | ||||
- Obvious. !h* exclusion only applies for rec:*.py. | ||||
foo.py is *not* included twice. | ||||
@filelist.txt | ||||
- All files listed in 'filelist.txt' file, on separate lines. | ||||
Ville M. Vainio
|
r1341 | "cont:class \wak:" rec:*.py | ||
- Match files containing regexp. Applies to subsequent files. | ||||
note quotes because of whitespace. | ||||
ville
|
r988 | """ | ||
__version__ = "0.2" | ||||
Ville M. Vainio
|
r1341 | import os,glob,fnmatch,sys,re | ||
Bernardo B. Marques
|
r4872 | |||
ville
|
def expand(flist, exp_dirs=False):
    """Expand the glob(s) in flist and return the resulting paths as a list.

    flist may be either a whitespace-separated string of globs/files
    (split with shlex, so shell-style quoting is honoured) or a sequence
    of globs/files.

    Entries are processed left to right.  Besides plain names and ordinary
    ``*`` / ``?`` wildcards (which match files only), these forms are
    understood:

    ``rec:DIR=PAT1,PAT2``  files under DIR, recursively, matching any PAT.
                           Only the first ``=`` separates DIR from the
                           patterns, so patterns may themselves contain ``=``.
    ``rec:DIR``            all files under DIR, recursively.
    ``rec:PAT``            files matching PAT under the current directory.
    ``rec:``               all files under the current directory.
    ``@FILE``              the paths listed in FILE, one per line.
    ``!PAT``               exclude base names matching PAT in later entries;
                           with a trailing ``/`` the pattern instead excludes
                           directories (and everything below them) from
                           ``rec:`` traversal.
    ``dir:PAT``            only directories matching PAT.
    ``cont:REGEX``         later files are kept only if their content matches
                           REGEX (case-insensitive); several ``cont:`` entries
                           must all match.

    If exp_dirs is true, a directory name in flist is expanded to the files
    directly contained in it - otherwise directory names are returned as is.

    Every path is reported at most once (compared by absolute path), in the
    form in which it was first produced.
    """
    # Accept both str and unicode on Python 2 while staying importable on 3.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if isinstance(flist, string_types):
        import shlex
        flist = shlex.split(flist)

    done_set = set()           # absolute paths already considered
    denied_set = set()         # '!' exclusion patterns seen so far
    cont_set = set()           # 'cont:' regexps seen so far
    cur_rejected_dirs = set()  # directories rejected during any rec: walk

    def under_rejected_dir(dirpath):
        """True if dirpath is an already rejected dir or lies below one."""
        for rejected in cur_rejected_dirs:
            # Compare on a path-component boundary so that rejecting "sub"
            # does not also reject its sibling "subway".
            if dirpath == rejected or dirpath.startswith(os.path.join(rejected, "")):
                return True
        return False

    def recfind(top, pats=("*",)):
        """Yield files below top whose base name matches any of pats."""
        denied_dirs = [os.path.dirname(d) for d in denied_set if d.endswith("/")]
        for (dp, dnames, fnames) in os.walk(top):
            # do not traverse under already rejected dirs
            if under_rejected_dir(dp):
                dnames[:] = []
                continue

            bname = os.path.basename(dp)
            # "." and ".." are not real directory names; without this guard
            # a pattern such as ".*" would reject the walk root of "rec:.".
            if bname not in (os.curdir, os.pardir) and any(
                    fnmatch.fnmatch(bname, deny_pat) for deny_pat in denied_dirs):
                cur_rejected_dirs.add(dp)
                # Prune in place so os.walk never descends into this subtree.
                dnames[:] = []
                continue

            for f in fnames:
                if any(fnmatch.fnmatch(f, pat) for pat in pats):
                    yield os.path.join(dp, f)

    def once_filter(seq):
        """Yield the not-yet-seen items of seq that pass '!' and 'cont:'."""
        for it in seq:
            p = os.path.abspath(it)
            if p in done_set:
                continue
            done_set.add(p)

            name = os.path.basename(p)
            if any(fnmatch.fnmatch(name, deny_pat) for deny_pat in denied_set):
                continue

            if cont_set:
                try:
                    with open(p) as fobj:
                        cont = fobj.read()
                except (IOError, OSError, UnicodeError):
                    # Content that cannot be read cannot match: deny.
                    continue
                if not all(re.search(pat, cont, re.IGNORECASE) for pat in cont_set):
                    continue

            yield it

    res = []
    for ent in flist:
        ent = os.path.expanduser(os.path.expandvars(ent))
        lowered = ent.lower()
        if lowered.startswith('rec:'):
            spec = ent[4:]
            pth, sep, patlist = spec.partition('=')
            if not sep:
                if not spec:
                    # bare "rec:" - everything below the current directory
                    pth, patlist = '.', '*'
                elif os.path.isdir(spec):
                    # single arg is dir
                    pth, patlist = spec, '*'
                else:
                    # single arg is pattern
                    pth, patlist = '.', spec
            res.extend(once_filter(recfind(pth, patlist.split(','))))
        # filelist
        elif ent.startswith('@') and os.path.isfile(ent[1:]):
            with open(ent[1:]) as listing:
                names = listing.read().splitlines()
            res.extend(once_filter(names))
        # exclusion
        elif ent.startswith('!'):
            denied_set.add(ent[1:])
        # glob only dirs
        elif lowered.startswith('dir:'):
            res.extend(once_filter(filter(os.path.isdir, glob.glob(ent[4:]))))
        # content filter for subsequent entries
        elif lowered.startswith('cont:'):
            cont_set.add(ent[5:])
        # get all files in the specified dir
        elif os.path.isdir(ent) and exp_dirs:
            res.extend(once_filter(filter(os.path.isfile, glob.glob(ent + os.sep + "*"))))
        # glob only files
        elif '*' in ent or '?' in ent:
            res.extend(once_filter(filter(os.path.isfile, glob.glob(ent))))
        else:
            res.extend(once_filter([ent]))
    return res
Bernardo B. Marques
|
r4872 | |||
ville
|
def test():
    """Self-check: a string spec and its pre-split list form expand alike.

    Relies on whatever happens to exist in the current directory,
    ~/.ipython and /usr/share/doc-base, so it only verifies that both
    input forms are treated the same way.
    """
    globs = ['*.py', '~/.ipython/*.py', 'rec:/usr/share/doc-base']
    from_string = expand("*.py ~/.ipython/*.py rec:/usr/share/doc-base")
    from_list = expand(globs)
    assert from_string == from_list
Bernardo B. Marques
|
r4872 | |||
ville
|
def main():
    """Command-line entry point: print the expansion of the argument globs.

    With no command-line arguments the glob syntax summary is printed
    instead.  Otherwise all arguments are handed to expand() and the
    resulting paths are written to stdout, one per line.
    """
    if len(sys.argv) < 2:
        print(globsyntax)
        return

    # Single-argument print(...) parses on both Python 2 and Python 3,
    # unlike the print statement form, which is a SyntaxError on Python 3.
    print("\n".join(expand(sys.argv[1:])))
def mglob_f(self, arg):
    """Implementation of the %mglob magic.

    arg is the raw argument string given to the magic.  When it contains
    anything besides whitespace it is expanded with expand() and the
    result is returned wrapped in an IPython SList.  Otherwise a hint and
    the glob syntax summary are printed and None is returned.

    This docstring is replaced by the syntax summary when the magic is
    registered through init_ipython().
    """
    # Imported lazily so the module stays usable without IPython installed.
    from IPython.utils.text import SList
    if arg.strip():
        return SList(expand(arg))
    # Single-argument print(...) parses on both Python 2 and Python 3.
    print("Please specify pattern!")
    print(globsyntax)
def init_ipython(ip):
    """Register the %mglob magic on the given IPython instance.

    ip must provide define_magic(name, function).
    """
    magic_name = "mglob"
    # The syntax summary becomes the magic's docstring; it is attached
    # before the function is handed over to IPython.
    mglob_f.__doc__ = globsyntax
    ip.define_magic(magic_name, mglob_f)
ville
|
# test()
# When executed as a script (rather than imported), behave as a
# command-line glob expander.
if __name__ == "__main__":
    main()