mglob.py
229 lines
| 7.5 KiB
| text/x-python
|
PythonLexer
ville
|
r988 | #!/usr/bin/env python | ||
r""" mglob - enhanced file list expansion module | ||||
Use it as a stand-alone utility (for xargs, `backticks` etc.),
or as a globbing library for your own Python programs. Globbing sys.argv is something
that almost every Windows script has to perform manually, and this module is here
to help with that task. Unix users will also benefit from enhanced modes
such as recursion, exclusion and directory omission.
Unlike glob.glob, directories are not included in the glob unless specified | ||||
with 'dir:' | ||||
'expand' is the function to use in python programs. Typical use | ||||
to expand argv (esp. in windows):: | ||||
try: | ||||
import mglob | ||||
files = mglob.expand(sys.argv[1:]) | ||||
except ImportError: | ||||
print "mglob not found; try 'easy_install mglob' for extra features" | ||||
files = sys.argv[1:] | ||||
Note that on Unix the shell expands *normal* wildcards (*.cpp, etc.) in argv.
Therefore, you might want to quote normal wildcards to prevent this
expansion, so that mglob sees the wildcards and produces the wanted behaviour.
Not quoting the wildcards is harmless and typically gives equivalent results, though.
Author: Ville Vainio <vivainio@gmail.com> | ||||
License: MIT Open Source license | ||||
""" | ||||
# Kept in a module-level variable so the same text can be printed as usage
# help by main() and reused as the help text of the IPython magic.
# Backslashes are written as '\\' because '\ ' and '\w' are not valid escape
# sequences in a normal (non-raw) string literal; the runtime text still
# contains a single backslash in both places.
globsyntax = """\
    This program allows specifying filenames with "mglob" mechanism.
    Supported syntax in globs (wildcard matching patterns)::

     *.cpp ?ellowo*
         - obvious. Differs from normal glob in that dirs are not included.
           Unix users might want to write this as: "*.cpp" "?ellowo*"
     rec:/usr/share=*.txt,*.doc
         - get all *.txt and *.doc under /usr/share,
           recursively
     rec:/usr/share
         - All files under /usr/share, recursively
     rec:*.py
         - All .py files under current working dir, recursively
     foo
         - File or dir foo
     !*.bak readme*
         - readme*, exclude files ending with .bak
     !.svn/ !.hg/ !*_Data/ rec:.
         - Skip .svn, .hg, foo_Data dirs (and their subdirs) in recurse.
           Trailing / is the key, \\ does not work! Use !.*/ for all hidden.
     dir:foo
         - the directory foo if it exists (not files in foo)
     dir:*
         - all directories in current folder
     foo.py bar.* !h* rec:*.py
         - Obvious. !h* exclusion only applies for rec:*.py.
           foo.py is *not* included twice.
     @filelist.txt
         - All files listed in 'filelist.txt' file, on separate lines.
     "cont:class \\wak:" rec:*.py
         - Match files containing regexp. Applies to subsequent files.
           note quotes because of whitespace.
"""

__version__ = "0.2"
Ville M. Vainio
|
r1341 | import os,glob,fnmatch,sys,re | ||
ville
|
r988 | |||
def expand(flist, exp_dirs=False):
    """Expand the mglob pattern(s) in flist into a list of path names.

    flist may be either a whitespace-separated string of globs/files
    (split with shell-like quoting via shlex) or a sequence of globs/files.

    Entries are processed left to right; '~' and environment variables are
    expanded in each entry first. Recognised entry forms:

    - ``rec:DIR=PAT1,PAT2``  files under DIR (recursively) matching a pattern
    - ``rec:DIR`` / ``rec:PAT`` / ``rec:``  all files under DIR, files
      matching PAT under the current directory, or all files under the
      current directory
    - ``@FILE``   every line of the existing file FILE is taken as a path
    - ``!PAT``    exclusion applied to entries that FOLLOW it; with a
      trailing '/' it excludes directories (and their subtrees) during
      ``rec:`` walks, otherwise it is matched against file base names
    - ``dir:PAT`` directories matching the glob PAT
    - ``cont:RE`` entries that FOLLOW must be readable files whose content
      matches the regular expression RE (case-insensitive search)
    - a glob containing '*' or '?' expands to matching files only
    - anything else is returned as is

    If exp_dirs is true, a plain directory name is expanded to the files
    directly contained in it - otherwise it is returned as is.

    Each path (compared by absolute path) is returned at most once.
    Returns a list of path strings in discovery order.
    """
    # 'basestring' only exists on Python 2; fall back to str elsewhere.
    try:
        string_types = basestring  # noqa: F821
    except NameError:
        string_types = str
    if isinstance(flist, string_types):
        import shlex
        flist = shlex.split(flist)

    done_set = set()    # absolute paths already considered (kept or denied)
    denied_set = set()  # '!' exclusion patterns seen so far
    cont_set = set()    # 'cont:' regular expressions seen so far

    def recfind(top, pats=("*",)):
        """Yield files under top whose base name matches any of pats."""
        # '!name/' entries exclude directories; dirname() drops the final '/'.
        dir_pats = [os.path.dirname(d) for d in denied_set if d.endswith("/")]

        def denied(name):
            # '.' and '..' textually match patterns such as '.*' but are not
            # hidden directories, so they must never be excluded.
            if name in (os.curdir, os.pardir):
                return False
            return any(fnmatch.fnmatch(name, pat) for pat in dir_pats)

        if denied(os.path.basename(top)):
            return
        for dirpath, dirnames, filenames in os.walk(top):
            # Prune in place so os.walk never descends into excluded
            # directories; matching by name (not by path prefix) keeps
            # siblings such as '.svn-keep' when only '.svn' is excluded.
            dirnames[:] = [d for d in dirnames if not denied(d)]
            for fname in filenames:
                if any(fnmatch.fnmatch(fname, pat) for pat in pats):
                    yield os.path.join(dirpath, fname)

    def once_filter(seq):
        """Yield items of seq not seen before and not currently excluded."""
        for item in seq:
            full = os.path.abspath(item)
            if full in done_set:
                continue
            # Recorded before the checks: a path rejected once is never
            # reconsidered by a later entry.
            done_set.add(full)
            base = os.path.basename(full)
            if any(fnmatch.fnmatch(base, pat) for pat in denied_set):
                continue
            if cont_set:
                try:
                    with open(full) as fobj:
                        content = fobj.read()
                except (IOError, OSError, UnicodeDecodeError):
                    # Content that cannot be read cannot match: deny.
                    continue
                if not all(re.search(pat, content, re.IGNORECASE)
                           for pat in cont_set):
                    continue
            yield item

    res = []
    for ent in flist:
        ent = os.path.expanduser(os.path.expandvars(ent))
        lowered = ent.lower()
        if lowered.startswith('rec:'):
            spec = ent[4:]
            if not spec:
                # bare 'rec:' - everything under the current directory
                pth, patlist = '.', '*'
            elif '=' in spec:
                # Split on the first '=' only, so extra '=' characters end
                # up in the pattern instead of leaving pth/patlist unset.
                pth, patlist = spec.split('=', 1)
            elif os.path.isdir(spec):
                # single arg is dir
                pth, patlist = spec, '*'
            else:
                # single arg is pattern
                pth, patlist = '.', spec
            res.extend(once_filter(recfind(pth, patlist.split(','))))
        # filelist
        elif ent.startswith('@') and os.path.isfile(ent[1:]):
            with open(ent[1:]) as fobj:
                listed = fobj.read().splitlines()
            res.extend(once_filter(listed))
        # exclusion
        elif ent.startswith('!'):
            denied_set.add(ent[1:])
        # glob only dirs
        elif lowered.startswith('dir:'):
            res.extend(once_filter(filter(os.path.isdir, glob.glob(ent[4:]))))
        # content filter for subsequent entries
        elif lowered.startswith('cont:'):
            cont_set.add(ent[5:])
        # get all files in the specified dir
        elif os.path.isdir(ent) and exp_dirs:
            res.extend(once_filter(
                filter(os.path.isfile, glob.glob(ent + os.sep + "*"))))
        # glob only files
        elif '*' in ent or '?' in ent:
            res.extend(once_filter(filter(os.path.isfile, glob.glob(ent))))
        else:
            res.extend(once_filter([ent]))
    return res
def test():
    """Self-check: a string spec and the equivalent list spec expand alike."""
    spec = ['*.py', '~/.ipython/*.py', 'rec:/usr/share/doc-base']
    # The string form is the same entries separated by single spaces.
    from_string = expand(" ".join(spec))
    from_list = expand(spec)
    assert from_string == from_list
def main():
    """Command-line entry point.

    With no arguments, print the glob syntax help. Otherwise expand all
    command-line arguments with expand() and print the resulting paths,
    one per line.
    """
    if len(sys.argv) < 2:
        print(globsyntax)
        return
    # Single-argument print(...) is valid on both Python 2 and Python 3.
    print("\n".join(expand(sys.argv[1:])))
def mglob_f(self, arg):
    """Implementation of the %mglob magic.

    If arg contains a pattern, return the expansion produced by expand()
    wrapped in an IPython SList. Otherwise print a hint followed by the
    glob syntax help and return None.
    """
    # Imported lazily so the module stays usable without IPython.
    from IPython.genutils import SList
    if arg.strip():
        return SList(expand(arg))
    # Single-argument print(...) is valid on both Python 2 and Python 3.
    print("Please specify pattern!")
    print(globsyntax)
def init_ipython(ip):
    """Register %mglob for IPython.

    ip must provide expose_magic(name, func); it is called once with the
    magic name "mglob" and the mglob_f handler.
    """
    magic_name = "mglob"
    # The usage text doubles as the help shown for the magic.
    mglob_f.__doc__ = globsyntax
    ip.expose_magic(magic_name, mglob_f)
# The self-check is left disabled; uncomment the next line to run it on load.
# test()
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()