##// END OF EJS Templates
Turn little test into proper doctest. Cleanup and document.
Turn little test into proper doctest. Cleanup and document.

File last commit:

r1341:fbc5f8cb
r1417:e6bda754
Show More
mglob.py
231 lines | 7.5 KiB | text/x-python | PythonLexer
ville
initialization (no svn history)
r988 #!/usr/bin/env python
r""" mglob - enhanced file list expansion module
Use as a stand-alone utility (for xargs, `backticks` etc.),
or as a globbing library for your own Python programs. Globbing sys.argv is something
that almost every Windows script has to perform manually, and this module is here
to help with that task. Also Unix users will benefit from enhanced modes
such as recursion, exclusion, directory omission...
Unlike glob.glob, directories are not included in the glob unless specified
with 'dir:'
'expand' is the function to use in python programs. Typical use
to expand argv (esp. in windows)::
try:
import mglob
files = mglob.expand(sys.argv[1:])
except ImportError:
print "mglob not found; try 'easy_install mglob' for extra features"
files = sys.argv[1:]
Note that for unix, shell expands *normal* wildcards (*.cpp, etc.) in argv.
Therefore, you might want to use quotes with normal wildcards to prevent this
expansion, in order for mglob to see the wildcards and get the wanted behaviour.
Not quoting the wildcards is harmless and typically has equivalent results, though.
Author: Ville Vainio <vivainio@gmail.com>
License: MIT Open Source license
"""
# Kept in a module-level variable so the same text can be printed as the
# command-line usage message (see main) and reused as the %mglob docstring
# (see init_ipython).  The entries are indented because they follow a "::"
# marker, i.e. they are meant to render as a reStructuredText literal block.
# Backslashes are doubled so the literal holds a single backslash without
# relying on unrecognised escape sequences.
globsyntax = """\
    This program allows specifying filenames with "mglob" mechanism.
    Supported syntax in globs (wildcard matching patterns)::

     *.cpp ?ellowo*
         - obvious. Differs from normal glob in that dirs are not included.
           Unix users might want to write this as: "*.cpp" "?ellowo*"
     rec:/usr/share=*.txt,*.doc
         - get all *.txt and *.doc under /usr/share,
           recursively
     rec:/usr/share
         - All files under /usr/share, recursively
     rec:*.py
         - All .py files under current working dir, recursively
     foo
         - File or dir foo
     !*.bak readme*
         - readme*, exclude files ending with .bak
     !.svn/ !.hg/ !*_Data/ rec:.
         - Skip .svn, .hg, foo_Data dirs (and their subdirs) in recurse.
           Trailing / is the key, \\ does not work! Use !.*/ for all hidden.
     dir:foo
         - the directory foo if it exists (not files in foo)
     dir:*
         - all directories in current folder
     foo.py bar.* !h* rec:*.py
         - Obvious. !h* exclusion only applies for rec:*.py.
           foo.py is *not* included twice.
     @filelist.txt
         - All files listed in 'filelist.txt' file, on separate lines.
     "cont:class \\wak:" rec:*.py
         - Match files containing regexp. Applies to subsequent files.
           note quotes because of whitespace.
"""

__version__ = "0.2"
Ville M. Vainio
mglob: add cont:foo for searching content (a'la grep), sanitize !mydir*/ behaviour
r1341 import os,glob,fnmatch,sys,re
ville
initialization (no svn history)
r988 from sets import Set as set
def expand(flist, exp_dirs=False):
    """Expand the glob(s) in flist and return the matching paths as a list.

    flist may be either a whitespace-separated string of globs/files (split
    with shlex, so quoting works) or a sequence of globs/files.  Entries are
    processed left to right; ``~`` and environment variables are expanded
    in every entry first.  Recognised entry forms:

    ``rec:DIR=PAT1,PAT2``  files under DIR (recursively) matching any pattern
    ``rec:DIR``            all files under DIR, recursively
    ``rec:PAT``            files under the current dir matching PAT
    ``rec:``               all files under the current dir
    ``@FILE``              every line of FILE is taken as a path
    ``!PAT``               exclude basenames matching PAT from later entries;
                           a trailing ``/`` makes it a directory exclusion
                           that applies to ``rec:`` traversal
    ``dir:GLOB``           only directories matching GLOB
    ``cont:REGEX``         later files must contain REGEX (case-insensitive)
    anything else          a plain file glob (directories are left out) or,
                           without wildcards, the name itself

    if exp_dirs is true, directory names in flist are expanded to the files
    contained in them - otherwise, directory names are returned as is.

    A path is never returned twice, even when several entries match it.
    Reading an ``@FILE`` list that cannot be opened raises IOError.
    """
    # basestring only exists on Python 2; fall back to str elsewhere.
    try:
        string_types = basestring
    except NameError:
        string_types = str
    if isinstance(flist, string_types):
        import shlex
        flist = shlex.split(flist)

    done_set = set()            # absolute paths already considered
    denied_set = set()          # '!pattern' exclusions seen so far
    cont_set = set()            # 'cont:regex' content filters seen so far
    cur_rejected_dirs = set()   # directories excluded during rec: walks
    path_seps = os.sep + (os.altsep or "")

    def read_file(path):
        """Return the contents of path, always closing the file handle."""
        fobj = open(path)
        try:
            return fobj.read()
        finally:
            fobj.close()

    def under_rejected_dir(dirpath):
        """True if dirpath is a rejected directory or lies inside one."""
        for rejected in cur_rejected_dirs:
            if not dirpath.startswith(rejected):
                continue
            rest = dirpath[len(rejected):]
            # Require a path-component boundary so that rejecting '.svn'
            # does not also reject a sibling such as '.svnfoo'.
            if not rest or rest[0] in path_seps:
                return True
            if rejected and rejected[-1] in path_seps:
                return True
        return False

    def recfind(root, pats=None):
        """Yield files below root whose basename matches any of pats."""
        if pats is None:
            pats = ["*"]
        # Only exclusions written with a trailing '/' apply to directories.
        denied_dirs = [os.path.dirname(d) for d in denied_set
                       if d.endswith("/")]
        for (dirpath, dirnames, filenames) in os.walk(root):
            deny = under_rejected_dir(dirpath)
            if not deny:
                bname = os.path.basename(dirpath)
                # '.' and '..' are not real directory names; without this
                # guard '!.*/' would reject the walk root of 'rec:.' itself.
                if bname not in (os.curdir, os.pardir):
                    for deny_pat in denied_dirs:
                        if fnmatch.fnmatch(bname, deny_pat):
                            cur_rejected_dirs.add(dirpath)
                            deny = True
                            break
            if deny:
                # Prune in place so os.walk does not descend any further.
                del dirnames[:]
                continue
            for fname in filenames:
                for pat in pats:
                    if fnmatch.fnmatch(fname, pat):
                        yield os.path.join(dirpath, fname)
                        break

    def once_filter(seq):
        """Yield entries of seq not seen before and not currently excluded."""
        for it in seq:
            p = os.path.abspath(it)
            if p in done_set:
                continue
            done_set.add(p)
            deny = False
            bname = os.path.basename(p)
            for deny_pat in denied_set:
                if fnmatch.fnmatch(bname, deny_pat):
                    deny = True
                    break
            # No need to read a file that is already excluded by name.
            if cont_set and not deny:
                try:
                    cont = read_file(p)
                except IOError:
                    # unreadable (or a directory): treat as not matching
                    continue
                for pat in cont_set:
                    if not re.search(pat, cont, re.IGNORECASE):
                        deny = True
                        break
            if not deny:
                yield it

    res = []
    for ent in flist:
        ent = os.path.expanduser(os.path.expandvars(ent))
        lowered = ent.lower()
        if lowered.startswith('rec:'):
            # Split on the first '=' only, so the pattern list may itself
            # contain '=' and the two names below are always bound.
            fields = ent[4:].split('=', 1)
            if len(fields) == 2:
                pth, patlist = fields
            elif not fields[0]:
                # bare 'rec:' - everything under the current directory
                pth, patlist = '.', '*'
            elif os.path.isdir(fields[0]):
                # single arg is dir
                pth, patlist = fields[0], '*'
            else:
                # single arg is pattern
                pth, patlist = '.', fields[0]
            res.extend(once_filter(recfind(pth, patlist.split(','))))
        # filelist
        elif ent.startswith('@') and os.path.isfile(ent[1:]):
            res.extend(once_filter(read_file(ent[1:]).splitlines()))
        # exclusion
        elif ent.startswith('!'):
            denied_set.add(ent[1:])
        # glob only dirs
        elif lowered.startswith('dir:'):
            res.extend(once_filter(filter(os.path.isdir, glob.glob(ent[4:]))))
        # content filter for subsequent entries
        elif lowered.startswith('cont:'):
            cont_set.add(ent[5:])
        # get all files in the specified dir
        elif os.path.isdir(ent) and exp_dirs:
            res.extend(once_filter(filter(os.path.isfile,
                                          glob.glob(ent + os.sep + "*"))))
        # glob only files
        elif '*' in ent or '?' in ent:
            res.extend(once_filter(filter(os.path.isfile, glob.glob(ent))))
        else:
            res.extend(once_filter([ent]))
    return res
def test():
    """Check that a glob string and the equivalent list expand identically."""
    from_string = expand("*.py ~/.ipython/*.py rec:/usr/share/doc-base")
    from_list = expand(['*.py', '~/.ipython/*.py', 'rec:/usr/share/doc-base'])
    assert from_string == from_list
def main():
if len(sys.argv) < 2:
print globsyntax
return
print "\n".join(expand(sys.argv[1:])),
def mglob_f(self, arg):
from IPython.genutils import SList
if arg.strip():
return SList(expand(arg))
print "Please specify pattern!"
print globsyntax
def init_ipython(ip):
    """Register the %mglob magic on the given IPython object.

    The glob syntax help text becomes the magic's docstring before the
    function is exposed through ``ip.expose_magic``.
    """
    magic_name = "mglob"
    mglob_f.__doc__ = globsyntax
    ip.expose_magic(magic_name, mglob_f)
# Self-check, disabled by default; uncomment the call below to run it on import.
# test()
if __name__ == "__main__":
    # Run as a script: hand control to the command-line entry point.
    main()