##// END OF EJS Templates
Update mglob to new magic API.
Fernando Perez -
Show More
@@ -1,227 +1,233 b''
1 r""" mglob - enhanced file list expansion module
1 r""" mglob - enhanced file list expansion module
2
2
3 Use as stand-alone utility (for xargs, `backticks` etc.),
3 Use as stand-alone utility (for xargs, `backticks` etc.),
4 or a globbing library for own python programs. Globbing the sys.argv is something
4 or a globbing library for own python programs. Globbing the sys.argv is something
5 that almost every Windows script has to perform manually, and this module is here
5 that almost every Windows script has to perform manually, and this module is here
6 to help with that task. Also Unix users will benefit from enhanced modes
6 to help with that task. Also Unix users will benefit from enhanced modes
7 such as recursion, exclusion, directory omission...
7 such as recursion, exclusion, directory omission...
8
8
9 Unlike glob.glob, directories are not included in the glob unless specified
9 Unlike glob.glob, directories are not included in the glob unless specified
10 with 'dir:'
10 with 'dir:'
11
11
12 'expand' is the function to use in python programs. Typical use
12 'expand' is the function to use in python programs. Typical use
13 to expand argv (esp. in windows)::
13 to expand argv (esp. in windows)::
14
14
15 try:
15 try:
16 import mglob
16 import mglob
17 files = mglob.expand(sys.argv[1:])
17 files = mglob.expand(sys.argv[1:])
18 except ImportError:
18 except ImportError:
19 print "mglob not found; try 'easy_install mglob' for extra features"
19 print "mglob not found; try 'easy_install mglob' for extra features"
20 files = sys.argv[1:]
20 files = sys.argv[1:]
21
21
22 Note that for unix, shell expands *normal* wildcards (*.cpp, etc.) in argv.
22 Note that for unix, shell expands *normal* wildcards (*.cpp, etc.) in argv.
23 Therefore, you might want to use quotes with normal wildcards to prevent this
23 Therefore, you might want to use quotes with normal wildcards to prevent this
24 expansion, in order for mglob to see the wildcards and get the wanted behaviour.
24 expansion, in order for mglob to see the wildcards and get the wanted behaviour.
25 Not quoting the wildcards is harmless and typically has equivalent results, though.
25 Not quoting the wildcards is harmless and typically has equivalent results, though.
26
26
27 Author: Ville Vainio <vivainio@gmail.com>
27 Author: Ville Vainio <vivainio@gmail.com>
28 License: MIT Open Source license
28 License: MIT Open Source license
29
29
30 """
30 """
31
31
# Assigned to a variable for "usage" printing convenience.
#
# NOTE: the backslashes below are written as explicit "\\" escapes. The string
# is not a raw literal (the leading "\" line continuation must stay active),
# and bare "\ " / "\w" are invalid escape sequences that newer Python versions
# warn about. The resulting text is unchanged.

globsyntax = """\
    This program allows specifying filenames with "mglob" mechanism.
    Supported syntax in globs (wilcard matching patterns)::

     *.cpp ?ellowo*
         - obvious. Differs from normal glob in that dirs are not included.
           Unix users might want to write this as: "*.cpp" "?ellowo*"
     rec:/usr/share=*.txt,*.doc
         - get all *.txt and *.doc under /usr/share,
           recursively
     rec:/usr/share
         - All files under /usr/share, recursively
     rec:*.py
         - All .py files under current working dir, recursively
     foo
         - File or dir foo
     !*.bak readme*
         - readme*, exclude files ending with .bak
     !.svn/ !.hg/ !*_Data/ rec:.
         - Skip .svn, .hg, foo_Data dirs (and their subdirs) in recurse.
           Trailing / is the key, \\ does not work! Use !.*/ for all hidden.
     dir:foo
         - the directory foo if it exists (not files in foo)
     dir:*
         - all directories in current folder
     foo.py bar.* !h* rec:*.py
         - Obvious. !h* exclusion only applies for rec:*.py.
           foo.py is *not* included twice.
     @filelist.txt
         - All files listed in 'filelist.txt' file, on separate lines.
     "cont:class \\wak:" rec:*.py
         - Match files containing regexp. Applies to subsequent files.
           note quotes because of whitespace.
"""


__version__ = "0.2"
71
71
72
72
73 import os,glob,fnmatch,sys,re
73 import os,glob,fnmatch,sys,re
74
74
def expand(flist, exp_dirs=False):
    """ Expand the glob(s) in flist into a list of paths.

    flist may be either a whitespace-separated string of globs/files
    (split with shlex, so quoting works) or a sequence of globs/files.
    Entries are processed left to right; each may be:

    - ``rec:DIR=PAT1,PAT2``, ``rec:DIR``, ``rec:PAT`` or bare ``rec:``:
      recursive search (DIR defaults to '.', the pattern to '*').  Only the
      first '=' separates the directory from the pattern list.
    - ``@FILE``: read paths from FILE, one per line (only if FILE exists).
    - ``!PAT``: exclude basenames matching PAT from *subsequent* entries;
      a pattern with a trailing '/' excludes directories during ``rec:``.
    - ``dir:GLOB``: directories matching GLOB.
    - ``cont:REGEXP``: subsequent entries must contain REGEXP
      (case-insensitive); unreadable files are dropped.
    - a glob containing '*' or '?': matching files (not directories).
    - anything else: returned as is.

    if exp_dirs is true, directory names in flist are expanded to the files
    contained in them - otherwise, directory names are returned as is.

    Returns a list of paths in the form they were found/given; a path is
    never returned twice (compared by absolute path).
    """
    # Python 2 has basestring (str + unicode); fall back to str elsewhere.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if isinstance(flist, string_types):
        import shlex
        flist = shlex.split(flist)

    done_set = set()           # absolute paths already considered
    denied_set = set()         # '!' exclusion patterns seen so far
    cont_set = set()           # 'cont:' regexps seen so far
    cur_rejected_dirs = set()  # directories rejected by an earlier walk

    def matches_any(name, patterns):
        # True if name matches at least one fnmatch pattern.
        return any(fnmatch.fnmatch(name, pat) for pat in patterns)

    def under_rejected(path):
        # Compare whole path components: a rejected '.svn' must not also
        # reject a sibling such as '.svnx'.
        for rejected in cur_rejected_dirs:
            if path == rejected or path.startswith(rejected + os.sep):
                return True
        return False

    def recfind(top, pats=("*",)):
        # Yield files under top whose basename matches any of pats, skipping
        # directories excluded with a trailing-slash '!' pattern.
        denied_dirs = [os.path.dirname(d) for d in denied_set
                       if d.endswith("/")]
        for dirpath, dirnames, filenames in os.walk(top):
            if under_rejected(dirpath):
                dirnames[:] = []  # prune: do not descend any further
                continue

            bname = os.path.basename(dirpath)
            # '.' and '..' are not hidden directories; without this guard
            # "!.*/ rec:." would reject the walk root and yield nothing.
            if (bname not in (os.curdir, os.pardir)
                    and matches_any(bname, denied_dirs)):
                cur_rejected_dirs.add(dirpath)
                dirnames[:] = []
                continue

            for fname in filenames:
                if matches_any(fname, pats):
                    yield os.path.join(dirpath, fname)

    def contains_all(path):
        # True if the file is readable and matches every 'cont:' regexp.
        try:
            with open(path) as fobj:
                text = fobj.read()
        except (IOError, OSError, UnicodeDecodeError):
            # unreadable (missing, a directory, not decodable): deny
            return False
        for pat in cont_set:
            if not re.search(pat, text, re.IGNORECASE):
                return False
        return True

    def once_filter(seq):
        # Yield each item at most once, applying the exclusion patterns and
        # content filters collected so far.
        for item in seq:
            full = os.path.abspath(item)
            if full in done_set:
                continue
            # Recorded even when denied, so a denied path stays excluded.
            done_set.add(full)
            if matches_any(os.path.basename(full), denied_set):
                continue
            if cont_set and not contains_all(full):
                continue
            yield item

    res = []

    for ent in flist:
        ent = os.path.expanduser(os.path.expandvars(ent))
        lowered = ent.lower()
        if lowered.startswith('rec:'):
            spec = ent[4:]
            if not spec:
                # bare "rec:": everything under the current directory
                pth, patlist = '.', '*'
            elif '=' in spec:
                pth, patlist = spec.split('=', 1)
            elif os.path.isdir(spec):
                # single arg is dir
                pth, patlist = spec, '*'
            else:
                # single arg is pattern
                pth, patlist = '.', spec
            res.extend(once_filter(recfind(pth, patlist.split(','))))
        # filelist
        elif ent.startswith('@') and os.path.isfile(ent[1:]):
            with open(ent[1:]) as listing:
                listed = listing.read().splitlines()
            res.extend(once_filter(listed))
        # exclusion
        elif ent.startswith('!'):
            denied_set.add(ent[1:])
        # glob only dirs
        elif lowered.startswith('dir:'):
            res.extend(once_filter(filter(os.path.isdir, glob.glob(ent[4:]))))
        # content filter for subsequent entries
        elif lowered.startswith('cont:'):
            cont_set.add(ent[5:])
        # get all files in the specified dir
        elif os.path.isdir(ent) and exp_dirs:
            res.extend(once_filter(filter(os.path.isfile,
                                          glob.glob(ent + os.sep + "*"))))
        # glob only files
        elif '*' in ent or '?' in ent:
            res.extend(once_filter(filter(os.path.isfile, glob.glob(ent))))
        else:
            res.extend(once_filter([ent]))
    return res
198
198
199
199
def test():
    """ Check that a string spec and the equivalent list expand alike. """
    from_string = expand("*.py ~/.ipython/*.py rec:/usr/share/doc-base")
    from_list = expand(['*.py', '~/.ipython/*.py', 'rec:/usr/share/doc-base'])
    assert from_string == from_list
205
205
def main():
    """ Command-line entry point: print the expansion of the arguments.

    With no arguments, print the glob syntax summary instead.
    """
    patterns = sys.argv[1:]
    if not patterns:
        print(globsyntax)
        return

    # The trailing comma is the Python 2 print-statement form that
    # suppresses the final newline; keep it.
    print("\n".join(expand(patterns))),
212
212
def mglob(self, arg):
    # Magic body: expand the pattern string in arg and wrap the resulting
    # list in an SList; with a blank argument, print usage and return None.
    # self is not used here.
    from IPython.utils.text import SList
    if not arg.strip():
        print("Please specify pattern!")
        print(globsyntax)
        return None
    return SList(expand(arg))


# The syntax summary doubles as the function's help text.
mglob.__doc__ = globsyntax
223
224
def init_ipython(ip):
    """ register %mglob for IPython

    Hands the module-level mglob function to ip.function_as_magic.
    NOTE(review): ip is presumably the IPython shell object passed to
    extension init hooks - confirm against the caller.
    """

    ip.function_as_magic(mglob)
229
224
230
225 # test()
231 # test()
226 if __name__ == "__main__":
232 if __name__ == "__main__":
227 main()
233 main()
General Comments 0
You need to be logged in to leave comments. Login now