##// END OF EJS Templates
mglob: add cont:foo for searching content (à la grep), sanitize !mydir*/ behaviour
Ville M. Vainio -
Show More
@@ -1,203 +1,231
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2
2
3 r""" mglob - enhanced file list expansion module
3 r""" mglob - enhanced file list expansion module
4
4
5 Use as stand-alone utility (for xargs, `backticks` etc.),
5 Use as stand-alone utility (for xargs, `backticks` etc.),
6 or a globbing library for own python programs. Globbing the sys.argv is something
6 or a globbing library for own python programs. Globbing the sys.argv is something
7 that almost every Windows script has to perform manually, and this module is here
7 that almost every Windows script has to perform manually, and this module is here
8 to help with that task. Also Unix users will benefit from enhanced modes
8 to help with that task. Also Unix users will benefit from enhanced modes
9 such as recursion, exclusion, directory omission...
9 such as recursion, exclusion, directory omission...
10
10
11 Unlike glob.glob, directories are not included in the glob unless specified
11 Unlike glob.glob, directories are not included in the glob unless specified
12 with 'dir:'
12 with 'dir:'
13
13
14 'expand' is the function to use in python programs. Typical use
14 'expand' is the function to use in python programs. Typical use
15 to expand argv (esp. in windows)::
15 to expand argv (esp. in windows)::
16
16
17 try:
17 try:
18 import mglob
18 import mglob
19 files = mglob.expand(sys.argv[1:])
19 files = mglob.expand(sys.argv[1:])
20 except ImportError:
20 except ImportError:
21 print "mglob not found; try 'easy_install mglob' for extra features"
21 print "mglob not found; try 'easy_install mglob' for extra features"
22 files = sys.argv[1:]
22 files = sys.argv[1:]
23
23
24 Note that for unix, shell expands *normal* wildcards (*.cpp, etc.) in argv.
24 Note that for unix, shell expands *normal* wildcards (*.cpp, etc.) in argv.
25 Therefore, you might want to use quotes with normal wildcards to prevent this
25 Therefore, you might want to use quotes with normal wildcards to prevent this
26 expansion, in order for mglob to see the wildcards and get the wanted behaviour.
26 expansion, in order for mglob to see the wildcards and get the wanted behaviour.
27 Not quoting the wildcards is harmless and typically has equivalent results, though.
27 Not quoting the wildcards is harmless and typically has equivalent results, though.
28
28
29 Author: Ville Vainio <vivainio@gmail.com>
29 Author: Ville Vainio <vivainio@gmail.com>
30 License: MIT Open Source license
30 License: MIT Open Source license
31
31
32 """
32 """
33
33
34 # Assigned to a variable so the text can also be printed as "usage" help
34 # Assigned to a variable so the text can also be printed as "usage" help
35
35
36 globsyntax = """\
36 globsyntax = """\
37 This program allows specifying filenames with "mglob" mechanism.
37 This program allows specifying filenames with "mglob" mechanism.
38 Supported syntax in globs (wildcard matching patterns)::
38 Supported syntax in globs (wildcard matching patterns)::
39
39
40 *.cpp ?ellowo*
40 *.cpp ?ellowo*
41 - obvious. Differs from normal glob in that dirs are not included.
41 - obvious. Differs from normal glob in that dirs are not included.
42 Unix users might want to write this as: "*.cpp" "?ellowo*"
42 Unix users might want to write this as: "*.cpp" "?ellowo*"
43 rec:/usr/share=*.txt,*.doc
43 rec:/usr/share=*.txt,*.doc
44 - get all *.txt and *.doc under /usr/share,
44 - get all *.txt and *.doc under /usr/share,
45 recursively
45 recursively
46 rec:/usr/share
46 rec:/usr/share
47 - All files under /usr/share, recursively
47 - All files under /usr/share, recursively
48 rec:*.py
48 rec:*.py
49 - All .py files under current working dir, recursively
49 - All .py files under current working dir, recursively
50 foo
50 foo
51 - File or dir foo
51 - File or dir foo
52 !*.bak readme*
52 !*.bak readme*
53 - readme*, exclude files ending with .bak
53 - readme*, exclude files ending with .bak
54 !.svn/ !.hg/ !*_Data/ rec:.
54 !.svn/ !.hg/ !*_Data/ rec:.
55 - Skip .svn, .hg, foo_Data dirs (and their subdirs) in recurse.
55 - Skip .svn, .hg, foo_Data dirs (and their subdirs) in recurse.
56 Trailing / is the key, \ does not work!
56 Trailing / is the key, \ does not work! Use !.*/ for all hidden.
57 dir:foo
57 dir:foo
58 - the directory foo if it exists (not files in foo)
58 - the directory foo if it exists (not files in foo)
59 dir:*
59 dir:*
60 - all directories in current folder
60 - all directories in current folder
61 foo.py bar.* !h* rec:*.py
61 foo.py bar.* !h* rec:*.py
62 - Obvious. !h* exclusion only applies for rec:*.py.
62 - Obvious. !h* exclusion only applies for rec:*.py.
63 foo.py is *not* included twice.
63 foo.py is *not* included twice.
64 @filelist.txt
64 @filelist.txt
65 - All files listed in 'filelist.txt' file, on separate lines.
65 - All files listed in 'filelist.txt' file, on separate lines.
66 "cont:class \wak:" rec:*.py
67 - Match only files whose content matches the regexp (case-insensitive). Applies to subsequent files.
68 Note the quotes, which are needed because of the whitespace.
66 """
69 """
67
70
68
71
69 __version__ = "0.2"
72 __version__ = "0.2"
70
73
71
74
72 import os,glob,fnmatch,sys
75 import os,glob,fnmatch,sys,re
73 from sets import Set as set
76 from sets import Set as set
74
77
75
78
76 def expand(flist,exp_dirs = False):
79 def expand(flist,exp_dirs = False):
77 """ Expand the glob(s) in flist.
80 """ Expand the glob(s) in flist.
78
81
79 flist may be either a whitespace-separated list of globs/files
82 flist may be either a whitespace-separated list of globs/files
80 or an array of globs/files.
83 or an array of globs/files.
81
84
82 if exp_dirs is true, directory names in glob are expanded to the files
85 if exp_dirs is true, directory names in glob are expanded to the files
83 contained in them - otherwise, directory names are returned as is.
86 contained in them - otherwise, directory names are returned as is.
84
87
85 """
88 """
86 if isinstance(flist, basestring):
89 if isinstance(flist, basestring):
87 flist = flist.split()
90 import shlex
91 flist = shlex.split(flist)
88 done_set = set()
92 done_set = set()
89 denied_set = set()
93 denied_set = set()
94 cont_set = set()
95 cur_rejected_dirs = set()
90
96
91 def recfind(p, pats = ["*"]):
97 def recfind(p, pats = ["*"]):
92 denied_dirs = ["*" + d+"*" for d in denied_set if d.endswith("/")]
98 denied_dirs = [os.path.dirname(d) for d in denied_set if d.endswith("/")]
93 #print "de", denied_dirs
94 for (dp,dnames,fnames) in os.walk(p):
99 for (dp,dnames,fnames) in os.walk(p):
95 # see if we should ignore the whole directory
100 # see if we should ignore the whole directory
96 dp_norm = dp.replace("\\","/") + "/"
101 dp_norm = dp.replace("\\","/") + "/"
97 deny = False
102 deny = False
103 # do not traverse under already rejected dirs
104 for d in cur_rejected_dirs:
105 if dp.startswith(d):
106 deny = True
107 break
108 if deny:
109 continue
110
111
98 #print "dp",dp
112 #print "dp",dp
113 bname = os.path.basename(dp)
99 for deny_pat in denied_dirs:
114 for deny_pat in denied_dirs:
100 if fnmatch.fnmatch( dp_norm, deny_pat):
115 if fnmatch.fnmatch( bname, deny_pat):
101 deny = True
116 deny = True
117 cur_rejected_dirs.add(dp)
102 break
118 break
103 if deny:
119 if deny:
104 continue
120 continue
105
121
106
122
107 for f in fnames:
123 for f in fnames:
108 matched = False
124 matched = False
109 for p in pats:
125 for p in pats:
110 if fnmatch.fnmatch(f,p):
126 if fnmatch.fnmatch(f,p):
111 matched = True
127 matched = True
112 break
128 break
113 if matched:
129 if matched:
114 yield os.path.join(dp,f)
130 yield os.path.join(dp,f)
115
131
116 def once_filter(seq):
132 def once_filter(seq):
117 for it in seq:
133 for it in seq:
118 p = os.path.abspath(it)
134 p = os.path.abspath(it)
119 if p in done_set:
135 if p in done_set:
120 continue
136 continue
121 done_set.add(p)
137 done_set.add(p)
122 deny = False
138 deny = False
123 for deny_pat in denied_set:
139 for deny_pat in denied_set:
124 if fnmatch.fnmatch(os.path.basename(p), deny_pat):
140 if fnmatch.fnmatch(os.path.basename(p), deny_pat):
125 deny = True
141 deny = True
126 break
142 break
143 if cont_set:
144 try:
145 cont = open(p).read()
146 except IOError:
147 # deny
148 continue
149 for pat in cont_set:
150 if not re.search(pat,cont, re.IGNORECASE):
151 deny = True
152 break
153
127 if not deny:
154 if not deny:
128 yield it
155 yield it
129 return
156 return
130
157
131 res = []
158 res = []
132
159
133 for ent in flist:
160 for ent in flist:
134 ent = os.path.expanduser(os.path.expandvars(ent))
161 ent = os.path.expanduser(os.path.expandvars(ent))
135 if ent.lower().startswith('rec:'):
162 if ent.lower().startswith('rec:'):
136 fields = ent[4:].split('=')
163 fields = ent[4:].split('=')
137 if len(fields) == 2:
164 if len(fields) == 2:
138 pth, patlist = fields
165 pth, patlist = fields
139 elif len(fields) == 1:
166 elif len(fields) == 1:
140 if os.path.isdir(fields[0]):
167 if os.path.isdir(fields[0]):
141 # single arg is dir
168 # single arg is dir
142 pth, patlist = fields[0], '*'
169 pth, patlist = fields[0], '*'
143 else:
170 else:
144 # single arg is pattern
171 # single arg is pattern
145 pth, patlist = '.', fields[0]
172 pth, patlist = '.', fields[0]
146
173
147 elif len(fields) == 0:
174 elif len(fields) == 0:
148 pth, pathlist = '.','*'
175 pth, pathlist = '.','*'
149
176
150 pats = patlist.split(',')
177 pats = patlist.split(',')
151 res.extend(once_filter(recfind(pth, pats)))
178 res.extend(once_filter(recfind(pth, pats)))
152 # filelist
179 # filelist
153 elif ent.startswith('@') and os.path.isfile(ent[1:]):
180 elif ent.startswith('@') and os.path.isfile(ent[1:]):
154 res.extend(once_filter(open(ent[1:]).read().splitlines()))
181 res.extend(once_filter(open(ent[1:]).read().splitlines()))
155 # exclusion
182 # exclusion
156 elif ent.startswith('!'):
183 elif ent.startswith('!'):
157 denied_set.add(ent[1:])
184 denied_set.add(ent[1:])
158 # glob only dirs
185 # glob only dirs
159 elif ent.lower().startswith('dir:'):
186 elif ent.lower().startswith('dir:'):
160 res.extend(once_filter(filter(os.path.isdir,glob.glob(ent[4:]))))
187 res.extend(once_filter(filter(os.path.isdir,glob.glob(ent[4:]))))
161
188 elif ent.lower().startswith('cont:'):
189 cont_set.add(ent[5:])
162 # get all files in the specified dir
190 # get all files in the specified dir
163 elif os.path.isdir(ent) and exp_dirs:
191 elif os.path.isdir(ent) and exp_dirs:
164 res.extend(once_filter(filter(os.path.isfile,glob.glob(ent + os.sep+"*"))))
192 res.extend(once_filter(filter(os.path.isfile,glob.glob(ent + os.sep+"*"))))
165
193
166 # glob only files
194 # glob only files
167
195
168 elif '*' in ent or '?' in ent:
196 elif '*' in ent or '?' in ent:
169 res.extend(once_filter(filter(os.path.isfile,glob.glob(ent))))
197 res.extend(once_filter(filter(os.path.isfile,glob.glob(ent))))
170
198
171 else:
199 else:
172 res.extend(once_filter([ent]))
200 res.extend(once_filter([ent]))
173 return res
201 return res
174
202
175
203
176 def test():
204 def test():
177 assert (
205 assert (
178 expand("*.py ~/.ipython/*.py rec:/usr/share/doc-base") ==
206 expand("*.py ~/.ipython/*.py rec:/usr/share/doc-base") ==
179 expand( ['*.py', '~/.ipython/*.py', 'rec:/usr/share/doc-base'] )
207 expand( ['*.py', '~/.ipython/*.py', 'rec:/usr/share/doc-base'] )
180 )
208 )
181
209
182 def main():
210 def main():
183 if len(sys.argv) < 2:
211 if len(sys.argv) < 2:
184 print globsyntax
212 print globsyntax
185 return
213 return
186
214
187 print "\n".join(expand(sys.argv[1:])),
215 print "\n".join(expand(sys.argv[1:])),
188
216
189 def mglob_f(self, arg):
217 def mglob_f(self, arg):
190 from IPython.genutils import SList
218 from IPython.genutils import SList
191 if arg.strip():
219 if arg.strip():
192 return SList(expand(arg))
220 return SList(expand(arg))
193 print "Please specify pattern!"
221 print "Please specify pattern!"
194 print globsyntax
222 print globsyntax
195
223
196 def init_ipython(ip):
224 def init_ipython(ip):
197 """ register %mglob for IPython """
225 """ register %mglob for IPython """
198 mglob_f.__doc__ = globsyntax
226 mglob_f.__doc__ = globsyntax
199 ip.expose_magic("mglob",mglob_f)
227 ip.expose_magic("mglob",mglob_f)
200
228
201 # test()
229 # test()
202 if __name__ == "__main__":
230 if __name__ == "__main__":
203 main()
231 main()
General Comments 0
You need to be logged in to leave comments. Login now