##// END OF EJS Templates
mglob: add cont:foo for searching content (a'la grep), sanitize !mydir*/ behaviour
Ville M. Vainio -
Show More
@@ -1,203 +1,231
1 1 #!/usr/bin/env python
2 2
3 3 r""" mglob - enhanced file list expansion module
4 4
5 5 Use as stand-alone utility (for xargs, `backticks` etc.),
6 6 or a globbing library for own python programs. Globbing the sys.argv is something
7 7 that almost every Windows script has to perform manually, and this module is here
8 8 to help with that task. Also Unix users will benefit from enhanced modes
9 9 such as recursion, exclusion, directory omission...
10 10
11 11 Unlike glob.glob, directories are not included in the glob unless specified
12 12 with 'dir:'
13 13
14 14 'expand' is the function to use in python programs. Typical use
15 15 to expand argv (esp. in windows)::
16 16
17 17 try:
18 18 import mglob
19 19 files = mglob.expand(sys.argv[1:])
20 20 except ImportError:
21 21 print "mglob not found; try 'easy_install mglob' for extra features"
22 22 files = sys.argv[1:]
23 23
24 24 Note that for unix, shell expands *normal* wildcards (*.cpp, etc.) in argv.
25 25 Therefore, you might want to use quotes with normal wildcards to prevent this
26 26 expansion, in order for mglob to see the wildcards and get the wanted behaviour.
27 27 Not quoting the wildcards is harmless and typically has equivalent results, though.
28 28
29 29 Author: Ville Vainio <vivainio@gmail.com>
30 30 License: MIT Open Source license
31 31
32 32 """
33 33
# Usage text, kept in a module-level variable so that it can be printed as
# command-line help and also reused as the docstring of the %mglob magic.
# Literal backslashes are written as "\\" so the text does not rely on
# unrecognised escape sequences being passed through unchanged.

globsyntax = """\
This program allows specifying filenames with "mglob" mechanism.
Supported syntax in globs (wildcard matching patterns)::

 *.cpp ?ellowo*
     - obvious. Differs from normal glob in that dirs are not included.
       Unix users might want to write this as: "*.cpp" "?ellowo*"
 rec:/usr/share=*.txt,*.doc
     - get all *.txt and *.doc under /usr/share,
       recursively
 rec:/usr/share
     - All files under /usr/share, recursively
 rec:*.py
     - All .py files under current working dir, recursively
 foo
     - File or dir foo
 !*.bak readme*
     - readme*, exclude files ending with .bak
 !.svn/ !.hg/ !*_Data/ rec:.
     - Skip .svn, .hg, foo_Data dirs (and their subdirs) in recurse.
       Trailing / is the key, \\ does not work! Use !.*/ for all hidden.
 dir:foo
     - the directory foo if it exists (not files in foo)
 dir:*
     - all directories in current folder
 foo.py bar.* !h* rec:*.py
     - Obvious. !h* exclusion only applies for rec:*.py.
       foo.py is *not* included twice.
 @filelist.txt
     - All files listed in 'filelist.txt' file, on separate lines.
 "cont:class \\wak:" rec:*.py
     - Match files containing regexp. Applies to subsequent files.
       note quotes because of whitespace.
"""
67 70
68 71
69 72 __version__ = "0.2"
70 73
71 74
72 import os,glob,fnmatch,sys
75 import os,glob,fnmatch,sys,re
73 76 from sets import Set as set
74 77
75 78
def expand(flist,exp_dirs = False):
    """ Expand the glob(s) in flist.

    flist may be either a whitespace-separated list of globs/files
    or an array of globs/files.  A string is tokenised with shlex.split,
    so entries containing whitespace can be quoted.

    if exp_dirs is true, directory names in glob are expanded to the files
    contained in them - otherwise, directory names are returned as is.

    Entries are processed in order.  '!pattern' and 'cont:regexp' entries
    produce no output themselves; they only filter the entries that come
    after them.  A path is considered at most once per call, so the same
    file is never returned twice.

    Returns a list of path strings.
    """
    # 'basestring' only exists on Python 2; fall back to 'str' elsewhere.
    try:
        string_types = basestring
    except NameError:
        string_types = str
    if isinstance(flist, string_types):
        import shlex
        flist = shlex.split(flist)

    done_set = set()            # absolute paths that were already considered
    denied_set = set()          # '!' patterns seen so far (without the '!')
    cont_set = set()            # 'cont:' regexps seen so far
    cur_rejected_dirs = set()   # directories rejected during recursion

    def under_rejected(dirpath):
        # True if dirpath is a rejected directory or lies below one.
        # The prefix is compared with a trailing separator so that
        # rejecting 'foo' does not also reject its sibling 'foobar'.
        for rejected in cur_rejected_dirs:
            if dirpath == rejected:
                return True
            if dirpath.startswith(os.path.join(rejected, "")):
                return True
        return False

    def content_matches(path):
        # True if the file can be read and every 'cont:' regexp is found
        # in it (case-insensitively).  Unreadable entries never match.
        try:
            fobj = open(path)
        except (IOError, OSError):
            return False
        # nested try blocks instead of 'with' to stay usable on the old
        # interpreters this module targets (it imports the 'sets' module)
        try:
            try:
                cont = fobj.read()
            except (IOError, OSError):
                return False
        finally:
            fobj.close()
        for pat in cont_set:
            if not re.search(pat, cont, re.IGNORECASE):
                return False
        return True

    def recfind(p, pats = ("*",)):
        # Yield the files below directory p whose names match any of pats,
        # skipping directories excluded with a '!name/' entry.
        denied_dirs = [os.path.dirname(d) for d in denied_set if d.endswith("/")]
        for (dp,dnames,fnames) in os.walk(p):
            # do not traverse under already rejected dirs
            if under_rejected(dp):
                del dnames[:]
                continue

            # see if we should ignore the whole directory
            bname = os.path.basename(dp)
            deny = False
            for deny_pat in denied_dirs:
                if fnmatch.fnmatch(bname, deny_pat):
                    deny = True
                    break
            if deny:
                cur_rejected_dirs.add(dp)
                # emptying dnames in place stops os.walk from descending
                del dnames[:]
                continue

            for f in fnames:
                for pat in pats:
                    if fnmatch.fnmatch(f, pat):
                        yield os.path.join(dp, f)
                        break

    def once_filter(seq):
        # Yield the items of seq that were not considered before, are not
        # excluded by a '!' pattern and satisfy all 'cont:' regexps.
        for it in seq:
            p = os.path.abspath(it)
            if p in done_set:
                continue
            done_set.add(p)

            bname = os.path.basename(p)
            deny = False
            for deny_pat in denied_set:
                if fnmatch.fnmatch(bname, deny_pat):
                    deny = True
                    break
            if deny:
                # already excluded by name, no need to read the file
                continue
            if cont_set and not content_matches(p):
                continue
            yield it

    res = []

    for ent in flist:
        ent = os.path.expanduser(os.path.expandvars(ent))
        if ent.lower().startswith('rec:'):
            spec = ent[4:]
            if '=' in spec:
                # split on the first '=' only, so a pattern list that itself
                # contains '=' cannot leave pth/patlist unassigned
                pth, patlist = spec.split('=', 1)
            elif not spec:
                # bare 'rec:' - everything under the current directory
                pth, patlist = '.', '*'
            elif os.path.isdir(spec):
                # single arg is dir
                pth, patlist = spec, '*'
            else:
                # single arg is pattern
                pth, patlist = '.', spec

            pats = patlist.split(',')
            res.extend(once_filter(recfind(pth, pats)))
        # filelist
        elif ent.startswith('@') and os.path.isfile(ent[1:]):
            listfile = open(ent[1:])
            try:
                listed = listfile.read().splitlines()
            finally:
                listfile.close()
            res.extend(once_filter(listed))
        # exclusion
        elif ent.startswith('!'):
            denied_set.add(ent[1:])
        # glob only dirs
        elif ent.lower().startswith('dir:'):
            res.extend(once_filter(filter(os.path.isdir,glob.glob(ent[4:]))))
        # content filter for the entries that follow
        elif ent.lower().startswith('cont:'):
            cont_set.add(ent[5:])
        # get all files in the specified dir
        elif os.path.isdir(ent) and exp_dirs:
            res.extend(once_filter(filter(os.path.isfile,glob.glob(ent + os.sep+"*"))))

        # glob only files

        elif '*' in ent or '?' in ent:
            res.extend(once_filter(filter(os.path.isfile,glob.glob(ent))))

        else:
            res.extend(once_filter([ent]))
    return res
174 202
175 203
def test():
    """ Check that a glob string and the equivalent list expand identically. """
    from_string = expand("*.py ~/.ipython/*.py rec:/usr/share/doc-base")
    from_list = expand(['*.py', '~/.ipython/*.py', 'rec:/usr/share/doc-base'])
    assert from_string == from_list
181 209
182 210 def main():
183 211 if len(sys.argv) < 2:
184 212 print globsyntax
185 213 return
186 214
187 215 print "\n".join(expand(sys.argv[1:])),
188 216
189 217 def mglob_f(self, arg):
190 218 from IPython.genutils import SList
191 219 if arg.strip():
192 220 return SList(expand(arg))
193 221 print "Please specify pattern!"
194 222 print globsyntax
195 223
def init_ipython(ip):
    """ register %mglob for IPython

    The syntax help text becomes the docstring of the magic function
    before it is exposed on ip under the name "mglob".
    """
    magic_name = "mglob"
    setattr(mglob_f, "__doc__", globsyntax)
    ip.expose_magic(magic_name, mglob_f)
200 228
201 229 # test()
202 230 if __name__ == "__main__":
203 231 main()
General Comments 0
You need to be logged in to leave comments. Login now