Show More
@@ -1,203 +1,231 | |||||
1 | #!/usr/bin/env python |
|
1 | #!/usr/bin/env python | |
2 |
|
2 | |||
3 | r""" mglob - enhanced file list expansion module |
|
3 | r""" mglob - enhanced file list expansion module | |
4 |
|
4 | |||
5 | Use as stand-alone utility (for xargs, `backticks` etc.), |
|
5 | Use as stand-alone utility (for xargs, `backticks` etc.), | |
6 | or a globbing library for own python programs. Globbing the sys.argv is something |
|
6 | or a globbing library for own python programs. Globbing the sys.argv is something | |
7 | that almost every Windows script has to perform manually, and this module is here |
|
7 | that almost every Windows script has to perform manually, and this module is here | |
8 | to help with that task. Also Unix users will benefit from enhanced modes |
|
8 | to help with that task. Also Unix users will benefit from enhanced modes | |
9 | such as recursion, exclusion, directory omission... |
|
9 | such as recursion, exclusion, directory omission... | |
10 |
|
10 | |||
11 | Unlike glob.glob, directories are not included in the glob unless specified |
|
11 | Unlike glob.glob, directories are not included in the glob unless specified | |
12 | with 'dir:' |
|
12 | with 'dir:' | |
13 |
|
13 | |||
14 | 'expand' is the function to use in python programs. Typical use |
|
14 | 'expand' is the function to use in python programs. Typical use | |
15 | to expand argv (esp. in windows):: |
|
15 | to expand argv (esp. in windows):: | |
16 |
|
16 | |||
17 | try: |
|
17 | try: | |
18 | import mglob |
|
18 | import mglob | |
19 | files = mglob.expand(sys.argv[1:]) |
|
19 | files = mglob.expand(sys.argv[1:]) | |
20 | except ImportError: |
|
20 | except ImportError: | |
21 | print "mglob not found; try 'easy_install mglob' for extra features" |
|
21 | print "mglob not found; try 'easy_install mglob' for extra features" | |
22 | files = sys.argv[1:] |
|
22 | files = sys.argv[1:] | |
23 |
|
23 | |||
24 | Note that for unix, shell expands *normal* wildcards (*.cpp, etc.) in argv. |
|
24 | Note that for unix, shell expands *normal* wildcards (*.cpp, etc.) in argv. | |
25 | Therefore, you might want to use quotes with normal wildcards to prevent this |
|
25 | Therefore, you might want to use quotes with normal wildcards to prevent this | |
26 | expansion, in order for mglob to see the wildcards and get the wanted behaviour. |
|
26 | expansion, in order for mglob to see the wildcards and get the wanted behaviour. | |
27 | Not quoting the wildcards is harmless and typically has equivalent results, though. |
|
27 | Not quoting the wildcards is harmless and typically has equivalent results, though. | |
28 |
|
28 | |||
29 | Author: Ville Vainio <vivainio@gmail.com> |
|
29 | Author: Ville Vainio <vivainio@gmail.com> | |
30 | License: MIT Open Source license |
|
30 | License: MIT Open Source license | |
31 |
|
31 | |||
32 | """ |
|
32 | """ | |
33 |
|
33 | |||
34 | #Assigned in variable for "usage" printing convenience" |
|
34 | #Assigned in variable for "usage" printing convenience" | |
35 |
|
35 | |||
36 | globsyntax = """\ |
|
36 | globsyntax = """\ | |
37 | This program allows specifying filenames with "mglob" mechanism. |
|
37 | This program allows specifying filenames with "mglob" mechanism. | |
38 | Supported syntax in globs (wilcard matching patterns):: |
|
38 | Supported syntax in globs (wilcard matching patterns):: | |
39 |
|
39 | |||
40 | *.cpp ?ellowo* |
|
40 | *.cpp ?ellowo* | |
41 | - obvious. Differs from normal glob in that dirs are not included. |
|
41 | - obvious. Differs from normal glob in that dirs are not included. | |
42 | Unix users might want to write this as: "*.cpp" "?ellowo*" |
|
42 | Unix users might want to write this as: "*.cpp" "?ellowo*" | |
43 | rec:/usr/share=*.txt,*.doc |
|
43 | rec:/usr/share=*.txt,*.doc | |
44 | - get all *.txt and *.doc under /usr/share, |
|
44 | - get all *.txt and *.doc under /usr/share, | |
45 | recursively |
|
45 | recursively | |
46 | rec:/usr/share |
|
46 | rec:/usr/share | |
47 | - All files under /usr/share, recursively |
|
47 | - All files under /usr/share, recursively | |
48 | rec:*.py |
|
48 | rec:*.py | |
49 | - All .py files under current working dir, recursively |
|
49 | - All .py files under current working dir, recursively | |
50 | foo |
|
50 | foo | |
51 | - File or dir foo |
|
51 | - File or dir foo | |
52 | !*.bak readme* |
|
52 | !*.bak readme* | |
53 | - readme*, exclude files ending with .bak |
|
53 | - readme*, exclude files ending with .bak | |
54 | !.svn/ !.hg/ !*_Data/ rec:. |
|
54 | !.svn/ !.hg/ !*_Data/ rec:. | |
55 | - Skip .svn, .hg, foo_Data dirs (and their subdirs) in recurse. |
|
55 | - Skip .svn, .hg, foo_Data dirs (and their subdirs) in recurse. | |
56 | Trailing / is the key, \ does not work! |
|
56 | Trailing / is the key, \ does not work! Use !.*/ for all hidden. | |
57 | dir:foo |
|
57 | dir:foo | |
58 | - the directory foo if it exists (not files in foo) |
|
58 | - the directory foo if it exists (not files in foo) | |
59 | dir:* |
|
59 | dir:* | |
60 | - all directories in current folder |
|
60 | - all directories in current folder | |
61 | foo.py bar.* !h* rec:*.py |
|
61 | foo.py bar.* !h* rec:*.py | |
62 | - Obvious. !h* exclusion only applies for rec:*.py. |
|
62 | - Obvious. !h* exclusion only applies for rec:*.py. | |
63 | foo.py is *not* included twice. |
|
63 | foo.py is *not* included twice. | |
64 | @filelist.txt |
|
64 | @filelist.txt | |
65 | - All files listed in 'filelist.txt' file, on separate lines. |
|
65 | - All files listed in 'filelist.txt' file, on separate lines. | |
|
66 | "cont:class \wak:" rec:*.py | |||
|
67 | - Match files containing regexp. Applies to subsequent files. | |||
|
68 | note quotes because of whitespace. | |||
66 | """ |
|
69 | """ | |
67 |
|
70 | |||
68 |
|
71 | |||
69 | __version__ = "0.2" |
|
72 | __version__ = "0.2" | |
70 |
|
73 | |||
71 |
|
74 | |||
72 | import os,glob,fnmatch,sys |
|
75 | import os,glob,fnmatch,sys,re | |
73 | from sets import Set as set |
|
76 | from sets import Set as set | |
74 |
|
77 | |||
75 |
|
78 | |||
76 | def expand(flist,exp_dirs = False): |
|
79 | def expand(flist,exp_dirs = False): | |
77 | """ Expand the glob(s) in flist. |
|
80 | """ Expand the glob(s) in flist. | |
78 |
|
81 | |||
79 | flist may be either a whitespace-separated list of globs/files |
|
82 | flist may be either a whitespace-separated list of globs/files | |
80 | or an array of globs/files. |
|
83 | or an array of globs/files. | |
81 |
|
84 | |||
82 | if exp_dirs is true, directory names in glob are expanded to the files |
|
85 | if exp_dirs is true, directory names in glob are expanded to the files | |
83 | contained in them - otherwise, directory names are returned as is. |
|
86 | contained in them - otherwise, directory names are returned as is. | |
84 |
|
87 | |||
85 | """ |
|
88 | """ | |
86 | if isinstance(flist, basestring): |
|
89 | if isinstance(flist, basestring): | |
87 | flist = flist.split() |
|
90 | import shlex | |
|
91 | flist = shlex.split(flist) | |||
88 | done_set = set() |
|
92 | done_set = set() | |
89 | denied_set = set() |
|
93 | denied_set = set() | |
|
94 | cont_set = set() | |||
|
95 | cur_rejected_dirs = set() | |||
90 |
|
96 | |||
91 | def recfind(p, pats = ["*"]): |
|
97 | def recfind(p, pats = ["*"]): | |
92 |
denied_dirs = [ |
|
98 | denied_dirs = [os.path.dirname(d) for d in denied_set if d.endswith("/")] | |
93 | #print "de", denied_dirs |
|
|||
94 | for (dp,dnames,fnames) in os.walk(p): |
|
99 | for (dp,dnames,fnames) in os.walk(p): | |
95 | # see if we should ignore the whole directory |
|
100 | # see if we should ignore the whole directory | |
96 | dp_norm = dp.replace("\\","/") + "/" |
|
101 | dp_norm = dp.replace("\\","/") + "/" | |
97 | deny = False |
|
102 | deny = False | |
|
103 | # do not traverse under already rejected dirs | |||
|
104 | for d in cur_rejected_dirs: | |||
|
105 | if dp.startswith(d): | |||
|
106 | deny = True | |||
|
107 | break | |||
|
108 | if deny: | |||
|
109 | continue | |||
|
110 | ||||
|
111 | ||||
98 | #print "dp",dp |
|
112 | #print "dp",dp | |
|
113 | bname = os.path.basename(dp) | |||
99 | for deny_pat in denied_dirs: |
|
114 | for deny_pat in denied_dirs: | |
100 |
if fnmatch.fnmatch( |
|
115 | if fnmatch.fnmatch( bname, deny_pat): | |
101 | deny = True |
|
116 | deny = True | |
|
117 | cur_rejected_dirs.add(dp) | |||
102 | break |
|
118 | break | |
103 | if deny: |
|
119 | if deny: | |
104 | continue |
|
120 | continue | |
105 |
|
121 | |||
106 |
|
122 | |||
107 | for f in fnames: |
|
123 | for f in fnames: | |
108 | matched = False |
|
124 | matched = False | |
109 | for p in pats: |
|
125 | for p in pats: | |
110 | if fnmatch.fnmatch(f,p): |
|
126 | if fnmatch.fnmatch(f,p): | |
111 | matched = True |
|
127 | matched = True | |
112 | break |
|
128 | break | |
113 | if matched: |
|
129 | if matched: | |
114 | yield os.path.join(dp,f) |
|
130 | yield os.path.join(dp,f) | |
115 |
|
131 | |||
116 | def once_filter(seq): |
|
132 | def once_filter(seq): | |
117 | for it in seq: |
|
133 | for it in seq: | |
118 | p = os.path.abspath(it) |
|
134 | p = os.path.abspath(it) | |
119 | if p in done_set: |
|
135 | if p in done_set: | |
120 | continue |
|
136 | continue | |
121 | done_set.add(p) |
|
137 | done_set.add(p) | |
122 | deny = False |
|
138 | deny = False | |
123 | for deny_pat in denied_set: |
|
139 | for deny_pat in denied_set: | |
124 | if fnmatch.fnmatch(os.path.basename(p), deny_pat): |
|
140 | if fnmatch.fnmatch(os.path.basename(p), deny_pat): | |
125 | deny = True |
|
141 | deny = True | |
126 | break |
|
142 | break | |
|
143 | if cont_set: | |||
|
144 | try: | |||
|
145 | cont = open(p).read() | |||
|
146 | except IOError: | |||
|
147 | # deny | |||
|
148 | continue | |||
|
149 | for pat in cont_set: | |||
|
150 | if not re.search(pat,cont, re.IGNORECASE): | |||
|
151 | deny = True | |||
|
152 | break | |||
|
153 | ||||
127 | if not deny: |
|
154 | if not deny: | |
128 | yield it |
|
155 | yield it | |
129 | return |
|
156 | return | |
130 |
|
157 | |||
131 | res = [] |
|
158 | res = [] | |
132 |
|
159 | |||
133 | for ent in flist: |
|
160 | for ent in flist: | |
134 | ent = os.path.expanduser(os.path.expandvars(ent)) |
|
161 | ent = os.path.expanduser(os.path.expandvars(ent)) | |
135 | if ent.lower().startswith('rec:'): |
|
162 | if ent.lower().startswith('rec:'): | |
136 | fields = ent[4:].split('=') |
|
163 | fields = ent[4:].split('=') | |
137 | if len(fields) == 2: |
|
164 | if len(fields) == 2: | |
138 | pth, patlist = fields |
|
165 | pth, patlist = fields | |
139 | elif len(fields) == 1: |
|
166 | elif len(fields) == 1: | |
140 | if os.path.isdir(fields[0]): |
|
167 | if os.path.isdir(fields[0]): | |
141 | # single arg is dir |
|
168 | # single arg is dir | |
142 | pth, patlist = fields[0], '*' |
|
169 | pth, patlist = fields[0], '*' | |
143 | else: |
|
170 | else: | |
144 | # single arg is pattern |
|
171 | # single arg is pattern | |
145 | pth, patlist = '.', fields[0] |
|
172 | pth, patlist = '.', fields[0] | |
146 |
|
173 | |||
147 | elif len(fields) == 0: |
|
174 | elif len(fields) == 0: | |
148 | pth, pathlist = '.','*' |
|
175 | pth, pathlist = '.','*' | |
149 |
|
176 | |||
150 | pats = patlist.split(',') |
|
177 | pats = patlist.split(',') | |
151 | res.extend(once_filter(recfind(pth, pats))) |
|
178 | res.extend(once_filter(recfind(pth, pats))) | |
152 | # filelist |
|
179 | # filelist | |
153 | elif ent.startswith('@') and os.path.isfile(ent[1:]): |
|
180 | elif ent.startswith('@') and os.path.isfile(ent[1:]): | |
154 | res.extend(once_filter(open(ent[1:]).read().splitlines())) |
|
181 | res.extend(once_filter(open(ent[1:]).read().splitlines())) | |
155 | # exclusion |
|
182 | # exclusion | |
156 | elif ent.startswith('!'): |
|
183 | elif ent.startswith('!'): | |
157 | denied_set.add(ent[1:]) |
|
184 | denied_set.add(ent[1:]) | |
158 | # glob only dirs |
|
185 | # glob only dirs | |
159 | elif ent.lower().startswith('dir:'): |
|
186 | elif ent.lower().startswith('dir:'): | |
160 | res.extend(once_filter(filter(os.path.isdir,glob.glob(ent[4:])))) |
|
187 | res.extend(once_filter(filter(os.path.isdir,glob.glob(ent[4:])))) | |
161 |
|
188 | elif ent.lower().startswith('cont:'): | ||
|
189 | cont_set.add(ent[5:]) | |||
162 | # get all files in the specified dir |
|
190 | # get all files in the specified dir | |
163 | elif os.path.isdir(ent) and exp_dirs: |
|
191 | elif os.path.isdir(ent) and exp_dirs: | |
164 | res.extend(once_filter(filter(os.path.isfile,glob.glob(ent + os.sep+"*")))) |
|
192 | res.extend(once_filter(filter(os.path.isfile,glob.glob(ent + os.sep+"*")))) | |
165 |
|
193 | |||
166 | # glob only files |
|
194 | # glob only files | |
167 |
|
195 | |||
168 | elif '*' in ent or '?' in ent: |
|
196 | elif '*' in ent or '?' in ent: | |
169 | res.extend(once_filter(filter(os.path.isfile,glob.glob(ent)))) |
|
197 | res.extend(once_filter(filter(os.path.isfile,glob.glob(ent)))) | |
170 |
|
198 | |||
171 | else: |
|
199 | else: | |
172 | res.extend(once_filter([ent])) |
|
200 | res.extend(once_filter([ent])) | |
173 | return res |
|
201 | return res | |
174 |
|
202 | |||
175 |
|
203 | |||
176 | def test(): |
|
204 | def test(): | |
177 | assert ( |
|
205 | assert ( | |
178 | expand("*.py ~/.ipython/*.py rec:/usr/share/doc-base") == |
|
206 | expand("*.py ~/.ipython/*.py rec:/usr/share/doc-base") == | |
179 | expand( ['*.py', '~/.ipython/*.py', 'rec:/usr/share/doc-base'] ) |
|
207 | expand( ['*.py', '~/.ipython/*.py', 'rec:/usr/share/doc-base'] ) | |
180 | ) |
|
208 | ) | |
181 |
|
209 | |||
182 | def main(): |
|
210 | def main(): | |
183 | if len(sys.argv) < 2: |
|
211 | if len(sys.argv) < 2: | |
184 | print globsyntax |
|
212 | print globsyntax | |
185 | return |
|
213 | return | |
186 |
|
214 | |||
187 | print "\n".join(expand(sys.argv[1:])), |
|
215 | print "\n".join(expand(sys.argv[1:])), | |
188 |
|
216 | |||
189 | def mglob_f(self, arg): |
|
217 | def mglob_f(self, arg): | |
190 | from IPython.genutils import SList |
|
218 | from IPython.genutils import SList | |
191 | if arg.strip(): |
|
219 | if arg.strip(): | |
192 | return SList(expand(arg)) |
|
220 | return SList(expand(arg)) | |
193 | print "Please specify pattern!" |
|
221 | print "Please specify pattern!" | |
194 | print globsyntax |
|
222 | print globsyntax | |
195 |
|
223 | |||
196 | def init_ipython(ip): |
|
224 | def init_ipython(ip): | |
197 | """ register %mglob for IPython """ |
|
225 | """ register %mglob for IPython """ | |
198 | mglob_f.__doc__ = globsyntax |
|
226 | mglob_f.__doc__ = globsyntax | |
199 | ip.expose_magic("mglob",mglob_f) |
|
227 | ip.expose_magic("mglob",mglob_f) | |
200 |
|
228 | |||
201 | # test() |
|
229 | # test() | |
202 | if __name__ == "__main__": |
|
230 | if __name__ == "__main__": | |
203 | main() |
|
231 | main() |
General Comments 0
You need to be logged in to leave comments.
Login now