Show More
@@ -1,227 +1,233 b'' | |||||
1 | r""" mglob - enhanced file list expansion module |
|
1 | r""" mglob - enhanced file list expansion module | |
2 |
|
2 | |||
3 | Use as stand-alone utility (for xargs, `backticks` etc.), |
|
3 | Use as stand-alone utility (for xargs, `backticks` etc.), | |
4 | or a globbing library for own python programs. Globbing the sys.argv is something |
|
4 | or a globbing library for own python programs. Globbing the sys.argv is something | |
5 | that almost every Windows script has to perform manually, and this module is here |
|
5 | that almost every Windows script has to perform manually, and this module is here | |
6 | to help with that task. Also Unix users will benefit from enhanced modes |
|
6 | to help with that task. Also Unix users will benefit from enhanced modes | |
7 | such as recursion, exclusion, directory omission... |
|
7 | such as recursion, exclusion, directory omission... | |
8 |
|
8 | |||
9 | Unlike glob.glob, directories are not included in the glob unless specified |
|
9 | Unlike glob.glob, directories are not included in the glob unless specified | |
10 | with 'dir:' |
|
10 | with 'dir:' | |
11 |
|
11 | |||
12 | 'expand' is the function to use in python programs. Typical use |
|
12 | 'expand' is the function to use in python programs. Typical use | |
13 | to expand argv (esp. in windows):: |
|
13 | to expand argv (esp. in windows):: | |
14 |
|
14 | |||
15 | try: |
|
15 | try: | |
16 | import mglob |
|
16 | import mglob | |
17 | files = mglob.expand(sys.argv[1:]) |
|
17 | files = mglob.expand(sys.argv[1:]) | |
18 | except ImportError: |
|
18 | except ImportError: | |
19 | print "mglob not found; try 'easy_install mglob' for extra features" |
|
19 | print "mglob not found; try 'easy_install mglob' for extra features" | |
20 | files = sys.argv[1:] |
|
20 | files = sys.argv[1:] | |
21 |
|
21 | |||
22 | Note that for unix, shell expands *normal* wildcards (*.cpp, etc.) in argv. |
|
22 | Note that for unix, shell expands *normal* wildcards (*.cpp, etc.) in argv. | |
23 | Therefore, you might want to use quotes with normal wildcards to prevent this |
|
23 | Therefore, you might want to use quotes with normal wildcards to prevent this | |
24 | expansion, in order for mglob to see the wildcards and get the wanted behaviour. |
|
24 | expansion, in order for mglob to see the wildcards and get the wanted behaviour. | |
25 | Not quoting the wildcards is harmless and typically has equivalent results, though. |
|
25 | Not quoting the wildcards is harmless and typically has equivalent results, though. | |
26 |
|
26 | |||
27 | Author: Ville Vainio <vivainio@gmail.com> |
|
27 | Author: Ville Vainio <vivainio@gmail.com> | |
28 | License: MIT Open Source license |
|
28 | License: MIT Open Source license | |
29 |
|
29 | |||
30 | """ |
|
30 | """ | |
31 |
|
31 | |||
32 | #Assigned in variable for "usage" printing convenience" |
|
32 | #Assigned in variable for "usage" printing convenience" | |
33 |
|
33 | |||
34 | globsyntax = """\ |
|
34 | globsyntax = """\ | |
35 | This program allows specifying filenames with "mglob" mechanism. |
|
35 | This program allows specifying filenames with "mglob" mechanism. | |
36 | Supported syntax in globs (wilcard matching patterns):: |
|
36 | Supported syntax in globs (wilcard matching patterns):: | |
37 |
|
37 | |||
38 | *.cpp ?ellowo* |
|
38 | *.cpp ?ellowo* | |
39 | - obvious. Differs from normal glob in that dirs are not included. |
|
39 | - obvious. Differs from normal glob in that dirs are not included. | |
40 | Unix users might want to write this as: "*.cpp" "?ellowo*" |
|
40 | Unix users might want to write this as: "*.cpp" "?ellowo*" | |
41 | rec:/usr/share=*.txt,*.doc |
|
41 | rec:/usr/share=*.txt,*.doc | |
42 | - get all *.txt and *.doc under /usr/share, |
|
42 | - get all *.txt and *.doc under /usr/share, | |
43 | recursively |
|
43 | recursively | |
44 | rec:/usr/share |
|
44 | rec:/usr/share | |
45 | - All files under /usr/share, recursively |
|
45 | - All files under /usr/share, recursively | |
46 | rec:*.py |
|
46 | rec:*.py | |
47 | - All .py files under current working dir, recursively |
|
47 | - All .py files under current working dir, recursively | |
48 | foo |
|
48 | foo | |
49 | - File or dir foo |
|
49 | - File or dir foo | |
50 | !*.bak readme* |
|
50 | !*.bak readme* | |
51 | - readme*, exclude files ending with .bak |
|
51 | - readme*, exclude files ending with .bak | |
52 | !.svn/ !.hg/ !*_Data/ rec:. |
|
52 | !.svn/ !.hg/ !*_Data/ rec:. | |
53 | - Skip .svn, .hg, foo_Data dirs (and their subdirs) in recurse. |
|
53 | - Skip .svn, .hg, foo_Data dirs (and their subdirs) in recurse. | |
54 | Trailing / is the key, \ does not work! Use !.*/ for all hidden. |
|
54 | Trailing / is the key, \ does not work! Use !.*/ for all hidden. | |
55 | dir:foo |
|
55 | dir:foo | |
56 | - the directory foo if it exists (not files in foo) |
|
56 | - the directory foo if it exists (not files in foo) | |
57 | dir:* |
|
57 | dir:* | |
58 | - all directories in current folder |
|
58 | - all directories in current folder | |
59 | foo.py bar.* !h* rec:*.py |
|
59 | foo.py bar.* !h* rec:*.py | |
60 | - Obvious. !h* exclusion only applies for rec:*.py. |
|
60 | - Obvious. !h* exclusion only applies for rec:*.py. | |
61 | foo.py is *not* included twice. |
|
61 | foo.py is *not* included twice. | |
62 | @filelist.txt |
|
62 | @filelist.txt | |
63 | - All files listed in 'filelist.txt' file, on separate lines. |
|
63 | - All files listed in 'filelist.txt' file, on separate lines. | |
64 | "cont:class \wak:" rec:*.py |
|
64 | "cont:class \wak:" rec:*.py | |
65 | - Match files containing regexp. Applies to subsequent files. |
|
65 | - Match files containing regexp. Applies to subsequent files. | |
66 | note quotes because of whitespace. |
|
66 | note quotes because of whitespace. | |
67 | """ |
|
67 | """ | |
68 |
|
68 | |||
69 |
|
69 | |||
70 | __version__ = "0.2" |
|
70 | __version__ = "0.2" | |
71 |
|
71 | |||
72 |
|
72 | |||
73 | import os,glob,fnmatch,sys,re |
|
73 | import os,glob,fnmatch,sys,re | |
74 |
|
74 | |||
75 | def expand(flist,exp_dirs = False): |
|
75 | def expand(flist,exp_dirs = False): | |
76 | """ Expand the glob(s) in flist. |
|
76 | """ Expand the glob(s) in flist. | |
77 |
|
77 | |||
78 | flist may be either a whitespace-separated list of globs/files |
|
78 | flist may be either a whitespace-separated list of globs/files | |
79 | or an array of globs/files. |
|
79 | or an array of globs/files. | |
80 |
|
80 | |||
81 | if exp_dirs is true, directory names in glob are expanded to the files |
|
81 | if exp_dirs is true, directory names in glob are expanded to the files | |
82 | contained in them - otherwise, directory names are returned as is. |
|
82 | contained in them - otherwise, directory names are returned as is. | |
83 |
|
83 | |||
84 | """ |
|
84 | """ | |
85 | if isinstance(flist, basestring): |
|
85 | if isinstance(flist, basestring): | |
86 | import shlex |
|
86 | import shlex | |
87 | flist = shlex.split(flist) |
|
87 | flist = shlex.split(flist) | |
88 | done_set = set() |
|
88 | done_set = set() | |
89 | denied_set = set() |
|
89 | denied_set = set() | |
90 | cont_set = set() |
|
90 | cont_set = set() | |
91 | cur_rejected_dirs = set() |
|
91 | cur_rejected_dirs = set() | |
92 |
|
92 | |||
93 | def recfind(p, pats = ["*"]): |
|
93 | def recfind(p, pats = ["*"]): | |
94 | denied_dirs = [os.path.dirname(d) for d in denied_set if d.endswith("/")] |
|
94 | denied_dirs = [os.path.dirname(d) for d in denied_set if d.endswith("/")] | |
95 | for (dp,dnames,fnames) in os.walk(p): |
|
95 | for (dp,dnames,fnames) in os.walk(p): | |
96 | # see if we should ignore the whole directory |
|
96 | # see if we should ignore the whole directory | |
97 | dp_norm = dp.replace("\\","/") + "/" |
|
97 | dp_norm = dp.replace("\\","/") + "/" | |
98 | deny = False |
|
98 | deny = False | |
99 | # do not traverse under already rejected dirs |
|
99 | # do not traverse under already rejected dirs | |
100 | for d in cur_rejected_dirs: |
|
100 | for d in cur_rejected_dirs: | |
101 | if dp.startswith(d): |
|
101 | if dp.startswith(d): | |
102 | deny = True |
|
102 | deny = True | |
103 | break |
|
103 | break | |
104 | if deny: |
|
104 | if deny: | |
105 | continue |
|
105 | continue | |
106 |
|
106 | |||
107 |
|
107 | |||
108 | #print "dp",dp |
|
108 | #print "dp",dp | |
109 | bname = os.path.basename(dp) |
|
109 | bname = os.path.basename(dp) | |
110 | for deny_pat in denied_dirs: |
|
110 | for deny_pat in denied_dirs: | |
111 | if fnmatch.fnmatch( bname, deny_pat): |
|
111 | if fnmatch.fnmatch( bname, deny_pat): | |
112 | deny = True |
|
112 | deny = True | |
113 | cur_rejected_dirs.add(dp) |
|
113 | cur_rejected_dirs.add(dp) | |
114 | break |
|
114 | break | |
115 | if deny: |
|
115 | if deny: | |
116 | continue |
|
116 | continue | |
117 |
|
117 | |||
118 |
|
118 | |||
119 | for f in fnames: |
|
119 | for f in fnames: | |
120 | matched = False |
|
120 | matched = False | |
121 | for p in pats: |
|
121 | for p in pats: | |
122 | if fnmatch.fnmatch(f,p): |
|
122 | if fnmatch.fnmatch(f,p): | |
123 | matched = True |
|
123 | matched = True | |
124 | break |
|
124 | break | |
125 | if matched: |
|
125 | if matched: | |
126 | yield os.path.join(dp,f) |
|
126 | yield os.path.join(dp,f) | |
127 |
|
127 | |||
128 | def once_filter(seq): |
|
128 | def once_filter(seq): | |
129 | for it in seq: |
|
129 | for it in seq: | |
130 | p = os.path.abspath(it) |
|
130 | p = os.path.abspath(it) | |
131 | if p in done_set: |
|
131 | if p in done_set: | |
132 | continue |
|
132 | continue | |
133 | done_set.add(p) |
|
133 | done_set.add(p) | |
134 | deny = False |
|
134 | deny = False | |
135 | for deny_pat in denied_set: |
|
135 | for deny_pat in denied_set: | |
136 | if fnmatch.fnmatch(os.path.basename(p), deny_pat): |
|
136 | if fnmatch.fnmatch(os.path.basename(p), deny_pat): | |
137 | deny = True |
|
137 | deny = True | |
138 | break |
|
138 | break | |
139 | if cont_set: |
|
139 | if cont_set: | |
140 | try: |
|
140 | try: | |
141 | cont = open(p).read() |
|
141 | cont = open(p).read() | |
142 | except IOError: |
|
142 | except IOError: | |
143 | # deny |
|
143 | # deny | |
144 | continue |
|
144 | continue | |
145 | for pat in cont_set: |
|
145 | for pat in cont_set: | |
146 | if not re.search(pat,cont, re.IGNORECASE): |
|
146 | if not re.search(pat,cont, re.IGNORECASE): | |
147 | deny = True |
|
147 | deny = True | |
148 | break |
|
148 | break | |
149 |
|
149 | |||
150 | if not deny: |
|
150 | if not deny: | |
151 | yield it |
|
151 | yield it | |
152 | return |
|
152 | return | |
153 |
|
153 | |||
154 | res = [] |
|
154 | res = [] | |
155 |
|
155 | |||
156 | for ent in flist: |
|
156 | for ent in flist: | |
157 | ent = os.path.expanduser(os.path.expandvars(ent)) |
|
157 | ent = os.path.expanduser(os.path.expandvars(ent)) | |
158 | if ent.lower().startswith('rec:'): |
|
158 | if ent.lower().startswith('rec:'): | |
159 | fields = ent[4:].split('=') |
|
159 | fields = ent[4:].split('=') | |
160 | if len(fields) == 2: |
|
160 | if len(fields) == 2: | |
161 | pth, patlist = fields |
|
161 | pth, patlist = fields | |
162 | elif len(fields) == 1: |
|
162 | elif len(fields) == 1: | |
163 | if os.path.isdir(fields[0]): |
|
163 | if os.path.isdir(fields[0]): | |
164 | # single arg is dir |
|
164 | # single arg is dir | |
165 | pth, patlist = fields[0], '*' |
|
165 | pth, patlist = fields[0], '*' | |
166 | else: |
|
166 | else: | |
167 | # single arg is pattern |
|
167 | # single arg is pattern | |
168 | pth, patlist = '.', fields[0] |
|
168 | pth, patlist = '.', fields[0] | |
169 |
|
169 | |||
170 | elif len(fields) == 0: |
|
170 | elif len(fields) == 0: | |
171 | pth, pathlist = '.','*' |
|
171 | pth, pathlist = '.','*' | |
172 |
|
172 | |||
173 | pats = patlist.split(',') |
|
173 | pats = patlist.split(',') | |
174 | res.extend(once_filter(recfind(pth, pats))) |
|
174 | res.extend(once_filter(recfind(pth, pats))) | |
175 | # filelist |
|
175 | # filelist | |
176 | elif ent.startswith('@') and os.path.isfile(ent[1:]): |
|
176 | elif ent.startswith('@') and os.path.isfile(ent[1:]): | |
177 | res.extend(once_filter(open(ent[1:]).read().splitlines())) |
|
177 | res.extend(once_filter(open(ent[1:]).read().splitlines())) | |
178 | # exclusion |
|
178 | # exclusion | |
179 | elif ent.startswith('!'): |
|
179 | elif ent.startswith('!'): | |
180 | denied_set.add(ent[1:]) |
|
180 | denied_set.add(ent[1:]) | |
181 | # glob only dirs |
|
181 | # glob only dirs | |
182 | elif ent.lower().startswith('dir:'): |
|
182 | elif ent.lower().startswith('dir:'): | |
183 | res.extend(once_filter(filter(os.path.isdir,glob.glob(ent[4:])))) |
|
183 | res.extend(once_filter(filter(os.path.isdir,glob.glob(ent[4:])))) | |
184 | elif ent.lower().startswith('cont:'): |
|
184 | elif ent.lower().startswith('cont:'): | |
185 | cont_set.add(ent[5:]) |
|
185 | cont_set.add(ent[5:]) | |
186 | # get all files in the specified dir |
|
186 | # get all files in the specified dir | |
187 | elif os.path.isdir(ent) and exp_dirs: |
|
187 | elif os.path.isdir(ent) and exp_dirs: | |
188 | res.extend(once_filter(filter(os.path.isfile,glob.glob(ent + os.sep+"*")))) |
|
188 | res.extend(once_filter(filter(os.path.isfile,glob.glob(ent + os.sep+"*")))) | |
189 |
|
189 | |||
190 | # glob only files |
|
190 | # glob only files | |
191 |
|
191 | |||
192 | elif '*' in ent or '?' in ent: |
|
192 | elif '*' in ent or '?' in ent: | |
193 | res.extend(once_filter(filter(os.path.isfile,glob.glob(ent)))) |
|
193 | res.extend(once_filter(filter(os.path.isfile,glob.glob(ent)))) | |
194 |
|
194 | |||
195 | else: |
|
195 | else: | |
196 | res.extend(once_filter([ent])) |
|
196 | res.extend(once_filter([ent])) | |
197 | return res |
|
197 | return res | |
198 |
|
198 | |||
199 |
|
199 | |||
def test():
    """ Check that string input and the equivalent list input expand alike. """
    from_string = expand("*.py ~/.ipython/*.py rec:/usr/share/doc-base")
    from_list = expand( ['*.py', '~/.ipython/*.py', 'rec:/usr/share/doc-base'] )
    assert from_string == from_list
205 |
|
205 | |||
def main():
    """ Command-line entry point.

    Without arguments, print the glob syntax help.  Otherwise expand the
    command-line arguments and print the resulting paths, one per line.
    """
    # Single-argument, parenthesised print is valid and equivalent on both
    # Python 2 (print statement) and Python 3 (print function).
    if len(sys.argv) < 2:
        print(globsyntax)
        return

    # The newline is written explicitly here instead of relying on the
    # trailing-comma soft-space being flushed at interpreter exit.
    print("\n".join(expand(sys.argv[1:])))
212 |
|
212 | |||
def mglob(self, arg):
    # Magic-function implementation.  No docstring here on purpose: the
    # help text is attached right after the definition by assigning
    # globsyntax to __doc__.
    from IPython.utils.text import SList
    if not arg.strip():
        # nothing to expand: show a hint followed by the syntax help
        print("Please specify pattern!")
        print(globsyntax)
        return None
    return SList(expand(arg))


mglob.__doc__ = globsyntax
|
223 | ||||
|
224 | ||||
def init_ipython(ip):
    """ Register the mglob function as an IPython magic on ``ip``. """
    register_magic = ip.function_as_magic
    register_magic(mglob)
|
229 | ||||
224 |
|
230 | |||
# Self-check, disabled by default; uncomment to run it at import time.
# test()

# Run as a command-line utility when executed directly.
if __name__ == "__main__":
    main()
General Comments 0
You need to be logged in to leave comments.
Login now