Show More
@@ -1,182 +1,204 b'' | |||||
1 | #!/usr/bin/env python |
|
1 | #!/usr/bin/env python | |
2 |
|
2 | |||
3 | """ mglob - enhanced file list expansion module |
|
3 | r""" mglob - enhanced file list expansion module | |
4 |
|
4 | |||
5 | Use as stand-alone utility (for xargs, `backticks` etc.), |
|
5 | Use as stand-alone utility (for xargs, `backticks` etc.), | |
6 | or a globbing library for own python programs. Globbing the sys.argv is something |
|
6 | or a globbing library for own python programs. Globbing the sys.argv is something | |
7 | that almost every Windows script has to perform manually, and this module is here |
|
7 | that almost every Windows script has to perform manually, and this module is here | |
8 | to help with that task. Also Unix users will benefit from enhanced modes |
|
8 | to help with that task. Also Unix users will benefit from enhanced modes | |
9 | such as recursion, exclusion, directory omission... |
|
9 | such as recursion, exclusion, directory omission... | |
10 |
|
10 | |||
11 | Unlike glob.glob, directories are not included in the glob unless specified |
|
11 | Unlike glob.glob, directories are not included in the glob unless specified | |
12 | with 'dir:' |
|
12 | with 'dir:' | |
13 |
|
13 | |||
14 | 'expand' is the function to use in python programs. Typical use |
|
14 | 'expand' is the function to use in python programs. Typical use | |
15 | to expand argv (esp. in windows):: |
|
15 | to expand argv (esp. in windows):: | |
16 |
|
16 | |||
17 | try: |
|
17 | try: | |
18 | import mglob |
|
18 | import mglob | |
19 | files = mglob.expand(sys.argv[1:]) |
|
19 | files = mglob.expand(sys.argv[1:]) | |
20 | except ImportError: |
|
20 | except ImportError: | |
21 | print "mglob not found; try 'easy_install mglob' for extra features" |
|
21 | print "mglob not found; try 'easy_install mglob' for extra features" | |
22 | files = sys.argv[1:] |
|
22 | files = sys.argv[1:] | |
23 |
|
23 | |||
24 | Note that for unix, shell expands *normal* wildcards (*.cpp, etc.) in argv. |
|
24 | Note that for unix, shell expands *normal* wildcards (*.cpp, etc.) in argv. | |
25 | Therefore, you might want to use quotes with normal wildcards to prevent this |
|
25 | Therefore, you might want to use quotes with normal wildcards to prevent this | |
26 | expansion, in order for mglob to see the wildcards and get the wanted behaviour. |
|
26 | expansion, in order for mglob to see the wildcards and get the wanted behaviour. | |
27 | Not quoting the wildcards is harmless and typically has equivalent results, though. |
|
27 | Not quoting the wildcards is harmless and typically has equivalent results, though. | |
28 |
|
28 | |||
29 | Author: Ville Vainio <vivainio@gmail.com> |
|
29 | Author: Ville Vainio <vivainio@gmail.com> | |
30 | License: MIT Open Source license |
|
30 | License: MIT Open Source license | |
31 |
|
31 | |||
32 | """ |
|
32 | """ | |
33 |
|
33 | |||
# Assigned to a variable for "usage" printing convenience; the same text is
# printed by main() and mglob_f() and installed as the %mglob magic docstring.
#
# NOTE: the literal cannot be a raw string because of the leading
# line-continuation backslash, so the backslash in the help text is escaped.
globsyntax = """\
    This program allows specifying filenames with "mglob" mechanism.
    Supported syntax in globs (wildcard matching patterns)::

     *.cpp ?ellowo*
         - obvious. Differs from normal glob in that dirs are not included.
           Unix users might want to write this as: "*.cpp" "?ellowo*"
     rec:/usr/share=*.txt,*.doc
         - get all *.txt and *.doc under /usr/share,
           recursively
     rec:/usr/share
         - All files under /usr/share, recursively
     rec:*.py
         - All .py files under current working dir, recursively
     foo
         - File foo, or all files in dir foo
     !*.bak readme*
         - readme*, exclude files ending with .bak
     !.svn/ !.hg/ !*_Data/ rec:.
         - Skip .svn, .hg, foo_Data dirs (and their subdirs) in recurse.
           Trailing / is the key, \\ does not work!
     dir:foo
         - the directory foo (not files in foo)
     dir:*
         - all directories in current folder
     foo.py bar.* !h* rec:*.py
         - Obvious. !h* exclusion only applies for rec:*.py.
           foo.py is *not* included twice.
     @filelist.txt
         - All files listed in 'filelist.txt' file, on separate lines.
 """


__version__ = "0.2"
67 |
|
70 | |||
68 |
|
71 | |||
69 | import os,glob,fnmatch,sys |
|
72 | import os,glob,fnmatch,sys | |
70 | from sets import Set as set |
|
73 | from sets import Set as set | |
71 |
|
74 | |||
def recfind(p, pats=("*",)):
    """ Recursively find files under directory p matching any of pats.

    p    -- root directory handed to os.walk
    pats -- iterable of fnmatch-style patterns, matched against the bare
            file name (not the full path); defaults to everything ("*")

    Yields os.path.join(dirpath, filename) for every file whose name matches
    at least one pattern. Each file is yielded at most once, even if several
    patterns match it.
    """
    # The default is an immutable tuple rather than a shared list, and the
    # inner loop variable no longer reuses the name of the root parameter.
    for (dp, dnames, fnames) in os.walk(p):
        for f in fnames:
            for pat in pats:
                if fnmatch.fnmatch(f, pat):
                    yield os.path.join(dp, f)
                    # first matching pattern wins; do not yield duplicates
                    break
|
|||
82 |
|
75 | |||
def expand(flist):
    """ Expand the glob(s) in flist.

    flist may be either a whitespace-separated list of globs/files
    or an array of globs/files.

    Entries are processed left to right, after ~ and environment variable
    expansion:

    rec:DIR=P1,P2  files under DIR (recursively) matching any pattern;
                   only the first '=' separates DIR from the patterns
    rec:DIR        all files under the existing directory DIR
    rec:PATTERN    files under '.' matching PATTERN
    rec:           all files under '.'
    @FILE          entries listed in the existing file FILE, one per line
                   (blank lines are ignored)
    !PATTERN       exclude file names matching PATTERN from all *subsequent*
                   entries; a pattern with a trailing '/' excludes whole
                   directories (and their subdirectories) from rec: walks
    dir:GLOB       directories matching GLOB
    DIR            the files directly inside the existing directory DIR
    GLOB           files (not directories) matching a '*' / '?' wildcard
    anything else  passed through unchanged

    Returns a list of paths. A path is reported at most once per call;
    duplicates are detected by absolute path.
    """
    # basestring only exists on Python 2; fall back to str elsewhere.
    try:
        string_types = basestring
    except NameError:
        string_types = str
    if isinstance(flist, string_types):
        flist = flist.split()
    done_set = set()
    denied_set = set()

    def recfind(root, pats=("*",)):
        # Only exclusions with a trailing "/" apply to directories. They are
        # wrapped in "*" so that they match anywhere inside the walked path.
        denied_dirs = ["*" + d + "*" for d in denied_set if d.endswith("/")]
        for (dp, dnames, fnames) in os.walk(root):
            # see if we should ignore the whole directory; normalize to "/"
            # with a trailing slash so that "!name/" patterns can match
            dp_norm = dp.replace("\\", "/") + "/"
            deny = False
            for deny_pat in denied_dirs:
                if fnmatch.fnmatch(dp_norm, deny_pat):
                    deny = True
                    break
            if deny:
                # Every descendant's normalized path extends dp_norm and the
                # pattern ends in "*", so descendants would be denied as
                # well: prune the walk instead of visiting them.
                del dnames[:]
                continue

            for f in fnames:
                for pat in pats:
                    if fnmatch.fnmatch(f, pat):
                        yield os.path.join(dp, f)
                        break

    def once_filter(seq):
        # Yield the items of seq that have not been seen before in this
        # expand() call and whose base name matches no exclusion pattern.
        # Excluded items are still remembered as seen.
        for it in seq:
            p = os.path.abspath(it)
            if p in done_set:
                continue
            done_set.add(p)
            name = os.path.basename(p)
            deny = False
            for deny_pat in denied_set:
                if fnmatch.fnmatch(name, deny_pat):
                    deny = True
                    break
            if not deny:
                yield it

    res = []

    for ent in flist:
        ent = os.path.expanduser(os.path.expandvars(ent))
        lowered = ent.lower()
        if lowered.startswith('rec:'):
            spec = ent[4:]
            # Split on the first '=' only, so pth/patlist are always bound
            # (extra '=' characters stay in the pattern list).
            fields = spec.split('=', 1)
            if len(fields) == 2:
                pth, patlist = fields
            elif not spec:
                # bare "rec:" - everything under the current directory
                pth, patlist = '.', '*'
            elif os.path.isdir(spec):
                # single arg is dir
                pth, patlist = spec, '*'
            else:
                # single arg is pattern
                pth, patlist = '.', spec

            pats = patlist.split(',')
            res.extend(once_filter(recfind(pth, pats)))
        # filelist
        elif ent.startswith('@') and os.path.isfile(ent[1:]):
            listing = open(ent[1:])
            try:
                names = listing.read().splitlines()
            finally:
                # close the handle deterministically
                listing.close()
            res.extend(once_filter([n for n in names if n.strip()]))
        # exclusion
        elif ent.startswith('!'):
            denied_set.add(ent[1:])
        # glob only dirs
        elif lowered.startswith('dir:'):
            res.extend(once_filter(filter(os.path.isdir, glob.glob(ent[4:]))))
        # get all files in the specified dir
        elif os.path.isdir(ent):
            res.extend(once_filter(filter(os.path.isfile,
                                          glob.glob(ent + os.sep + "*"))))
        # glob only files
        elif '*' in ent or '?' in ent:
            res.extend(once_filter(filter(os.path.isfile, glob.glob(ent))))
        else:
            res.extend(once_filter([ent]))
    return res
152 |
|
171 | |||
153 |
|
172 | |||
def test():
    """ Check that the string form and the list form of the same spec agree.

    Depends on the local file system (~/.ipython, /usr/share/doc-base), so it
    is a smoke test rather than a portable unit test.
    """
    spec = ['*.py', '~/.ipython/*.py', 'rec:/usr/share/doc-base']
    from_string = expand(" ".join(spec))
    from_list = expand(spec)
    assert (from_string == from_list)
159 |
|
178 | |||
def main():
    """ Command line entry point.

    Without arguments, print the supported glob syntax. Otherwise expand all
    command line arguments and print the resulting paths, one per line.
    """
    # Single-argument print(...) behaves the same as a print statement on
    # Python 2 and is valid syntax on Python 3.
    if len(sys.argv) < 2:
        print(globsyntax)
        return

    print("\n".join(expand(sys.argv[1:])))
166 |
|
185 | |||
167 |
def mglob_f(self, arg):
    # Implementation of the %mglob magic (registered by ipython_install,
    # which also installs globsyntax as this function's docstring).
    #
    # arg is the text following the magic name. A non-blank arg is expanded
    # and the resulting list returned; otherwise a hint and the syntax help
    # are printed and None is returned.
    #
    # Single-argument print(...) works on both Python 2 and Python 3.
    if arg.strip():
        return expand(arg)
    print("Please specify pattern!")
    print(globsyntax)
169 |
|
191 | |||
def ipython_install():
    """ register %mglob for IPython """
    # Imported lazily so that the module stays usable without IPython.
    import IPython.ipapi

    # The magic's help text is the glob syntax description.
    mglob_f.__doc__ = globsyntax
    api = IPython.ipapi.get()
    api.expose_magic("mglob", mglob_f)
176 |
|
198 | |||
177 |
|
199 | |||
178 |
|
200 | |||
179 |
|
201 | |||
180 | # test() |
|
202 | # test() | |
# Behave as a stand-alone utility when executed as a script; importing the
# module only defines the functions above.
if __name__ == "__main__":
    main()
General Comments 0
You need to be logged in to leave comments.
Login now