Show More
@@ -1,227 +1,233 b'' | |||
|
1 | 1 | r""" mglob - enhanced file list expansion module |
|
2 | 2 | |
|
3 | 3 | Use as stand-alone utility (for xargs, `backticks` etc.), |
|
4 | 4 | or a globbing library for own python programs. Globbing the sys.argv is something |
|
5 | 5 | that almost every Windows script has to perform manually, and this module is here |
|
6 | 6 | to help with that task. Also Unix users will benefit from enhanced modes |
|
7 | 7 | such as recursion, exclusion, directory omission... |
|
8 | 8 | |
|
9 | 9 | Unlike glob.glob, directories are not included in the glob unless specified |
|
10 | 10 | with 'dir:' |
|
11 | 11 | |
|
12 | 12 | 'expand' is the function to use in python programs. Typical use |
|
13 | 13 | to expand argv (esp. in windows):: |
|
14 | 14 | |
|
15 | 15 | try: |
|
16 | 16 | import mglob |
|
17 | 17 | files = mglob.expand(sys.argv[1:]) |
|
18 | 18 | except ImportError: |
|
19 | 19 | print "mglob not found; try 'easy_install mglob' for extra features" |
|
20 | 20 | files = sys.argv[1:] |
|
21 | 21 | |
|
22 | 22 | Note that for unix, shell expands *normal* wildcards (*.cpp, etc.) in argv. |
|
23 | 23 | Therefore, you might want to use quotes with normal wildcards to prevent this |
|
24 | 24 | expansion, in order for mglob to see the wildcards and get the wanted behaviour. |
|
25 | 25 | Not quoting the wildcards is harmless and typically has equivalent results, though. |
|
26 | 26 | |
|
27 | 27 | Author: Ville Vainio <vivainio@gmail.com> |
|
28 | 28 | License: MIT Open Source license |
|
29 | 29 | |
|
30 | 30 | """ |
|
31 | 31 | |
|
32 | 32 | # Assigned to a variable so the usage text can be printed conveniently. |
|
33 | 33 | |
|
34 | 34 | globsyntax = """\ |
|
35 | 35 | This program allows specifying filenames with "mglob" mechanism. |
|
36 | 36 | Supported syntax in globs (wildcard matching patterns):: |
|
37 | 37 | |
|
38 | 38 | *.cpp ?ellowo* |
|
39 | 39 | - obvious. Differs from normal glob in that dirs are not included. |
|
40 | 40 | Unix users might want to write this as: "*.cpp" "?ellowo*" |
|
41 | 41 | rec:/usr/share=*.txt,*.doc |
|
42 | 42 | - get all *.txt and *.doc under /usr/share, |
|
43 | 43 | recursively |
|
44 | 44 | rec:/usr/share |
|
45 | 45 | - All files under /usr/share, recursively |
|
46 | 46 | rec:*.py |
|
47 | 47 | - All .py files under current working dir, recursively |
|
48 | 48 | foo |
|
49 | 49 | - File or dir foo |
|
50 | 50 | !*.bak readme* |
|
51 | 51 | - readme*, exclude files ending with .bak |
|
52 | 52 | !.svn/ !.hg/ !*_Data/ rec:. |
|
53 | 53 | - Skip .svn, .hg, foo_Data dirs (and their subdirs) in recurse. |
|
54 | 54 | Trailing / is the key, \ does not work! Use !.*/ for all hidden. |
|
55 | 55 | dir:foo |
|
56 | 56 | - the directory foo if it exists (not files in foo) |
|
57 | 57 | dir:* |
|
58 | 58 | - all directories in current folder |
|
59 | 59 | foo.py bar.* !h* rec:*.py |
|
60 | 60 | - Obvious. !h* exclusion only applies for rec:*.py. |
|
61 | 61 | foo.py is *not* included twice. |
|
62 | 62 | @filelist.txt |
|
63 | 63 | - All files listed in 'filelist.txt' file, on separate lines. |
|
64 | 64 | "cont:class \wak:" rec:*.py |
|
65 | 65 | - Match files containing regexp. Applies to subsequent files. |
|
66 | 66 | note quotes because of whitespace. |
|
67 | 67 | """ |
|
68 | 68 | |
|
69 | 69 | |
|
70 | 70 | __version__ = "0.2" |
|
71 | 71 | |
|
72 | 72 | |
|
73 | 73 | import os,glob,fnmatch,sys,re |
|
74 | 74 | |
|
def expand(flist, exp_dirs=False):
    """Expand the mglob pattern(s) in ``flist`` into a list of paths.

    ``flist`` is either a list of entries or a single string, which is
    split into entries with ``shlex.split``.  Entries are processed left to
    right; ``~`` and environment variables are expanded in each entry first.

    Recognised entries ("rec:", "dir:" and "cont:" are case-insensitive):

    ``rec:DIR=PAT1,PAT2``
        Files under DIR (recursively) whose name matches any pattern.
    ``rec:DIR`` / ``rec:PAT`` / ``rec:``
        All files under DIR / files matching PAT under the current
        directory / all files under the current directory.
    ``@FILE``
        The paths listed in FILE, one per line (only if FILE exists).
    ``!PAT``
        Exclude paths whose basename matches PAT from later entries.  A
        pattern with a trailing ``/`` prunes matching sub-directories
        during ``rec:`` traversal instead.
    ``dir:GLOB``
        Directories matching GLOB.
    ``cont:REGEXP``
        Keep, from later entries, only readable files whose content matches
        REGEXP (case-insensitive ``re.search``).
    anything else
        The files inside it if it is a directory and ``exp_dirs`` is true;
        the matching files if it contains ``*`` or ``?``; otherwise the
        entry itself, unchanged.

    Each path is considered only once per call (compared by absolute path),
    so a path is never returned twice.

    Returns a list of path strings in the order they were found.
    """
    try:
        string_types = basestring  # Python 2: covers str and unicode
    except NameError:
        string_types = str  # Python 3
    if isinstance(flist, string_types):
        import shlex
        flist = shlex.split(flist)

    done_set = set()    # absolute paths that have already been considered
    denied_set = set()  # '!' exclusion patterns seen so far
    cont_set = set()    # 'cont:' regular expressions seen so far

    def recfind(top, pats=("*",)):
        """Yield files under ``top`` whose name matches any of ``pats``."""
        # Only exclusions written with a trailing '/' apply to directories.
        denied_dirs = [os.path.dirname(d) for d in denied_set
                       if d.endswith("/")]
        for dirpath, dirnames, filenames in os.walk(top):
            # Prune excluded sub-directories in place so os.walk never
            # descends into them.  Matching is on the exact directory name,
            # so siblings that merely share a prefix are unaffected, and the
            # explicitly requested root is never rejected (its basename may
            # be "." which would match a pattern such as ".*").
            dirnames[:] = [
                d for d in dirnames
                if not any(fnmatch.fnmatch(d, pat) for pat in denied_dirs)
            ]
            for fname in filenames:
                if any(fnmatch.fnmatch(fname, pat) for pat in pats):
                    yield os.path.join(dirpath, fname)

    def once_filter(seq):
        """Yield unseen items of ``seq`` that pass the active filters."""
        for item in seq:
            abspath = os.path.abspath(item)
            if abspath in done_set:
                continue
            # Recorded even if the item is rejected below: a path is only
            # ever considered once per expand() call.
            done_set.add(abspath)

            bname = os.path.basename(abspath)
            if any(fnmatch.fnmatch(bname, pat) for pat in denied_set):
                continue

            if cont_set:
                try:
                    with open(abspath) as fh:
                        content = fh.read()
                except (IOError, OSError, UnicodeDecodeError):
                    # Unreadable (or undecodable) files cannot match.
                    continue
                if not all(re.search(pat, content, re.IGNORECASE)
                           for pat in cont_set):
                    continue

            yield item

    res = []
    for ent in flist:
        ent = os.path.expanduser(os.path.expandvars(ent))
        low = ent.lower()

        # recursive search
        if low.startswith('rec:'):
            # Split on the first '=' only, so the pattern list is always
            # defined even if the entry contains further '=' characters.
            pth, sep, patlist = ent[4:].partition('=')
            if not sep:
                if os.path.isdir(pth):
                    # single arg is dir
                    patlist = '*'
                else:
                    # single arg is pattern
                    pth, patlist = '.', pth
            # Empty parts fall back to "current directory" / "everything".
            pats = (patlist or '*').split(',')
            res.extend(once_filter(recfind(pth or '.', pats)))
        # filelist
        elif ent.startswith('@') and os.path.isfile(ent[1:]):
            with open(ent[1:]) as fh:
                listed = fh.read().splitlines()
            res.extend(once_filter(listed))
        # exclusion
        elif ent.startswith('!'):
            denied_set.add(ent[1:])
        # glob only dirs
        elif low.startswith('dir:'):
            res.extend(once_filter(filter(os.path.isdir, glob.glob(ent[4:]))))
        # content filter
        elif low.startswith('cont:'):
            cont_set.add(ent[5:])
        # get all files in the specified dir
        elif os.path.isdir(ent) and exp_dirs:
            res.extend(once_filter(
                filter(os.path.isfile, glob.glob(ent + os.sep + "*"))))
        # glob only files
        elif '*' in ent or '?' in ent:
            res.extend(once_filter(filter(os.path.isfile, glob.glob(ent))))
        # plain name: returned as is
        else:
            res.extend(once_filter([ent]))
    return res
|
198 | 198 | |
|
199 | 199 | |
|
def test():
    """Check that a string and the equivalent list expand identically."""
    as_list = ['*.py', '~/.ipython/*.py', 'rec:/usr/share/doc-base']
    # The string form is expanded first, then the list form, and the two
    # results are compared for equality.
    from_string = expand("*.py ~/.ipython/*.py rec:/usr/share/doc-base")
    from_list = expand(as_list)
    assert from_string == from_list
|
205 | 205 | |
|
def main():
    """Command-line entry point.

    With no command-line arguments, print the usage text held in
    ``globsyntax``.  Otherwise pass all arguments to ``expand`` and print
    the resulting paths, one per line.
    """
    patterns = sys.argv[1:]
    if not patterns:
        # Single-argument print(...) is valid on both Python 2 and 3.
        print(globsyntax)
        return

    print("\n".join(expand(patterns)))
|
212 | 212 | |
|
# IPython magic: expand the mglob pattern string ``arg`` and return the
# matches wrapped in an IPython SList.  When ``arg`` is blank, a hint and the
# usage text are printed and None is returned.  No docstring is written
# here because ``__doc__`` is replaced by ``globsyntax`` right below.
def mglob(self, arg):
    if not arg.strip():
        # Single-argument print(...) is valid on both Python 2 and 3.
        print("Please specify pattern!")
        print(globsyntax)
        return None

    # Imported inside the function so that importing this module does not
    # require IPython; only needed when there is something to return.
    from IPython.utils.text import SList
    return SList(expand(arg))


# Use the pattern syntax description as the help text of the magic.
mglob.__doc__ = globsyntax
|
223 | ||
|
224 | ||
|
def init_ipython(ip):
    """Register ``mglob`` as the %mglob magic on the IPython object ``ip``."""
    # NOTE(review): relies on ``ip.function_as_magic`` being provided by the
    # targeted IPython release -- confirm against the version in use.
    register = ip.function_as_magic
    register(mglob)
|
229 | ||
|
224 | 230 | |
|
225 | 231 | # test() |
|
226 | 232 | if __name__ == "__main__": |
|
227 | 233 | main() |
General Comments 0
You need to be logged in to leave comments.
Login now