Show More
@@ -1,203 +1,231 | |||
|
1 | 1 | #!/usr/bin/env python |
|
2 | 2 | |
|
3 | 3 | r""" mglob - enhanced file list expansion module |
|
4 | 4 | |
|
5 | 5 | Use as stand-alone utility (for xargs, `backticks` etc.), |
|
6 | 6 | or a globbing library for own python programs. Globbing the sys.argv is something |
|
7 | 7 | that almost every Windows script has to perform manually, and this module is here |
|
8 | 8 | to help with that task. Also Unix users will benefit from enhanced modes |
|
9 | 9 | such as recursion, exclusion, directory omission... |
|
10 | 10 | |
|
11 | 11 | Unlike glob.glob, directories are not included in the glob unless specified |
|
12 | 12 | with 'dir:' |
|
13 | 13 | |
|
14 | 14 | 'expand' is the function to use in python programs. Typical use |
|
15 | 15 | to expand argv (esp. in windows):: |
|
16 | 16 | |
|
17 | 17 | try: |
|
18 | 18 | import mglob |
|
19 | 19 | files = mglob.expand(sys.argv[1:]) |
|
20 | 20 | except ImportError: |
|
21 | 21 | print "mglob not found; try 'easy_install mglob' for extra features" |
|
22 | 22 | files = sys.argv[1:] |
|
23 | 23 | |
|
24 | 24 | Note that for unix, shell expands *normal* wildcards (*.cpp, etc.) in argv. |
|
25 | 25 | Therefore, you might want to use quotes with normal wildcards to prevent this |
|
26 | 26 | expansion, in order for mglob to see the wildcards and get the wanted behaviour. |
|
27 | 27 | Not quoting the wildcards is harmless and typically has equivalent results, though. |
|
28 | 28 | |
|
29 | 29 | Author: Ville Vainio <vivainio@gmail.com> |
|
30 | 30 | License: MIT Open Source license |
|
31 | 31 | |
|
32 | 32 | """ |
|
33 | 33 | |
|
34 | 34 | # Assigned to a variable for "usage" printing convenience |
|
35 | 35 | |
|
36 | 36 | globsyntax = """\ |
|
37 | 37 | This program allows specifying filenames with "mglob" mechanism. |
|
38 | 38 | Supported syntax in globs (wildcard matching patterns):: |
|
39 | 39 | |
|
40 | 40 | *.cpp ?ellowo* |
|
41 | 41 | - obvious. Differs from normal glob in that dirs are not included. |
|
42 | 42 | Unix users might want to write this as: "*.cpp" "?ellowo*" |
|
43 | 43 | rec:/usr/share=*.txt,*.doc |
|
44 | 44 | - get all *.txt and *.doc under /usr/share, |
|
45 | 45 | recursively |
|
46 | 46 | rec:/usr/share |
|
47 | 47 | - All files under /usr/share, recursively |
|
48 | 48 | rec:*.py |
|
49 | 49 | - All .py files under current working dir, recursively |
|
50 | 50 | foo |
|
51 | 51 | - File or dir foo |
|
52 | 52 | !*.bak readme* |
|
53 | 53 | - readme*, exclude files ending with .bak |
|
54 | 54 | !.svn/ !.hg/ !*_Data/ rec:. |
|
55 | 55 | - Skip .svn, .hg, foo_Data dirs (and their subdirs) in recurse. |
|
56 | Trailing / is the key, \ does not work! | |
|
56 | Trailing / is the key, \ does not work! Use !.*/ for all hidden. | |
|
57 | 57 | dir:foo |
|
58 | 58 | - the directory foo if it exists (not files in foo) |
|
59 | 59 | dir:* |
|
60 | 60 | - all directories in current folder |
|
61 | 61 | foo.py bar.* !h* rec:*.py |
|
62 | 62 | - Obvious. !h* exclusion only applies for rec:*.py. |
|
63 | 63 | foo.py is *not* included twice. |
|
64 | 64 | @filelist.txt |
|
65 | 65 | - All files listed in 'filelist.txt' file, on separate lines. |
|
66 | "cont:class \wak:" rec:*.py | |
|
67 | - Match files containing regexp. Applies to subsequent files. | |
|
68 | note quotes because of whitespace. | |
|
66 | 69 | """ |
|
67 | 70 | |
|
68 | 71 | |
|
69 | 72 | __version__ = "0.2" |
|
70 | 73 | |
|
71 | 74 | |
|
72 | import os,glob,fnmatch,sys | |
|
75 | import os,glob,fnmatch,sys,re | |
|
73 | 76 | from sets import Set as set |
|
74 | 77 | |
|
75 | 78 | |
|
def expand(flist,exp_dirs = False):
    """ Expand the glob(s) in flist.

    flist may be either a whitespace-separated list of globs/files
    (split with shell-like quoting, so a quoted "cont:..." entry may contain
    spaces) or an array of globs/files.

    if exp_dirs is true, directory names in glob are expanded to the files
    contained in them - otherwise, directory names are returned as is.

    Entries are processed from left to right. '!pattern' exclusions and
    'cont:regexp' content filters only affect the entries that follow them.
    A file is returned at most once (compared by absolute path).

    Returns a list of path strings.

    """
    try:
        string_types = basestring
    except NameError:
        # no basestring on Python 3
        string_types = str

    if isinstance(flist, string_types):
        import shlex
        # NOTE(review): shlex.split uses POSIX rules, so unquoted
        # backslashes (e.g. Windows paths) are consumed - confirm that
        # callers passing a single string quote such paths.
        flist = shlex.split(flist)

    done_set = set()      # absolute paths that were already considered
    denied_set = set()    # fnmatch patterns collected from '!pattern'
    cont_set = set()      # regexps collected from 'cont:regexp'

    def read_file(path):
        """ Return the contents of path, always closing the file. """
        fobj = open(path)
        try:
            return fobj.read()
        finally:
            fobj.close()

    def recfind(top, pats = ("*",)):
        """ Yield files under top whose name matches any pattern in pats.

        Directories whose name matches a '!pattern/' exclusion are not
        descended into.
        """
        # "!foo/" is stored as "foo/"; dirname() strips the trailing slash
        denied_dirs = [os.path.dirname(d) for d in denied_set
                       if d.endswith("/")]

        def dir_denied(name):
            for deny_pat in denied_dirs:
                if fnmatch.fnmatch(name, deny_pat):
                    return True
            return False

        # "." and ".." are not real directory names; testing them would
        # make "!.*/ rec:." reject the whole walk.
        top_name = os.path.basename(top)
        if top_name not in (".", "..") and dir_denied(top_name):
            return

        for (dp, dnames, fnames) in os.walk(top):
            # prune in place so that os.walk never enters denied dirs
            dnames[:] = [d for d in dnames if not dir_denied(d)]
            for f in fnames:
                for pat in pats:
                    if fnmatch.fnmatch(f, pat):
                        yield os.path.join(dp, f)
                        break

    def once_filter(seq):
        """ Yield the items of seq that are new and pass all filters. """
        for it in seq:
            p = os.path.abspath(it)
            if p in done_set:
                continue
            done_set.add(p)

            bname = os.path.basename(p)
            deny = False
            for deny_pat in denied_set:
                if fnmatch.fnmatch(bname, deny_pat):
                    deny = True
                    break
            if deny:
                # excluded by name, no need to look at the contents
                continue

            if cont_set:
                try:
                    cont = read_file(p)
                except (IOError, OSError):
                    # unreadable (or a directory): cannot match, deny
                    continue
                for pat in cont_set:
                    if not re.search(pat, cont, re.IGNORECASE):
                        deny = True
                        break

            if not deny:
                yield it

    res = []

    for ent in flist:
        ent = os.path.expanduser(os.path.expandvars(ent))
        if ent.lower().startswith('rec:'):
            spec = ent[4:]
            if '=' in spec:
                # split on the first '=' only, so that further '=' signs
                # end up in the pattern list instead of leaving pth and
                # patlist unset (or stale from a previous entry)
                pth, patlist = spec.split('=', 1)
            elif os.path.isdir(spec):
                # single arg is dir
                pth, patlist = spec, '*'
            else:
                # single arg is pattern; bare "rec:" means everything
                pth, patlist = '.', spec or '*'

            pats = patlist.split(',')
            res.extend(once_filter(recfind(pth, pats)))
        # filelist
        elif ent.startswith('@') and os.path.isfile(ent[1:]):
            res.extend(once_filter(read_file(ent[1:]).splitlines()))
        # exclusion
        elif ent.startswith('!'):
            denied_set.add(ent[1:])
        # glob only dirs
        elif ent.lower().startswith('dir:'):
            res.extend(once_filter(filter(os.path.isdir,glob.glob(ent[4:]))))
        # content filter for the entries that follow
        elif ent.lower().startswith('cont:'):
            cont_set.add(ent[5:])
        # get all files in the specified dir
        elif os.path.isdir(ent) and exp_dirs:
            res.extend(once_filter(filter(os.path.isfile,glob.glob(ent + os.sep+"*"))))

        # glob only files

        elif '*' in ent or '?' in ent:
            res.extend(once_filter(filter(os.path.isfile,glob.glob(ent))))

        else:
            res.extend(once_filter([ent]))
    return res
|
174 | 202 | |
|
175 | 203 | |
|
def test():
    """ Check that a glob string and the equivalent list expand alike. """
    from_string = expand("*.py ~/.ipython/*.py rec:/usr/share/doc-base")
    from_list = expand( ['*.py', '~/.ipython/*.py', 'rec:/usr/share/doc-base'] )
    assert from_string == from_list
|
181 | 209 | |
|
182 | 210 | def main(): |
|
183 | 211 | if len(sys.argv) < 2: |
|
184 | 212 | print globsyntax |
|
185 | 213 | return |
|
186 | 214 | |
|
187 | 215 | print "\n".join(expand(sys.argv[1:])), |
|
188 | 216 | |
|
189 | 217 | def mglob_f(self, arg): |
|
190 | 218 | from IPython.genutils import SList |
|
191 | 219 | if arg.strip(): |
|
192 | 220 | return SList(expand(arg)) |
|
193 | 221 | print "Please specify pattern!" |
|
194 | 222 | print globsyntax |
|
195 | 223 | |
|
def init_ipython(ip):
    """ register %mglob for IPython

    ip is the object whose expose_magic() method is used to install
    mglob_f under the name "mglob".
    """
    # reuse the syntax help as the docstring of the magic function
    mglob_f.__doc__ = globsyntax
    ip.expose_magic("mglob",mglob_f)
|
200 | 228 | |
|
# test()   (self-check, disabled by default)
if __name__ == "__main__":
    # run as a script: expand the command line arguments and print them
    main()
General Comments 0
You need to be logged in to leave comments.
Login now