##// END OF EJS Templates
add dir skipping (on recursive walk) to mglob
vivainio -
Show More
@@ -1,182 +1,204 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2
2
3 """ mglob - enhanced file list expansion module
3 r""" mglob - enhanced file list expansion module
4
4
5 Use as stand-alone utility (for xargs, `backticks` etc.),
5 Use as stand-alone utility (for xargs, `backticks` etc.),
6 or a globbing library for own python programs. Globbing the sys.argv is something
6 or a globbing library for own python programs. Globbing the sys.argv is something
7 that almost every Windows script has to perform manually, and this module is here
7 that almost every Windows script has to perform manually, and this module is here
8 to help with that task. Also Unix users will benefit from enhanced modes
8 to help with that task. Also Unix users will benefit from enhanced modes
9 such as recursion, exclusion, directory omission...
9 such as recursion, exclusion, directory omission...
10
10
11 Unlike glob.glob, directories are not included in the glob unless specified
11 Unlike glob.glob, directories are not included in the glob unless specified
12 with 'dir:'
12 with 'dir:'
13
13
14 'expand' is the function to use in python programs. Typical use
14 'expand' is the function to use in python programs. Typical use
15 to expand argv (esp. in windows)::
15 to expand argv (esp. in windows)::
16
16
17 try:
17 try:
18 import mglob
18 import mglob
19 files = mglob.expand(sys.argv[1:])
19 files = mglob.expand(sys.argv[1:])
20 except ImportError:
20 except ImportError:
21 print "mglob not found; try 'easy_install mglob' for extra features"
21 print "mglob not found; try 'easy_install mglob' for extra features"
22 files = sys.argv[1:]
22 files = sys.argv[1:]
23
23
24 Note that for unix, shell expands *normal* wildcards (*.cpp, etc.) in argv.
24 Note that for unix, shell expands *normal* wildcards (*.cpp, etc.) in argv.
25 Therefore, you might want to use quotes with normal wildcards to prevent this
25 Therefore, you might want to use quotes with normal wildcards to prevent this
26 expansion, in order for mglob to see the wildcards and get the wanted behaviour.
26 expansion, in order for mglob to see the wildcards and get the wanted behaviour.
27 Not quoting the wildcards is harmless and typically has equivalent results, though.
27 Not quoting the wildcards is harmless and typically has equivalent results, though.
28
28
29 Author: Ville Vainio <vivainio@gmail.com>
29 Author: Ville Vainio <vivainio@gmail.com>
30 License: MIT Open Source license
30 License: MIT Open Source license
31
31
32 """
32 """
33
33
34 # Assigned to a variable so the usage text is convenient to print.
34 # Assigned to a variable so the usage text is convenient to print.
35
35
36 globsyntax = """\
36 globsyntax = """\
37 This program allows specifying filenames with "mglob" mechanism.
37 This program allows specifying filenames with "mglob" mechanism.
38 Supported syntax in globs (wildcard matching patterns)::
38 Supported syntax in globs (wildcard matching patterns)::
39
39
40 *.cpp ?ellowo*
40 *.cpp ?ellowo*
41 - obvious. Differs from normal glob in that dirs are not included.
41 - obvious. Differs from normal glob in that dirs are not included.
42 Unix users might want to write this as: "*.cpp" "?ellowo*"
42 Unix users might want to write this as: "*.cpp" "?ellowo*"
43 rec:/usr/share=*.txt,*.doc
43 rec:/usr/share=*.txt,*.doc
44 - get all *.txt and *.doc under /usr/share,
44 - get all *.txt and *.doc under /usr/share,
45 recursively
45 recursively
46 rec:/usr/share
46 rec:/usr/share
47 - All files under /usr/share, recursively
47 - All files under /usr/share, recursively
48 rec:*.py
48 rec:*.py
49 - All .py files under current working dir, recursively
49 - All .py files under current working dir, recursively
50 foo
50 foo
51 - File foo, or all files in dir foo
51 - File foo, or all files in dir foo
52 !*.bak readme*
52 !*.bak readme*
53 - readme*, exclude files ending with .bak
53 - readme*, exclude files ending with .bak
54 !.svn/ !.hg/ !*_Data/ rec:.
55 - Skip .svn, .hg, foo_Data dirs (and their subdirs) in recurse.
56 Trailing / is the key, \ does not work!
54 dir:foo
57 dir:foo
55 - the directory foo (not files in foo)
58 - the directory foo (not files in foo)
56 dir:*
59 dir:*
57 - all directories in current folder
60 - all directories in current folder
58 foo.py bar.* !h* rec:*.py
61 foo.py bar.* !h* rec:*.py
59 - Obvious. !h* exclusion only applies for rec:*.py.
62 - Obvious. !h* exclusion only applies for rec:*.py.
60 foo.py is *not* included twice.
63 foo.py is *not* included twice.
61 @filelist.txt
64 @filelist.txt
62 - All files listed in 'filelist.txt' file, on separate lines.
65 - All files listed in 'filelist.txt' file, on separate lines.
63 """
66 """
64
67
65
68
66 __version__ = "0.2"
69 __version__ = "0.2"
67
70
68
71
69 import os,glob,fnmatch,sys
72 import os,glob,fnmatch,sys
70 from sets import Set as set
73 from sets import Set as set
71
74
def recfind(p, pats=("*",)):
    """Recursively yield files under directory ``p`` matching any pattern.

    p    -- root directory handed to os.walk.
    pats -- iterable of fnmatch-style patterns; each one is tested against
            the bare file name (not the full path).  Defaults to every file.

    This is a generator: it yields os.path.join(dirpath, filename) for each
    matching file, and yields a file at most once even when several
    patterns match it.
    """
    for dirpath, _dirnames, filenames in os.walk(p):
        for fname in filenames:
            for pat in pats:
                if fnmatch.fnmatch(fname, pat):
                    yield os.path.join(dirpath, fname)
                    # first hit is enough; avoids yielding duplicates
                    break
82
75
def expand(flist):
    """ Expand the glob(s) in flist.

    flist may be either a whitespace-separated list of globs/files
    or an array of globs/files.

    Entries are processed in order, after '~' and environment variable
    expansion.  Recognized forms:

    rec:DIR=PAT1,PAT2  files under DIR (recursively) matching any pattern
    rec:DIR            all files under DIR, recursively
    rec:PAT            files under '.' matching PAT, recursively
    rec:               all files under '.', recursively
    @FILE              names listed in FILE, one per line (FILE must exist)
    !PAT               exclusion applied to entries that follow it; a
                       pattern ending in '/' makes recursive walks skip
                       matching directories altogether
    dir:GLOB           directories matching GLOB
    DIR                the files directly inside DIR
    GLOB               files (not directories) matching a '*'/'?' glob
    anything else      taken literally

    Returns a list of names.  A given path (compared by absolute path) is
    returned at most once.
    """
    # basestring only exists on Python 2; fall back to str elsewhere.
    try:
        string_types = basestring
    except NameError:
        string_types = str
    if isinstance(flist, string_types):
        flist = flist.split()

    done_set = set()    # absolute paths already seen
    denied_set = set()  # exclusion patterns collected from '!...' entries

    def recfind(p, pats=("*",)):
        """Yield files under p matching any of pats, skipping denied dirs."""
        # Only exclusions with a trailing '/' act on directories.  They are
        # wrapped in '*' so they can match anywhere inside the path.
        denied_dirs = ["*" + d + "*" for d in denied_set if d.endswith("/")]
        for dp, dnames, fnames in os.walk(p):
            # Normalize so that '/'-style exclusions also work with
            # backslash-separated paths.
            dp_norm = dp.replace("\\", "/") + "/"
            denied = False
            for deny_pat in denied_dirs:
                if fnmatch.fnmatch(dp_norm, deny_pat):
                    denied = True
                    break
            if denied:
                # Every subdirectory would match the same '*...*' pattern,
                # so prune the walk here instead of visiting each of them.
                del dnames[:]
                continue

            for f in fnames:
                for pat in pats:
                    if fnmatch.fnmatch(f, pat):
                        yield os.path.join(dp, f)
                        break

    def once_filter(seq):
        """Yield items of seq not seen before and not excluded by name."""
        for it in seq:
            p = os.path.abspath(it)
            if p in done_set:
                continue
            done_set.add(p)
            deny = False
            for deny_pat in denied_set:
                if fnmatch.fnmatch(os.path.basename(p), deny_pat):
                    deny = True
                    break
            if not deny:
                yield it

    res = []

    for ent in flist:
        ent = os.path.expanduser(os.path.expandvars(ent))
        if ent.lower().startswith('rec:'):
            spec = ent[4:]
            # Split on the first '=' only, so pth/patlist are always bound
            # even when the pattern part itself contains '='.
            fields = spec.split('=', 1)
            if len(fields) == 2:
                pth, patlist = fields
            elif not spec:
                # bare 'rec:' means everything under the current directory
                pth, patlist = '.', '*'
            elif os.path.isdir(spec):
                # single arg is dir
                pth, patlist = spec, '*'
            else:
                # single arg is pattern
                pth, patlist = '.', spec

            pats = patlist.split(',')
            res.extend(once_filter(recfind(pth, pats)))
        # filelist
        elif ent.startswith('@') and os.path.isfile(ent[1:]):
            listfile = open(ent[1:])
            try:
                names = listfile.read().splitlines()
            finally:
                listfile.close()
            res.extend(once_filter(names))
        # exclusion
        elif ent.startswith('!'):
            denied_set.add(ent[1:])
        # glob only dirs
        elif ent.lower().startswith('dir:'):
            res.extend(once_filter(filter(os.path.isdir, glob.glob(ent[4:]))))
        # get all files in the specified dir
        elif os.path.isdir(ent):
            res.extend(once_filter(filter(os.path.isfile,
                                          glob.glob(ent + os.sep + "*"))))
        # glob only files
        elif '*' in ent or '?' in ent:
            res.extend(once_filter(filter(os.path.isfile, glob.glob(ent))))
        else:
            res.extend(once_filter([ent]))
    return res
152
171
153
172
def test():
    """Check that expand() treats a whitespace-separated string and the
    equivalent list of globs the same way."""
    globs = ['*.py', '~/.ipython/*.py', 'rec:/usr/share/doc-base']
    from_string = expand(" ".join(globs))
    from_list = expand(globs)
    assert from_string == from_list
159
178
def main():
    """Command-line entry point.

    With no arguments, write the glob syntax help (globsyntax) to stdout.
    Otherwise expand every command-line argument and write the resulting
    names to stdout, one per line, newline-terminated.
    """
    if len(sys.argv) < 2:
        sys.stdout.write(globsyntax + "\n")
        return

    # sys.stdout.write is used instead of the print statement so that the
    # function parses on both Python 2 and Python 3.
    sys.stdout.write("\n".join(expand(sys.argv[1:])) + "\n")
166
185
def mglob_f(self, arg):
    """Implementation of the mglob magic: expand the globs given in arg.

    self -- unused here.
    arg  -- string of whitespace-separated globs, passed to expand().

    Returns the list produced by expand(arg).  When arg is empty or only
    whitespace, a hint and the glob syntax help are written to stdout and
    None is returned.
    """
    if arg.strip():
        return expand(arg)
    # sys.stdout.write keeps this parseable on both Python 2 and Python 3.
    sys.stdout.write("Please specify pattern!\n")
    sys.stdout.write(globsyntax + "\n")
169
191
def ipython_install():
    """Register the %mglob magic with IPython.

    The glob syntax help text is installed as the docstring of mglob_f
    before the function is exposed under the name "mglob".
    """
    # Imported here so the module stays usable without IPython installed.
    import IPython.ipapi

    api = IPython.ipapi.get()
    mglob_f.__doc__ = globsyntax
    api.expose_magic("mglob", mglob_f)
176
198
177
199
178
200
179
201
180 # test()
202 # test()
181 if __name__ == "__main__":
203 if __name__ == "__main__":
182 main()
204 main()
General Comments 0
You need to be logged in to leave comments. Login now