##// END OF EJS Templates
fileset: add size() predicate
Matt Mackall -
r14683:281102f3 default
parent child Browse files
Show More
@@ -1,322 +1,393
1 # fileset.py - file set queries for mercurial
1 # fileset.py - file set queries for mercurial
2 #
2 #
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import parser, error, util, merge, re
8 import parser, error, util, merge, re
9 from i18n import _
9 from i18n import _
10
10
# Token table handed to parser.parser() together with tokenize().
# Every entry is a 3-tuple; presumably (binding strength, prefix action,
# infix action) -- confirm against parser.py.
elements = {
    # grouping / function call
    "(": (20, ("group", 1, ")"), ("func", 1, ")")),
    ")": (0, None, None),
    # minus / negate
    "-": (5, ("negate", 19), ("minus", 5)),
    # logical not, spelled two ways
    "not": (10, ("not", 10)),
    "!": (10, ("not", 10)),
    # intersection, spelled two ways
    "and": (5, None, ("and", 5)),
    "&": (5, None, ("and", 5)),
    # union, spelled three ways
    "or": (4, None, ("or", 4)),
    "|": (4, None, ("or", 4)),
    "+": (4, None, ("or", 4)),
    # argument separator
    ",": (2, None, ("list", 2)),
    # terminals
    "symbol": (0, ("symbol",), None),
    "string": (0, ("string",), None),
    "end": (0, None, None),
}

# words that tokenize() emits as operator tokens instead of 'symbol' tokens
keywords = set(['and', 'or', 'not'])

# non-alphanumeric characters that tokenize() accepts as part of a symbol
globchars = ".*{}[]?/\\"
31
31
def tokenize(program):
    """Split a fileset expression into tokens.

    Yields (type, value, position) tuples: single-character operators
    and keywords carry a value of None, 'string' and 'symbol' tokens
    carry their text.  A final ('end', None, position) token is always
    produced.  Raises error.ParseError on an unterminated string or on
    a character that cannot start any token.
    """
    def symchar(ch):
        # characters allowed inside a symbol: alphanumerics, glob
        # characters and anything outside the ASCII range
        return ch.isalnum() or ch in globchars or ord(ch) > 127

    pos = 0
    end = len(program)
    while pos < end:
        ch = program[pos]

        # inter-token whitespace is simply dropped
        if ch.isspace():
            pos += 1
            continue

        # single-character operators
        if ch in "(),-|&+!":
            yield (ch, None, pos)
            pos += 1
            continue

        # quoted strings, optionally with an r prefix
        israw = ch == 'r' and program[pos:pos + 2] in ("r'", 'r"')
        if ch in '"\'' or israw:
            if israw:
                # step over the prefix; raw strings are taken verbatim
                pos += 1
                ch = program[pos]
                decode = lambda x: x
            else:
                decode = lambda x: x.decode('string-escape')
            pos += 1
            start = pos
            closed = False
            while pos < end: # look for the matching quote
                cur = program[pos]
                if cur == '\\':
                    # a backslash protects the following character
                    pos += 2
                elif cur == ch:
                    closed = True
                    break
                else:
                    pos += 1
            if not closed:
                raise error.ParseError(_("unterminated string"), start)
            yield ('string', decode(program[start:pos]), start)
            pos += 1 # step past the closing quote
            continue

        # symbols and operator keywords
        if symchar(ch):
            start = pos
            pos += 1
            while pos < end and symchar(program[pos]):
                pos += 1
            word = program[start:pos]
            if word in keywords:
                yield (word, None, start)
            else:
                yield ('symbol', word, start)
            # pos already sits on the first character after the word
            continue

        raise error.ParseError(_("syntax error"), pos)
    yield ('end', None, pos)
80
80
# Module-level parse function: the bound parse method of a parser.parser
# built from tokenize() and the elements table.  getfileset() unpacks its
# result as (tree, pos).
parse = parser.parser(tokenize, elements).parse
82
82
def getstring(x, err):
    """Return the text carried by a 'string' or 'symbol' node.

    x is a parse-tree node (or a false value); err is the message used
    for the error.ParseError raised when x is anything else.
    """
    if not x or x[0] not in ('string', 'symbol'):
        raise error.ParseError(err)
    return x[1]
87
87
def getset(mctx, x):
    """Evaluate parse-tree node x against mctx.

    The node type x[0] selects a handler from the methods table; the
    remaining node fields are passed to it as positional arguments.
    Raises error.ParseError when x is empty or None.
    """
    if x:
        handler = methods[x[0]]
        return handler(mctx, *x[1:])
    raise error.ParseError(_("missing argument"))
92
92
def stringset(mctx, x):
    """Return the files of mctx.subset accepted by a matcher built from
    the single pattern x."""
    match = mctx.matcher([x])
    hits = []
    for f in mctx.subset:
        if match(f):
            hits.append(f)
    return hits
96
96
def andset(mctx, x, y):
    """Evaluate x, then evaluate y against a context narrowed to x's
    result."""
    left = getset(mctx, x)
    narrowed = mctx.narrow(left)
    return getset(narrowed, y)
99
99
def orset(mctx, x, y):
    """Return the result of x followed by the files of y not already in x.

    Order is preserved: everything from x first, then the new entries
    from y in the order y produced them.
    """
    xl = getset(mctx, x)
    yl = getset(mctx, y)
    # look up in a set so each test is constant-time instead of a scan
    # of xl for every entry of yl
    seen = set(xl)
    return xl + [f for f in yl if f not in seen]
105
105
def notset(mctx, x):
    """Return the files of mctx.subset that x does not select."""
    excluded = set(getset(mctx, x))
    kept = []
    for f in mctx.subset:
        if f not in excluded:
            kept.append(f)
    return kept
109
109
def listset(mctx, a, b):
    """Handler for a 'list' node evaluated as a set: always raises
    error.ParseError."""
    msg = _("can't use a list in this context")
    raise error.ParseError(msg)
112
112
def modified(mctx, x):
    """``modified()``
    File that is modified according to status.
    """
    # raises error.ParseError if any argument was supplied
    getargs(x, 0, 0, _("modified takes no arguments"))
    # entry 0 of the status result; a set makes each membership test
    # constant-time instead of a scan of the status list per file
    s = set(mctx.status()[0])
    return [f for f in mctx.subset if f in s]
120
120
def added(mctx, x):
    """``added()``
    File that is added according to status.
    """
    # raises error.ParseError if any argument was supplied
    getargs(x, 0, 0, _("added takes no arguments"))
    # entry 1 of the status result; a set makes each membership test
    # constant-time instead of a scan of the status list per file
    s = set(mctx.status()[1])
    return [f for f in mctx.subset if f in s]
128
128
def removed(mctx, x):
    """``removed()``
    File that is removed according to status.
    """
    # raises error.ParseError if any argument was supplied
    getargs(x, 0, 0, _("removed takes no arguments"))
    # entry 2 of the status result; a set makes each membership test
    # constant-time instead of a scan of the status list per file
    s = set(mctx.status()[2])
    return [f for f in mctx.subset if f in s]
136
136
def deleted(mctx, x):
    """``deleted()``
    File that is deleted according to status.
    """
    # raises error.ParseError if any argument was supplied
    getargs(x, 0, 0, _("deleted takes no arguments"))
    # entry 3 of the status result; a set makes each membership test
    # constant-time instead of a scan of the status list per file
    s = set(mctx.status()[3])
    return [f for f in mctx.subset if f in s]
144
144
def unknown(mctx, x):
    """``unknown()``
    File that is unknown according to status. These files will only be
    considered if this predicate is used.
    """
    # raises error.ParseError if any argument was supplied
    getargs(x, 0, 0, _("unknown takes no arguments"))
    # entry 4 of the status result; a set makes each membership test
    # constant-time instead of a scan of the status list per file
    s = set(mctx.status()[4])
    return [f for f in mctx.subset if f in s]
153
153
def ignored(mctx, x):
    """``ignored()``
    File that is ignored according to status. These files will only be
    considered if this predicate is used.
    """
    # raises error.ParseError if any argument was supplied
    getargs(x, 0, 0, _("ignored takes no arguments"))
    # entry 5 of the status result; a set makes each membership test
    # constant-time instead of a scan of the status list per file
    s = set(mctx.status()[5])
    return [f for f in mctx.subset if f in s]
162
162
def clean(mctx, x):
    """``clean()``
    File that is clean according to status.
    """
    # raises error.ParseError if any argument was supplied
    getargs(x, 0, 0, _("clean takes no arguments"))
    # entry 6 of the status result; a set makes each membership test
    # constant-time instead of a scan of the status list per file
    s = set(mctx.status()[6])
    return [f for f in mctx.subset if f in s]
170
170
def func(mctx, a, b):
    """Apply the predicate named by node a to argument tree b.

    a must be a 'symbol' node whose text is a key of the symbols table;
    anything else raises error.ParseError.
    """
    if a[0] != 'symbol' or a[1] not in symbols:
        raise error.ParseError(_("not a function: %s") % a[1])
    predicate = symbols[a[1]]
    return predicate(mctx, b)
175
175
def getlist(x):
    """Flatten a left-nested ('list', rest, item) tree into a flat list.

    A false value gives [], a node that is not a 'list' gives a
    one-element list, and nested 'list' nodes are unrolled so the items
    come out in their original left-to-right order.
    """
    items = []
    node = x
    # walk down the left spine, collecting right-hand items as we go
    while node and node[0] == 'list':
        items.append(node[2])
        node = node[1]
    if node:
        items.append(node)
    # items were gathered right-to-left
    items.reverse()
    return items
182
182
def getargs(x, min, max, err):
    """Return the argument list of tree x, checking its length.

    Raises error.ParseError(err) unless the number of arguments lies
    between min and max inclusive.
    """
    l = getlist(x)
    if min <= len(l) <= max:
        return l
    raise error.ParseError(err)
188
188
def binary(mctx, x):
    """``binary()``
    File that appears to be binary (contains NUL bytes).
    """
    # raises error.ParseError if any argument was supplied
    getargs(x, 0, 0, _("binary takes no arguments"))
    # util.binary() decides from the file's full contents
    return [f for f in mctx.subset if util.binary(mctx.ctx[f].data())]
195
195
def exec_(mctx, x):
    """``exec()``
    File that is marked as executable.
    """
    # raises error.ParseError if any argument was supplied
    getargs(x, 0, 0, _("exec takes no arguments"))
    found = []
    for f in mctx.subset:
        # keep files whose flags are exactly 'x'
        if mctx.ctx.flags(f) == 'x':
            found.append(f)
    return found
202
202
def symlink(mctx, x):
    """``symlink()``
    File that is marked as a symlink.
    """
    # raises error.ParseError if any argument was supplied
    getargs(x, 0, 0, _("symlink takes no arguments"))
    found = []
    for f in mctx.subset:
        # keep files whose flags are exactly 'l'
        if mctx.ctx.flags(f) == 'l':
            found.append(f)
    return found
209
209
def resolved(mctx, x):
    """``resolved()``
    File that is marked resolved according to the resolve state.
    """
    # raises error.ParseError if any argument was supplied
    getargs(x, 0, 0, _("resolved takes no arguments"))
    # merge state is only consulted when the context has no revision
    # number (presumably the working directory -- confirm)
    if mctx.ctx.rev() is None:
        ms = merge.mergestate(mctx.ctx._repo)
        return [f for f in mctx.subset if f in ms and ms[f] == 'r']
    return []
219
219
def unresolved(mctx, x):
    """``unresolved()``
    File that is marked unresolved according to the resolve state.
    """
    # raises error.ParseError if any argument was supplied
    getargs(x, 0, 0, _("unresolved takes no arguments"))
    # merge state is only consulted when the context has no revision
    # number (presumably the working directory -- confirm)
    if mctx.ctx.rev() is None:
        ms = merge.mergestate(mctx.ctx._repo)
        return [f for f in mctx.subset if f in ms and ms[f] == 'u']
    return []
229
229
def hgignore(mctx, x):
    """``hgignore()``
    File that matches the active .hgignore pattern.
    """
    # raises error.ParseError if any argument was supplied
    getargs(x, 0, 0, _("hgignore takes no arguments"))
    # the dirstate's ignore matcher is called once per candidate file
    ignore = mctx.ctx._repo.dirstate._ignore
    return [f for f in mctx.subset if ignore(f)]
237
237
def grep(mctx, x):
    """``grep(regex)``
    File contains the given regular expression.
    """
    # raises error.ParseError unless x is a string or symbol node
    pattern = getstring(x, _("grep requires a pattern"))
    search = re.compile(pattern).search
    matched = []
    for f in mctx.subset:
        # search the file's full contents
        if search(mctx.ctx[f].data()):
            matched.append(f)
    return matched
245
245
246 _units = dict(k=2**10, K=2**10, kB=2**10, KB=2**10,
247 M=2**20, MB=2**20, G=2**30, GB=2**30,
248 kiB=10**3, MiB=10**6, GiB=10**9)
249
250 def _sizetoint(s):
251 try:
252 s = s.strip()
253 for k, v in _units.items():
254 if s.endswith(k):
255 return int(float(s[:-len(k)]) * v)
256 return int(s)
257 except ValueError:
258 raise
259 raise error.ParseError(_("couldn't parse size"), s)
260
261 def _sizetomax(s):
262 try:
263 s = s.strip()
264 for k, v in _units.items():
265 if s.endswith(k):
266 # max(4k) = 5k - 1, max(4.5k) = 4.6k - 1
267 n = s[:-len(k)]
268 inc = 1.0
269 if "." in n:
270 inc /= 10 ** len(n.split(".")[1])
271 return int((float(n) + inc) * v) - 1
272 # no extension, this is a precise value
273 return int(s)
274 except ValueError:
275 raise
276 raise error.ParseError(_("couldn't parse size"), s)
277
def size(mctx, x):
    """``size(expression)``
    File size matches the given expression. Examples:

    - 1k (files from 1024 to 2047 bytes)
    - 1.0kiB (files from 1000 to 1099 bytes)
    - < 20k (files less than 20480 bytes)
    - >= .5MiB (files at least 500000 bytes)
    - 4k - 1MB (files from 4096 bytes to 1048576 bytes)
    """

    # raises error.ParseError unless x is a string or symbol node
    expr = getstring(x, _("size requires an expression")).strip()
    if '-' in expr: # do we have a range?
        a, b = expr.split('-', 1)
        a = _sizetoint(a)
        b = _sizetoint(b)
        m = lambda n: n >= a and n <= b
    elif expr.startswith("<="): # must be tested before the bare "<"
        a = _sizetoint(expr[2:])
        m = lambda n: n <= a
    elif expr.startswith("<"):
        a = _sizetoint(expr[1:])
        m = lambda n: n < a
    elif expr.startswith(">="): # must be tested before the bare ">"
        a = _sizetoint(expr[2:])
        m = lambda n: n >= a
    elif expr.startswith(">"):
        a = _sizetoint(expr[1:])
        m = lambda n: n > a
    # expr[:1] instead of expr[0] so an empty expression falls through
    # to the parse error below instead of raising IndexError
    elif expr[:1].isdigit() or expr[:1] == '.':
        # a bare size matches everything that rounds to what was written
        a = _sizetoint(expr)
        b = _sizetomax(expr)
        m = lambda n: n >= a and n <= b
    else:
        raise error.ParseError(_("couldn't parse size"), expr)

    return [f for f in mctx.subset if m(mctx.ctx[f].size())]
315
# Predicate name -> implementation.  func() looks names up here, and the
# docstrings of these functions are exported through i18nfunctions.
symbols = {
    # status-based predicates
    'added': added,
    'clean': clean,
    'deleted': deleted,
    'ignored': ignored,
    'modified': modified,
    'removed': removed,
    'unknown': unknown,
    # content, flag and size predicates
    'binary': binary,
    'exec': exec_,
    'grep': grep,
    'size': size,
    'symlink': symlink,
    # .hgignore and resolve-state predicates
    'hgignore': hgignore,
    'resolved': resolved,
    'unresolved': unresolved,
}
262
333
# Parse-tree node type -> evaluator.  getset() dispatches on node[0]
# through this table.
methods = {
    # leaves: both are treated as file patterns
    'string': stringset,
    'symbol': stringset,
    # set operators
    'and': andset,
    'or': orset,
    'not': notset,
    # structure
    'group': getset,
    'list': listset,
    'func': func,
}
273
344
class matchctx(object):
    """Evaluation context for fileset predicates.

    Bundles the context object being queried (ctx), the candidate files
    still under consideration (subset) and the status result, if one was
    computed (status).
    """
    def __init__(self, ctx, subset=None, status=None):
        self.ctx = ctx
        self.subset = subset
        self._status = status
    def status(self):
        """Return the status object given at construction (may be None)."""
        return self._status
    def matcher(self, patterns):
        """Return ctx.match(patterns)."""
        return self.ctx.match(patterns)
    def filter(self, files):
        """Return the entries of files that are in the current subset,
        in the order given."""
        # look up in a set so each test is constant-time instead of a
        # scan of the whole subset for every file
        subset = set(self.subset)
        return [f for f in files if f in subset]
    def narrow(self, files):
        """Return a new matchctx over the same ctx and status whose
        subset is filter(files)."""
        return matchctx(self.ctx, self.filter(files), self._status)
287
358
288 def _intree(funcs, tree):
359 def _intree(funcs, tree):
289 if isinstance(tree, tuple):
360 if isinstance(tree, tuple):
290 if tree[0] == 'func' and tree[1][0] == 'symbol':
361 if tree[0] == 'func' and tree[1][0] == 'symbol':
291 if tree[1][1] in funcs:
362 if tree[1][1] in funcs:
292 return True
363 return True
293 for s in tree[1:]:
364 for s in tree[1:]:
294 if _intree(funcs, s):
365 if _intree(funcs, s):
295 return True
366 return True
296 return False
367 return False
297
368
def getfileset(ctx, expr):
    """Evaluate fileset expression expr against ctx.

    Returns whatever the top-level node evaluates to through getset().
    Raises error.ParseError if the expression cannot be parsed in full.
    """
    tree, pos = parse(expr)
    if (pos != len(expr)):
        raise error.ParseError(_("invalid token"), pos)

    # do we need status info?
    if _intree(['modified', 'added', 'removed', 'deleted',
                'unknown', 'ignored', 'clean'], tree):
        # unknown and ignored files are only requested when the
        # expression actually uses those predicates
        unknown = _intree(['unknown'], tree)
        ignored = _intree(['ignored'], tree)

        r = ctx._repo
        status = r.status(ctx.p1(), ctx,
                          unknown=unknown, ignored=ignored, clean=True)
        # the candidate set is every file reported in any status category
        subset = []
        for c in status:
            subset.extend(c)
    else:
        status = None
        # materialise the walk: predicates iterate the subset more than
        # once, which a one-shot iterator would not survive
        subset = list(ctx.walk(ctx.match([])))

    return getset(matchctx(ctx, subset, status), tree)
320
391
# hggettext reads this list to find the predicate functions whose
# docstrings should be extracted for translation
i18nfunctions = symbols.values()
General Comments 0
You need to be logged in to leave comments. Login now