##// END OF EJS Templates
fileset: add size() predicate
Matt Mackall -
r14683:281102f3 default
parent child Browse files
Show More
@@ -1,322 +1,393
1 1 # fileset.py - file set queries for mercurial
2 2 #
3 3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import parser, error, util, merge, re
9 9 from i18n import _
10 10
# Token table handed to parser.parser() (see the module-level ``parse``).
# Keys are the token types produced by tokenize(); each value is a 3-tuple.
# NOTE(review): the tuple looks like (binding strength, prefix action,
# infix action) -- confirm against the parser module, which is not visible
# from this file.
elements = {
    "(": (20, ("group", 1, ")"), ("func", 1, ")")),
    "-": (5, ("negate", 19), ("minus", 5)),
    "not": (10, ("not", 10)),
    "!": (10, ("not", 10)),
    "and": (5, None, ("and", 5)),
    "&": (5, None, ("and", 5)),
    "or": (4, None, ("or", 4)),
    "|": (4, None, ("or", 4)),
    "+": (4, None, ("or", 4)),
    ",": (2, None, ("list", 2)),
    ")": (0, None, None),
    "symbol": (0, ("symbol",), None),
    "string": (0, ("string",), None),
    "end": (0, None, None),
}

# bare words that tokenize() emits as operator tokens instead of 'symbol'
keywords = set(['and', 'or', 'not'])

# punctuation that tokenize() accepts inside a bare symbol, in addition to
# alphanumerics and characters whose ordinal is above 127
globchars = ".*{}[]?/\\"
31 31
def tokenize(program):
    """Generate (type, value, position) tokens for a fileset expression.

    Yields, in order of appearance:

    - ``(c, None, pos)`` for each single-character operator,
    - ``('string', text, start)`` for quoted strings,
    - ``(keyword, None, start)`` for the words listed in ``keywords``,
    - ``('symbol', text, start)`` for any other bare word,

    followed by a final ``('end', None, pos)``.

    Raises error.ParseError for an unterminated string or for a character
    that cannot start any token.
    """
    pos, l = 0, len(program)
    while pos < l:
        c = program[pos]
        if c.isspace(): # skip inter-token whitespace
            pass
        elif c in "(),-|&+!": # handle simple operators
            yield (c, None, pos)
        elif (c in '"\'' or c == 'r' and
              program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
            if c == 'r':
                # raw string: step over the prefix and keep the body verbatim
                pos += 1
                c = program[pos]
                decode = lambda x: x
            else:
                # regular string: backslash escapes are interpreted
                decode = lambda x: x.decode('string-escape')
            pos += 1
            s = pos # first character after the opening quote
            while pos < l: # find closing quote
                d = program[pos]
                if d == '\\': # skip over escaped characters
                    pos += 2
                    continue
                if d == c:
                    yield ('string', decode(program[s:pos]), s)
                    break
                pos += 1
            else:
                # ran off the end of the input without seeing the quote
                raise error.ParseError(_("unterminated string"), s)
        elif c.isalnum() or c in globchars or ord(c) > 127:
            # gather up a symbol/keyword
            s = pos
            pos += 1
            while pos < l: # find end of symbol
                d = program[pos]
                if not (d.isalnum() or d in globchars or ord(d) > 127):
                    break
                pos += 1
            sym = program[s:pos]
            if sym in keywords: # operator keywords
                yield (sym, None, s)
            else:
                yield ('symbol', sym, s)
            # pos now points just past the symbol; step back one so the
            # unconditional increment below lands on that character
            pos -= 1
        else:
            raise error.ParseError(_("syntax error"), pos)
        pos += 1
    yield ('end', None, pos)
80 80
# parse(expr) is the bound parse method of a parser built from tokenize()
# and the elements table; getfileset() unpacks its result as (tree, pos)
parse = parser.parser(tokenize, elements).parse
82 82
def getstring(x, err):
    """Return the text carried by a 'string' or 'symbol' parse node.

    Any other node, including an empty or missing one, raises
    error.ParseError with err as its message.
    """
    if x:
        kind = x[0]
        if kind == 'string' or kind == 'symbol':
            return x[1]
    raise error.ParseError(err)
87 87
def getset(mctx, x):
    """Evaluate parse tree x in the context mctx.

    The first item of x selects a handler from ``methods``; the remaining
    items are passed to it after mctx, and its result is returned.
    Raises error.ParseError when x is empty or missing.
    """
    if not x:
        raise error.ParseError(_("missing argument"))
    handler = methods[x[0]]
    operands = x[1:]
    return handler(mctx, *operands)
92 92
def stringset(mctx, x):
    """Return the files in mctx.subset accepted by a matcher for pattern x."""
    accepts = mctx.matcher([x])
    selected = []
    for name in mctx.subset:
        if accepts(name):
            selected.append(name)
    return selected
96 96
def andset(mctx, x, y):
    """Evaluate ``x and y``: y is evaluated only over the files x selected."""
    left = getset(mctx, x)
    narrowed = mctx.narrow(left)
    return getset(narrowed, y)
99 99
def orset(mctx, x, y):
    """Evaluate ``x or y``.

    Returns the files selected by x followed by those selected by y that
    x did not already select; the relative order of each side is kept.
    """
    xl = getset(mctx, x)
    yl = getset(mctx, y)
    # membership is tested once per file in yl; a set keeps that O(1)
    # instead of rescanning the whole of xl each time
    seen = set(xl)
    return xl + [f for f in yl if f not in seen]
105 105
def notset(mctx, x):
    """Evaluate ``not x``: the files of mctx.subset that x does not select."""
    excluded = set(getset(mctx, x))
    kept = []
    for name in mctx.subset:
        if name not in excluded:
            kept.append(name)
    return kept
109 109
def listset(mctx, a, b):
    """Handler for a bare 'list' node; always raises error.ParseError.

    Lists are only consumed as function arguments (see getlist()), so
    reaching this handler means a comma was used where a set was expected.
    """
    message = _("can't use a list in this context")
    raise error.ParseError(message)
112 112
def modified(mctx, x):
    """``modified()``
    File that is modified according to status.
    """
    # x is the parsed argument list; this predicate accepts none
    getargs(x, 0, 0, _("modified takes no arguments"))
    # entry 0 of the status result is used as the modified files; a set
    # makes each lookup O(1) rather than a scan of the list per file
    s = set(mctx.status()[0])
    return [f for f in mctx.subset if f in s]
120 120
def added(mctx, x):
    """``added()``
    File that is added according to status.
    """
    # x is the parsed argument list; this predicate accepts none
    getargs(x, 0, 0, _("added takes no arguments"))
    # entry 1 of the status result is used as the added files; a set
    # makes each lookup O(1) rather than a scan of the list per file
    s = set(mctx.status()[1])
    return [f for f in mctx.subset if f in s]
128 128
def removed(mctx, x):
    """``removed()``
    File that is removed according to status.
    """
    # x is the parsed argument list; this predicate accepts none
    getargs(x, 0, 0, _("removed takes no arguments"))
    # entry 2 of the status result is used as the removed files; a set
    # makes each lookup O(1) rather than a scan of the list per file
    s = set(mctx.status()[2])
    return [f for f in mctx.subset if f in s]
136 136
def deleted(mctx, x):
    """``deleted()``
    File that is deleted according to status.
    """
    # x is the parsed argument list; this predicate accepts none
    getargs(x, 0, 0, _("deleted takes no arguments"))
    # entry 3 of the status result is used as the deleted files; a set
    # makes each lookup O(1) rather than a scan of the list per file
    s = set(mctx.status()[3])
    return [f for f in mctx.subset if f in s]
144 144
def unknown(mctx, x):
    """``unknown()``
    File that is unknown according to status. These files will only be
    considered if this predicate is used.
    """
    # x is the parsed argument list; this predicate accepts none
    getargs(x, 0, 0, _("unknown takes no arguments"))
    # entry 4 of the status result is used as the unknown files; a set
    # makes each lookup O(1) rather than a scan of the list per file
    s = set(mctx.status()[4])
    return [f for f in mctx.subset if f in s]
153 153
def ignored(mctx, x):
    """``ignored()``
    File that is ignored according to status. These files will only be
    considered if this predicate is used.
    """
    # x is the parsed argument list; this predicate accepts none
    getargs(x, 0, 0, _("ignored takes no arguments"))
    # entry 5 of the status result is used as the ignored files; a set
    # makes each lookup O(1) rather than a scan of the list per file
    s = set(mctx.status()[5])
    return [f for f in mctx.subset if f in s]
162 162
def clean(mctx, x):
    """``clean()``
    File that is clean according to status.
    """
    # x is the parsed argument list; this predicate accepts none
    getargs(x, 0, 0, _("clean takes no arguments"))
    # entry 6 of the status result is used as the clean files; a set
    # makes each lookup O(1) rather than a scan of the list per file
    s = set(mctx.status()[6])
    return [f for f in mctx.subset if f in s]
170 170
def func(mctx, a, b):
    """Handler for a 'func' node: call predicate a with argument tree b.

    a must be a 'symbol' node naming an entry of ``symbols``; otherwise
    error.ParseError is raised.
    """
    if a[0] != 'symbol' or a[1] not in symbols:
        raise error.ParseError(_("not a function: %s") % a[1])
    predicate = symbols[a[1]]
    return predicate(mctx, b)
175 175
def getlist(x):
    """Flatten a left-nested 'list' parse tree into a Python list.

    ``('list', ('list', a, b), c)`` becomes ``[a, b, c]``; a single
    non-list node becomes ``[node]`` and an empty/missing node ``[]``.
    """
    collected = []
    node = x
    # walk down the left spine, picking up the right-hand item each time
    while node and node[0] == 'list':
        collected.append(node[2])
        node = node[1]
    if node:
        collected.append(node)
    # items were gathered last-to-first
    collected.reverse()
    return collected
182 182
def getargs(x, min, max, err):
    """Return the arguments in tree x as a list, checking their count.

    Raises error.ParseError(err) unless the number of arguments lies
    between min and max inclusive.
    """
    args = getlist(x)
    count = len(args)
    if not (min <= count <= max):
        raise error.ParseError(err)
    return args
188 188
def binary(mctx, x):
    """``binary()``
    File that appears to be binary (contains NUL bytes).
    """
    # x is the parsed argument list; this predicate accepts none
    getargs(x, 0, 0, _("binary takes no arguments"))
    # the decision is delegated to util.binary() on each file's data
    return [f for f in mctx.subset if util.binary(mctx.ctx[f].data())]
195 195
def exec_(mctx, x):
    """``exec()``
    File that is marked as executable.
    """
    # x is the parsed argument list; this predicate accepts none
    getargs(x, 0, 0, _("exec takes no arguments"))
    found = []
    for name in mctx.subset:
        # a flags value of exactly 'x' is what counts as executable here
        if mctx.ctx.flags(name) == 'x':
            found.append(name)
    return found
202 202
def symlink(mctx, x):
    """``symlink()``
    File that is marked as a symlink.
    """
    # x is the parsed argument list; this predicate accepts none
    getargs(x, 0, 0, _("symlink takes no arguments"))
    found = []
    for name in mctx.subset:
        # a flags value of exactly 'l' is what counts as a symlink here
        if mctx.ctx.flags(name) == 'l':
            found.append(name)
    return found
209 209
def resolved(mctx, x):
    """``resolved()``
    File that is marked resolved according to the resolve state.
    """
    # x is the parsed argument list; this predicate accepts none
    getargs(x, 0, 0, _("resolved takes no arguments"))
    # merge state is only consulted when the context has no revision
    # number (presumably the working directory -- confirm against ctx)
    if mctx.ctx.rev() is None:
        ms = merge.mergestate(mctx.ctx._repo)
        return [f for f in mctx.subset if f in ms and ms[f] == 'r']
    return []
219 219
def unresolved(mctx, x):
    """``unresolved()``
    File that is marked unresolved according to the resolve state.
    """
    # x is the parsed argument list; this predicate accepts none
    getargs(x, 0, 0, _("unresolved takes no arguments"))
    # merge state is only consulted when the context has no revision
    # number (presumably the working directory -- confirm against ctx)
    if mctx.ctx.rev() is None:
        ms = merge.mergestate(mctx.ctx._repo)
        return [f for f in mctx.subset if f in ms and ms[f] == 'u']
    return []
229 229
def hgignore(mctx, x):
    """``hgignore()``
    File that matches the active .hgignore pattern.
    """
    # x is the parsed argument list; this predicate accepts none
    getargs(x, 0, 0, _("hgignore takes no arguments"))
    # the dirstate's ignore callable is applied to every candidate file
    ignore = mctx.ctx._repo.dirstate._ignore
    return [f for f in mctx.subset if ignore(f)]
237 237
def grep(mctx, x):
    """``grep(regex)``
    File contains the given regular expression.
    """
    # the single argument must be a string or symbol holding the pattern
    pat = getstring(x, _("grep requires a pattern"))
    search = re.compile(pat).search
    hits = []
    for name in mctx.subset:
        # search() rather than match(): the pattern may occur anywhere
        if search(mctx.ctx[name].data()):
            hits.append(name)
    return hits
245 245
# Multiplier for each size suffix understood by _sizetoint()/_sizetomax().
# NOTE(review): k/kB/M/MB/G/GB are powers of two while kiB/MiB/GiB are
# powers of ten, which is the reverse of IEC usage. The examples in
# size()'s help text document exactly this mapping, so it is left as is.
_units = dict(k=2**10, K=2**10, kB=2**10, KB=2**10,
              M=2**20, MB=2**20, G=2**30, GB=2**30,
              kiB=10**3, MiB=10**6, GiB=10**9)
249
def _sizetoint(s):
    """Convert a size string such as "10", "4k" or " 1.5MB " to bytes.

    Surrounding whitespace is ignored. A number followed by one of the
    suffixes in ``_units`` may be fractional and is scaled by that unit;
    a string without a suffix must be a plain integer.

    Raises error.ParseError when the string cannot be interpreted.
    """
    try:
        s = s.strip()
        for k, v in _units.items():
            if s.endswith(k):
                return int(float(s[:-len(k)]) * v)
        return int(s)
    except (ValueError, OverflowError):
        # report bad input as a parse error; the offending text goes in
        # the message because the second ParseError argument is used for
        # a numeric position elsewhere in this file
        raise error.ParseError(_("couldn't parse size: %s") % s)
260
def _sizetomax(s):
    """Return the largest byte count that still "reads as" size string s.

    For a value with a unit suffix the last written digit is bumped by
    one and one byte is subtracted, so "4k" gives 5k - 1 and "4.5k" gives
    4.6k - 1. A string without a suffix is a precise value and is
    returned unchanged as an integer.

    Raises error.ParseError when the string cannot be interpreted.
    """
    try:
        s = s.strip()
        for k, v in _units.items():
            if s.endswith(k):
                # max(4k) = 5k - 1, max(4.5k) = 4.6k - 1
                n = s[:-len(k)]
                inc = 1.0
                if "." in n:
                    # one unit in the last decimal place that was written
                    inc /= 10 ** len(n.split(".")[1])
                return int((float(n) + inc) * v) - 1
        # no extension, this is a precise value
        return int(s)
    except (ValueError, OverflowError):
        # report bad input as a parse error; the offending text goes in
        # the message because the second ParseError argument is used for
        # a numeric position elsewhere in this file
        raise error.ParseError(_("couldn't parse size: %s") % s)
277
def size(mctx, x):
    """``size(expression)``
    File size matches the given expression. Examples:

    - 1k (files from 1024 to 2047 bytes)
    - 1.0kiB (files from 1000 to 1099 bytes)
    - < 20k (files less than 20480 bytes)
    - >= .5MiB (files at least 500000 bytes)
    - 4k - 1MB (files from 4096 bytes to 1048576 bytes)
    """

    # the single argument must be a string or symbol holding the expression
    expr = getstring(x, _("size requires an expression")).strip()
    if not expr:
        # nothing to parse; indexing expr[0] below would raise IndexError
        raise error.ParseError(_("size requires an expression"))

    if '-' in expr: # do we have a range?
        a, b = expr.split('-', 1)
        a = _sizetoint(a)
        b = _sizetoint(b)
        m = lambda n: n >= a and n <= b
    elif expr.startswith("<="): # two-character operators are tested first
        a = _sizetoint(expr[2:])
        m = lambda n: n <= a
    elif expr.startswith("<"):
        a = _sizetoint(expr[1:])
        m = lambda n: n < a
    elif expr.startswith(">="):
        a = _sizetoint(expr[2:])
        m = lambda n: n >= a
    elif expr.startswith(">"):
        a = _sizetoint(expr[1:])
        m = lambda n: n > a
    elif expr[0].isdigit() or expr[0] == '.':
        # a bare value matches every size that rounds down to what was
        # written, e.g. "1k" covers 1024 through 2047
        a = _sizetoint(expr)
        b = _sizetomax(expr)
        m = lambda n: n >= a and n <= b
    else:
        raise error.ParseError(_("couldn't parse size: %s") % expr)

    return [f for f in mctx.subset if m(mctx.ctx[f].size())]
315
# Predicate name -> implementation. func() looks the called name up here
# and invokes the entry as predicate(mctx, argumenttree).
symbols = {
    'added': added,
    'binary': binary,
    'clean': clean,
    'deleted': deleted,
    'exec': exec_,
    'grep': grep,
    'ignored': ignored,
    'hgignore': hgignore,
    'modified': modified,
    'removed': removed,
    'resolved': resolved,
    'size': size,
    'symlink': symlink,
    'unknown': unknown,
    'unresolved': unresolved,
}
262 333
# Parse-tree node type -> evaluator. getset() dispatches on the first item
# of a node and passes the remaining items as arguments after mctx.
methods = {
    'string': stringset,
    'symbol': stringset,
    'and': andset,
    'or': orset,
    'list': listset,
    'group': getset,
    'not': notset,
    'func': func,
}
273 344
class matchctx(object):
    """Evaluation context passed to every fileset handler and predicate.

    ctx    -- the context being queried; within this class only its
              match() method is called
    subset -- the file names the current (sub)expression may select from
    status -- precomputed status data returned by status(), or None
    """
    def __init__(self, ctx, subset=None, status=None):
        self.ctx = ctx
        self.subset = subset
        self._status = status
    def status(self):
        """Return the status data given at construction (may be None)."""
        return self._status
    def matcher(self, patterns):
        """Return ctx.match(patterns)."""
        return self.ctx.match(patterns)
    def filter(self, files):
        """Return the members of files that are in subset, in files' order."""
        subset = self.subset
        if isinstance(subset, (list, tuple)):
            # one hash lookup per file instead of a scan of the sequence;
            # any other kind of subset is tested exactly as before
            subset = set(subset)
        return [f for f in files if f in subset]
    def narrow(self, files):
        """Return a new matchctx whose subset is filter(files)."""
        return matchctx(self.ctx, self.filter(files), self._status)
287 358
def _intree(funcs, tree):
    """Report whether parse tree calls any function named in funcs.

    Returns True when some 'func' node in tree has a 'symbol' callee
    whose name is in funcs, otherwise False. Non-tuple leaves never match.
    """
    if not isinstance(tree, tuple):
        return False
    callsfunc = tree[0] == 'func' and tree[1][0] == 'symbol'
    if callsfunc and tree[1][1] in funcs:
        return True
    # otherwise look through every operand of this node
    for child in tree[1:]:
        if _intree(funcs, child):
            return True
    return False
297 368
def getfileset(ctx, expr):
    """Evaluate fileset expression expr against ctx.

    Returns whatever getset() produces for the parsed tree, i.e. the
    selected files. Raises error.ParseError when expr cannot be parsed
    completely.
    """
    tree, pos = parse(expr)
    if (pos != len(expr)):
        # the parser stopped before consuming the whole expression
        raise error.ParseError(_("invalid token"), pos)

    # do we need status info?
    if _intree(['modified', 'added', 'removed', 'deleted',
                'unknown', 'ignored', 'clean'], tree):
        # unknown and ignored files are only requested when the
        # expression actually asks for them
        wantunknown = _intree(['unknown'], tree)
        wantignored = _intree(['ignored'], tree)

        r = ctx._repo
        status = r.status(ctx.p1(), ctx,
                          unknown=wantunknown, ignored=wantignored,
                          clean=True)
        # candidate files are the union of every status category
        subset = []
        for c in status:
            subset.extend(c)
    else:
        status = None
        # the subset is iterated and membership-tested many times while
        # evaluating, so materialise it in case walk() returns a one-shot
        # iterator
        subset = list(ctx.walk(ctx.match([])))

    return getset(matchctx(ctx, subset, status), tree)
320 391
# tell hggettext to extract docstrings from these functions:
# (every predicate registered in ``symbols``; their docstrings are
# therefore translatable text, not just developer notes)
i18nfunctions = symbols.values()
General Comments 0
You need to be logged in to leave comments. Login now