##// END OF EJS Templates
fileset: exclude deleted files from matchctx.existing()...
Patrick Mezard -
r17365:8a0513bf stable
parent child Browse files
Show More
@@ -1,471 +1,488 b''
1 1 # fileset.py - file set queries for mercurial
2 2 #
3 3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import parser, error, util, merge, re
9 9 from i18n import _
10 10
# Token table handed to parser.parser() together with tokenize().
# NOTE(review): each value looks like (binding strength, prefix action,
# infix action), with None where the token cannot appear in that position;
# confirm against parser.py.
elements = {
    "(": (20, ("group", 1, ")"), ("func", 1, ")")),
    "-": (5, ("negate", 19), ("minus", 5)),
    "not": (10, ("not", 10)),
    "!": (10, ("not", 10)),
    "and": (5, None, ("and", 5)),
    "&": (5, None, ("and", 5)),
    "or": (4, None, ("or", 4)),
    "|": (4, None, ("or", 4)),
    "+": (4, None, ("or", 4)),
    ",": (2, None, ("list", 2)),
    ")": (0, None, None),
    "symbol": (0, ("symbol",), None),
    "string": (0, ("string",), None),
    "end": (0, None, None),
}

# words that tokenize() emits as operator tokens instead of 'symbol' tokens
keywords = set(('and', 'or', 'not'))

# non-alphanumeric characters tokenize() accepts inside a bare symbol
globchars = ".*{}[]?/\\"
31 31
def tokenize(program):
    """Split a fileset expression into (type, value, position) tokens.

    Yields one tuple per token and always finishes with an
    ('end', None, pos) marker. Raises error.ParseError for an
    unterminated string or for a character that cannot start a token.
    """
    pos, length = 0, len(program)
    while pos < length:
        c = program[pos]
        if c.isspace(): # skip inter-token whitespace
            pass
        elif c in "(),-|&+!": # handle simple operators
            yield (c, None, pos)
        elif (c in '"\'' or c == 'r' and
              program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
            if c == 'r':
                # raw string: step onto the quote, keep backslashes as-is
                pos += 1
                c = program[pos]
                decode = lambda x: x
            else:
                decode = lambda x: x.decode('string-escape')
            pos += 1
            start = pos
            while pos < length: # find closing quote
                ch = program[pos]
                if ch == '\\': # skip over escaped characters
                    pos += 2
                    continue
                if ch == c:
                    yield ('string', decode(program[start:pos]), start)
                    break
                pos += 1
            else:
                # ran off the end without seeing the closing quote
                raise error.ParseError(_("unterminated string"), start)
        elif c.isalnum() or c in globchars or ord(c) > 127:
            # gather up a symbol/keyword
            start = pos
            pos += 1
            while pos < length: # find end of symbol
                ch = program[pos]
                if not (ch.isalnum() or ch in globchars or ord(ch) > 127):
                    break
                pos += 1
            sym = program[start:pos]
            if sym in keywords: # operator keywords
                yield (sym, None, start)
            else:
                yield ('symbol', sym, start)
            # step back so the shared increment below lands on the first
            # character that is not part of the symbol
            pos -= 1
        else:
            raise error.ParseError(_("syntax error"), pos)
        pos += 1
    yield ('end', None, pos)
80 80
# parse(expr) is the bound parse method of a parser built from the fileset
# tokenizer and token table; getfileset() unpacks its result as (tree, pos)
parse = parser.parser(tokenize, elements).parse
82 82
def getstring(x, err):
    """Return the text of a 'string' or 'symbol' node.

    Raises error.ParseError(err) when x is empty or any other node type.
    """
    if x and x[0] in ('string', 'symbol'):
        return x[1]
    raise error.ParseError(err)
87 87
def getset(mctx, x):
    """Evaluate parse tree node x against mctx.

    The node's first element selects the handler in ``methods``; the
    remaining elements are passed to it as positional arguments.
    Raises error.ParseError when x is empty.
    """
    if not x:
        raise error.ParseError(_("missing argument"))
    handler = methods[x[0]]
    return handler(mctx, *x[1:])
92 92
def stringset(mctx, x):
    """Return the files of mctx.subset matched by the single pattern x."""
    match = mctx.matcher([x])
    return [name for name in mctx.subset if match(name)]
96 96
def andset(mctx, x, y):
    """Evaluate y only against the files already selected by x."""
    narrowed = mctx.narrow(getset(mctx, x))
    return getset(narrowed, y)
99 99
def orset(mctx, x, y):
    """Return the files selected by x followed by those only in y.

    The order of x's result is kept; files from y are appended in y's
    order unless x already selected them.
    """
    xl = getset(mctx, x)
    yl = getset(mctx, y)
    # test membership against a set rather than rescanning the xl list
    # for every file of yl
    seen = set(xl)
    return xl + [f for f in yl if f not in seen]
105 105
def notset(mctx, x):
    """Return the files of mctx.subset that x does not select."""
    excluded = set(getset(mctx, x))
    return [name for name in mctx.subset if name not in excluded]
109 109
def minusset(mctx, x, y):
    """Return the files selected by x that y does not select."""
    kept = getset(mctx, x)
    dropped = set(getset(mctx, y))
    return [name for name in kept if name not in dropped]
114 114
def listset(mctx, a, b):
    """Reject a 'list' node evaluated as a file set.

    Always raises error.ParseError.
    """
    raise error.ParseError(_("can't use a list in this context"))
117 117
def modified(mctx, x):
    """``modified()``
    File that is modified according to status.
    """
    # i18n: "modified" is a keyword
    getargs(x, 0, 0, _("modified takes no arguments"))
    # entry 0 of the status result is what this predicate reports
    changed = mctx.status()[0]
    return [name for name in mctx.subset if name in changed]
126 126
def added(mctx, x):
    """``added()``
    File that is added according to status.
    """
    # i18n: "added" is a keyword
    getargs(x, 0, 0, _("added takes no arguments"))
    # entry 1 of the status result is what this predicate reports
    newfiles = mctx.status()[1]
    return [name for name in mctx.subset if name in newfiles]
135 135
def removed(mctx, x):
    """``removed()``
    File that is removed according to status.
    """
    # i18n: "removed" is a keyword
    getargs(x, 0, 0, _("removed takes no arguments"))
    # entry 2 of the status result is what this predicate reports
    gone = mctx.status()[2]
    return [name for name in mctx.subset if name in gone]
144 144
def deleted(mctx, x):
    """``deleted()``
    File that is deleted according to status.
    """
    # i18n: "deleted" is a keyword
    getargs(x, 0, 0, _("deleted takes no arguments"))
    # entry 3 of the status result is what this predicate reports
    missing = mctx.status()[3]
    return [name for name in mctx.subset if name in missing]
153 153
def unknown(mctx, x):
    """``unknown()``
    File that is unknown according to status. These files will only be
    considered if this predicate is used.
    """
    # i18n: "unknown" is a keyword
    getargs(x, 0, 0, _("unknown takes no arguments"))
    # entry 4 of the status result is what this predicate reports
    untracked = mctx.status()[4]
    return [name for name in mctx.subset if name in untracked]
163 163
def ignored(mctx, x):
    """``ignored()``
    File that is ignored according to status. These files will only be
    considered if this predicate is used.
    """
    # i18n: "ignored" is a keyword
    getargs(x, 0, 0, _("ignored takes no arguments"))
    # entry 5 of the status result is what this predicate reports
    skipped = mctx.status()[5]
    return [name for name in mctx.subset if name in skipped]
173 173
def clean(mctx, x):
    """``clean()``
    File that is clean according to status.
    """
    # i18n: "clean" is a keyword
    getargs(x, 0, 0, _("clean takes no arguments"))
    # entry 6 of the status result is what this predicate reports
    unchanged = mctx.status()[6]
    return [name for name in mctx.subset if name in unchanged]
182 182
def func(mctx, a, b):
    """Evaluate a 'func' node by calling the predicate named by a.

    a is the callee node and b the (possibly empty) argument tree. The
    predicate is looked up in ``symbols`` and called as f(mctx, b).
    Raises error.ParseError when a is not a symbol naming a known
    predicate.
    """
    if a[0] == 'symbol' and a[1] in symbols:
        return symbols[a[1]](mctx, b)
    # a[1] is itself a tuple when the callee is a nested node rather than
    # a plain symbol; wrap it so % formats it as a single value instead of
    # raising TypeError
    raise error.ParseError(_("not a function: %s") % (a[1],))
187 187
def getlist(x):
    """Flatten a left-nested 'list' node into a Python list of nodes.

    ('list', ('list', a, b), c) becomes [a, b, c]; a node that is not a
    list becomes [node]; an empty or missing node becomes [].
    """
    items = []
    # walk down the left spine iteratively: collects right-hand items last
    # to first without recursion or repeated list concatenation
    while x and x[0] == 'list':
        items.append(x[2])
        x = x[1]
    if x:
        items.append(x)
    items.reverse()
    return items
194 194
def getargs(x, min, max, err):
    """Return the argument nodes of x as a list, checking their count.

    Raises error.ParseError(err) unless min <= number of arguments <= max.
    """
    args = getlist(x)
    count = len(args)
    if count < min or count > max:
        raise error.ParseError(err)
    return args
200 200
def binary(mctx, x):
    """``binary()``
    File that appears to be binary (contains NUL bytes).
    """
    # i18n: "binary" is a keyword
    getargs(x, 0, 0, _("binary takes no arguments"))
    ctx = mctx.ctx
    return [name for name in mctx.existing()
            if util.binary(ctx[name].data())]
208 208
def exec_(mctx, x):
    """``exec()``
    File that is marked as executable.
    """
    # i18n: "exec" is a keyword
    getargs(x, 0, 0, _("exec takes no arguments"))
    ctx = mctx.ctx
    return [name for name in mctx.existing() if ctx.flags(name) == 'x']
216 216
def symlink(mctx, x):
    """``symlink()``
    File that is marked as a symlink.
    """
    # i18n: "symlink" is a keyword
    getargs(x, 0, 0, _("symlink takes no arguments"))
    ctx = mctx.ctx
    return [name for name in mctx.existing() if ctx.flags(name) == 'l']
224 224
def resolved(mctx, x):
    """``resolved()``
    File that is marked resolved according to the resolve state.
    """
    # i18n: "resolved" is a keyword
    getargs(x, 0, 0, _("resolved takes no arguments"))
    ctx = mctx.ctx
    # nothing is reported for a context whose rev() is not None
    if ctx.rev() is not None:
        return []
    state = merge.mergestate(ctx._repo)
    return [name for name in mctx.subset
            if name in state and state[name] == 'r']
235 235
def unresolved(mctx, x):
    """``unresolved()``
    File that is marked unresolved according to the resolve state.
    """
    # i18n: "unresolved" is a keyword
    getargs(x, 0, 0, _("unresolved takes no arguments"))
    ctx = mctx.ctx
    # nothing is reported for a context whose rev() is not None
    if ctx.rev() is not None:
        return []
    state = merge.mergestate(ctx._repo)
    return [name for name in mctx.subset
            if name in state and state[name] == 'u']
246 246
def hgignore(mctx, x):
    """``hgignore()``
    File that matches the active .hgignore pattern.
    """
    # i18n: "hgignore" is a keyword
    getargs(x, 0, 0, _("hgignore takes no arguments"))
    # the dirstate's ignore predicate is applied to every candidate file
    isignored = mctx.ctx._repo.dirstate._ignore
    return [name for name in mctx.subset if isignored(name)]
254 254
def grep(mctx, x):
    """``grep(regex)``
    File contains the given regular expression.
    """
    # i18n: "grep" is a keyword
    pat = getstring(x, _("grep requires a pattern"))
    try:
        r = re.compile(pat)
    except re.error:
        # report a malformed pattern as a parse error like every other
        # bad argument, instead of letting re.error escape as a traceback
        raise error.ParseError(_("invalid match pattern: %s") % pat)
    return [f for f in mctx.existing() if r.search(mctx.ctx[f].data())]
262 262
263 263 _units = dict(k=2**10, K=2**10, kB=2**10, KB=2**10,
264 264 M=2**20, MB=2**20, G=2**30, GB=2**30)
265 265
def _sizetoint(s):
    """Convert a size such as '20k', '.5MB' or '300' to a byte count.

    Raises error.ParseError when the numeric part cannot be parsed.
    """
    try:
        s = s.strip()
        for suffix, factor in _units.items():
            if s.endswith(suffix):
                number = s[:-len(suffix)]
                return int(float(number) * factor)
        # no unit suffix: plain byte count
        return int(s)
    except ValueError:
        raise error.ParseError(_("couldn't parse size: %s") % s)
275 275
def _sizetomax(s):
    """Return the largest byte count that still reads as size s.

    With a unit suffix the value is bumped by one step of its last
    written digit and reduced by one byte; without a suffix the value is
    exact. Raises error.ParseError when the number cannot be parsed.
    """
    try:
        s = s.strip()
        for suffix, factor in _units.items():
            if s.endswith(suffix):
                # max(4k) = 5k - 1, max(4.5k) = 4.6k - 1
                number = s[:-len(suffix)]
                step = 1.0
                if "." in number:
                    # one unit of the last decimal place written
                    step /= 10 ** len(number.split(".")[1])
                return int((float(number) + step) * factor) - 1
        # no extension, this is a precise value
        return int(s)
    except ValueError:
        raise error.ParseError(_("couldn't parse size: %s") % s)
291 291
def size(mctx, x):
    """``size(expression)``
    File size matches the given expression. Examples:

    - 1k (files from 1024 to 2047 bytes)
    - < 20k (files less than 20480 bytes)
    - >= .5MB (files at least 524288 bytes)
    - 4k - 1MB (files from 4096 bytes to 1048576 bytes)
    """

    # i18n: "size" is a keyword
    expr = getstring(x, _("size requires an expression")).strip()
    if '-' in expr: # do we have a range?
        a, b = expr.split('-', 1)
        a = _sizetoint(a)
        b = _sizetoint(b)
        m = lambda x: x >= a and x <= b
    elif expr.startswith("<="):
        a = _sizetoint(expr[2:])
        m = lambda x: x <= a
    elif expr.startswith("<"):
        a = _sizetoint(expr[1:])
        m = lambda x: x < a
    elif expr.startswith(">="):
        a = _sizetoint(expr[2:])
        m = lambda x: x >= a
    elif expr.startswith(">"):
        a = _sizetoint(expr[1:])
        m = lambda x: x > a
    # isdigit must be *called* (the bare method object is always true);
    # slicing and startswith keep an empty expression from raising
    # IndexError so it reaches the ParseError below
    elif expr[:1].isdigit() or expr.startswith('.'):
        # bare size: match everything that rounds to the written value
        a = _sizetoint(expr)
        b = _sizetomax(expr)
        m = lambda x: x >= a and x <= b
    else:
        raise error.ParseError(_("couldn't parse size: %s") % expr)

    return [f for f in mctx.existing() if m(mctx.ctx[f].size())]
329 329
def encoding(mctx, x):
    """``encoding(name)``
    File can be successfully decoded with the given character
    encoding. May not be useful for encodings other than ASCII and
    UTF-8.
    """

    # i18n: "encoding" is a keyword
    enc = getstring(x, _("encoding requires an encoding name"))

    decodable = []
    for name in mctx.existing():
        data = mctx.ctx[name].data()
        try:
            data.decode(enc)
        except LookupError:
            # the codec name itself is unknown: abort the whole query
            raise util.Abort(_("unknown encoding '%s'") % enc)
        except UnicodeDecodeError:
            # this file is not valid in enc: leave it out
            pass
        else:
            decodable.append(name)

    return decodable
352 352
def copied(mctx, x):
    """``copied()``
    File that is recorded as being copied.
    """
    # i18n: "copied" is a keyword
    getargs(x, 0, 0, _("copied takes no arguments"))
    ctx = mctx.ctx
    found = []
    for name in mctx.subset:
        parents = ctx[name].parents()
        # reported when the first file parent lives under another path
        if parents and parents[0].path() != name:
            found.append(name)
    return found
365 365
def subrepo(mctx, x):
    """``subrepo([pattern])``
    Subrepositories whose paths match the given pattern.
    """
    # i18n: "subrepo" is a keyword
    getargs(x, 0, 1, _("subrepo takes at most one argument"))
    ctx = mctx.ctx
    sstate = ctx.substate
    if not x:
        # no pattern: every entry of the substate
        return [sub for sub in sstate]

    pat = getstring(x, _("subrepo requires a pattern or no arguments"))

    import match as matchmod # avoid circular import issues
    if matchmod.patkind(pat):
        m = matchmod.match(ctx._repo.root, '', [pat], ctx=ctx)
    else:
        # pattern without a kind prefix: plain equality is enough
        def m(s):
            return (s == pat)
    return [sub for sub in sstate if m(sub)]
387 387
# predicate name as written in an expression -> implementing function;
# func() looks callees up here and i18nfunctions exposes the values
symbols = {
    'added': added,
    'binary': binary,
    'clean': clean,
    'copied': copied,
    'deleted': deleted,
    'encoding': encoding,
    'exec': exec_,
    'grep': grep,
    'ignored': ignored,
    'hgignore': hgignore,
    'modified': modified,
    'removed': removed,
    'resolved': resolved,
    'size': size,
    'symlink': symlink,
    'unknown': unknown,
    'unresolved': unresolved,
    'subrepo': subrepo,
}

# parse tree node type -> evaluator; getset() dispatches through this table
methods = {
    'string': stringset,
    'symbol': stringset,
    'and': andset,
    'or': orset,
    'minus': minusset,
    'list': listset,
    'group': getset,
    'not': notset,
    'func': func,
}
420 420
class matchctx(object):
    """Evaluation state for a fileset query.

    Bundles the context being queried (ctx), the candidate file names
    (subset) and the status result, or None when no status was computed.
    """

    def __init__(self, ctx, subset=None, status=None):
        self.ctx = ctx
        self.subset = subset
        self._status = status

    def status(self):
        """Return the status result given at construction (may be None)."""
        return self._status

    def matcher(self, patterns):
        """Build a matcher for patterns through the underlying context."""
        return self.ctx.match(patterns)

    def filter(self, files):
        """Return the entries of files that are part of the subset."""
        return [f for f in files if f in self.subset]

    def existing(self):
        """Iterate over subset files present in ctx and not deleted.

        Entry 3 of the status result (the one the deleted() predicate
        reports) is excluded; without a status result nothing is excluded.
        """
        if self._status is None:
            deleted = set()
        else:
            deleted = set(self._status[3])
        return (f for f in self.subset
                if f in self.ctx and f not in deleted)

    def narrow(self, files):
        """Return a new matchctx restricted to files within the subset."""
        return matchctx(self.ctx, self.filter(files), self._status)
436 441
437 442 def _intree(funcs, tree):
438 443 if isinstance(tree, tuple):
439 444 if tree[0] == 'func' and tree[1][0] == 'symbol':
440 445 if tree[1][1] in funcs:
441 446 return True
442 447 for s in tree[1:]:
443 448 if _intree(funcs, s):
444 449 return True
445 450 return False
446 451
452 # filesets using matchctx.existing()
453 _existingcallers = [
454 'binary',
455 'exec',
456 'grep',
457 'size',
458 'symlink',
459 ]
460
def getfileset(ctx, expr):
    """Evaluate fileset expression expr against ctx.

    Returns whatever getset() produces for the parsed tree. Raises
    error.ParseError("invalid token") when parsing stops before the end
    of expr.
    """
    tree, pos = parse(expr)
    if (pos != len(expr)):
        raise error.ParseError(_("invalid token"), pos)

    # do we need status info?
    if (_intree(['modified', 'added', 'removed', 'deleted',
                 'unknown', 'ignored', 'clean'], tree) or
        # Using matchctx.existing() on a workingctx requires us to check
        # for deleted files.
        (ctx.rev() is None and _intree(_existingcallers, tree))):
        # unknown/ignored files are only collected when asked for
        unknown = _intree(['unknown'], tree)
        ignored = _intree(['ignored'], tree)

        r = ctx._repo
        status = r.status(ctx.p1(), ctx,
                          unknown=unknown, ignored=ignored, clean=True)
        # candidates are every file mentioned in any status category
        subset = []
        for c in status:
            subset.extend(c)
    else:
        status = None
        # walk() may hand back a one-shot iterator, but the subset is
        # scanned repeatedly (each operand of "or", membership tests in
        # matchctx.filter), so materialise it once
        subset = list(ctx.walk(ctx.match([])))

    return getset(matchctx(ctx, subset, status), tree)
469 486
# tell hggettext to extract docstrings from these functions:
# (every predicate registered in the symbols table)
i18nfunctions = symbols.values()
@@ -1,78 +1,83 b''
1 1 $ fileset() {
2 2 > hg debugfileset "$@"
3 3 > }
4 4
5 5 $ hg init repo
6 6 $ cd repo
7 7 $ echo a > a1
8 8 $ echo a > a2
9 9 $ echo b > b1
10 10 $ echo b > b2
11 11 $ hg ci -Am addfiles
12 12 adding a1
13 13 adding a2
14 14 adding b1
15 15 adding b2
16 16
17 17 Test operators and basic patterns
18 18
19 19 $ fileset a1
20 20 a1
21 21 $ fileset 'a*'
22 22 a1
23 23 a2
24 24 $ fileset '"re:a\d"'
25 25 a1
26 26 a2
27 27 $ fileset 'a1 or a2'
28 28 a1
29 29 a2
30 30 $ fileset 'a1 | a2'
31 31 a1
32 32 a2
33 33 $ fileset 'a* and "*1"'
34 34 a1
35 35 $ fileset 'a* & "*1"'
36 36 a1
37 37 $ fileset 'not (r"a*")'
38 38 b1
39 39 b2
40 40 $ fileset '! ("a*")'
41 41 b1
42 42 b2
43 43 $ fileset 'a* - a1'
44 44 a2
45 45
46 46 Test files status
47 47
48 48 $ rm a1
49 49 $ hg rm a2
50 50 $ echo b >> b2
51 51 $ hg cp b1 c1
52 52 $ echo c > c2
53 53 $ echo c > c3
54 54 $ cat > .hgignore <<EOF
55 55 > \.hgignore
56 56 > 2$
57 57 > EOF
58 58 $ fileset 'modified()'
59 59 b2
60 60 $ fileset 'added()'
61 61 c1
62 62 $ fileset 'removed()'
63 63 a2
64 64 $ fileset 'deleted()'
65 65 a1
66 66 $ fileset 'unknown()'
67 67 c3
68 68 $ fileset 'ignored()'
69 69 .hgignore
70 70 c2
71 71 $ fileset 'hgignore()'
72 72 a2
73 73 b2
74 74 $ fileset 'clean()'
75 75 b1
76 76 $ fileset 'copied()'
77 77 c1
78 78
79 Test files properties
80
81 >>> file('bin', 'wb').write('\0a')
82 $ fileset 'binary()'
83
General Comments 0
You need to be logged in to leave comments. Login now