upstream/mercurial-mirror Commit - r14683:281102f3

1

# fileset.py - file set queries for mercurial

1

# fileset.py - file set queries for mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

import parser, error, util, merge, re

8

import parser, error, util, merge, re

9

from i18n import _

9

from i18n import _

10

11

# operator table handed to parser.parser() below, keyed by token type.
# each value is presumably (binding strength, prefix action, infix action)
# -- TODO confirm against the parser module.
# NOTE(review): "-" yields 'negate'/'minus' nodes, but the methods table in
# this file has no entry for either, so evaluating them would fail in getset.
elements = {
    "(": (20, ("group", 1, ")"), ("func", 1, ")")),
    "-": (5, ("negate", 19), ("minus", 5)),
    "not": (10, ("not", 10)),
    "!": (10, ("not", 10)),
    "and": (5, None, ("and", 5)),
    "&": (5, None, ("and", 5)),
    "or": (4, None, ("or", 4)),
    "|": (4, None, ("or", 4)),
    "+": (4, None, ("or", 4)),
    ",": (2, None, ("list", 2)),
    ")": (0, None, None),
    "symbol": (0, ("symbol",), None),
    "string": (0, ("string",), None),
    "end": (0, None, None),
}

# symbols that the tokenizer reports as operators instead of plain symbols
keywords = set(['and', 'or', 'not'])

# non-alphanumeric characters allowed inside an unquoted symbol
globchars = ".*{}[]?/\\"

31

32

def tokenize(program):
    """Yield (type, value, position) tokens for a fileset expression.

    Token types are the single-character operators in "(),-|&+!", the
    keywords 'and'/'or'/'not', 'string' (quoted text, optionally with an
    r prefix that disables escape decoding), 'symbol' (a run of
    alphanumeric, glob or non-ASCII characters) and a final 'end' token.

    Raises error.ParseError on an unterminated string or on a character
    that cannot start any token.
    """
    pos, l = 0, len(program)
    while pos < l:
        c = program[pos]
        if c.isspace(): # skip inter-token whitespace
            pass
        elif c in "(),-|&+!": # handle simple operators
            yield (c, None, pos)
        elif (c in '"\'' or c == 'r' and
              program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
            if c == 'r':
                # raw string: step onto the quote and keep the text as-is
                pos += 1
                c = program[pos]
                decode = lambda x: x
            else:
                decode = lambda x: x.decode('string-escape')
            pos += 1
            s = pos
            while pos < l: # find closing quote
                d = program[pos]
                if d == '\\': # skip over escaped characters
                    pos += 2
                    continue
                if d == c:
                    yield ('string', decode(program[s:pos]), s)
                    break
                pos += 1
            else:
                # ran off the end without seeing the closing quote
                raise error.ParseError(_("unterminated string"), s)
        elif c.isalnum() or c in globchars or ord(c) > 127:
            # gather up a symbol/keyword
            s = pos
            pos += 1
            while pos < l: # find end of symbol
                d = program[pos]
                if not (d.isalnum() or d in globchars or ord(d) > 127):
                    break
                pos += 1
            sym = program[s:pos]
            if sym in keywords: # operator keywords
                yield (sym, None, s)
            else:
                yield ('symbol', sym, s)
            # step back so the shared increment below lands on the
            # first character after the symbol
            pos -= 1
        else:
            raise error.ParseError(_("syntax error"), pos)
        pos += 1
    yield ('end', None, pos)

80

81

# parse(expr) returns a (tree, pos) pair, as unpacked by getfileset
parse = parser.parser(tokenize, elements).parse

82

83

def getstring(x, err):
    """Return the text of a 'string' or 'symbol' parse node.

    Raises error.ParseError(err) when x is empty or any other node type.
    """
    if x and (x[0] == 'string' or x[0] == 'symbol'):
        return x[1]
    raise error.ParseError(err)

87

88

def getset(mctx, x):
    """Evaluate parse node x against mctx and return the handler's result.

    The node type x[0] selects the handler from the methods table and the
    remaining node elements become its arguments.

    Raises error.ParseError when x is empty.
    """
    if not x:
        raise error.ParseError(_("missing argument"))
    return methods[x[0]](mctx, *x[1:])

92

93

def stringset(mctx, x):
    """Return the files of mctx.subset accepted by a matcher for pattern x."""
    m = mctx.matcher([x])
    return [f for f in mctx.subset if m(f)]

96

97

def andset(mctx, x, y):
    """Evaluate y within the subset already narrowed to the result of x."""
    return getset(mctx.narrow(getset(mctx, x)), y)

99

100

def orset(mctx, x, y):
    """Return the result of x followed by results of y not already in x."""
    xl = getset(mctx, x)
    yl = getset(mctx, y)
    # membership is tested against a set so the merge is linear instead
    # of rescanning xl for every entry of yl; result order is unchanged
    seen = set(xl)
    return xl + [f for f in yl if f not in seen]

105

106

def notset(mctx, x):
    """Return the files of mctx.subset that x does not select."""
    s = set(getset(mctx, x))
    return [r for r in mctx.subset if r not in s]

109

110

def listset(mctx, a, b):
    """Reject a 'list' node evaluated as a set; always raises ParseError."""
    raise error.ParseError(_("can't use a list in this context"))

112

113

def modified(mctx, x):
    """``modified()``
    File that is modified according to status.
    """
    # x is the argument node; anything but an empty list is rejected
    getargs(x, 0, 0, _("modified takes no arguments"))
    # entry 0 of the status sequence is used as the modified files
    s = mctx.status()[0]
    return [f for f in mctx.subset if f in s]

120

121

def added(mctx, x):
    """``added()``
    File that is added according to status.
    """
    # x is the argument node; anything but an empty list is rejected
    getargs(x, 0, 0, _("added takes no arguments"))
    # entry 1 of the status sequence is used as the added files
    s = mctx.status()[1]
    return [f for f in mctx.subset if f in s]

128

129

def removed(mctx, x):
    """``removed()``
    File that is removed according to status.
    """
    # x is the argument node; anything but an empty list is rejected
    getargs(x, 0, 0, _("removed takes no arguments"))
    # entry 2 of the status sequence is used as the removed files
    s = mctx.status()[2]
    return [f for f in mctx.subset if f in s]

136

137

def deleted(mctx, x):
    """``deleted()``
    File that is deleted according to status.
    """
    # x is the argument node; anything but an empty list is rejected
    getargs(x, 0, 0, _("deleted takes no arguments"))
    # entry 3 of the status sequence is used as the deleted files
    s = mctx.status()[3]
    return [f for f in mctx.subset if f in s]

144

145

def unknown(mctx, x):
    """``unknown()``
    File that is unknown according to status. These files will only be
    considered if this predicate is used.
    """
    # x is the argument node; anything but an empty list is rejected
    getargs(x, 0, 0, _("unknown takes no arguments"))
    # entry 4 of the status sequence is used as the unknown files;
    # getfileset only asks status for them when this predicate appears
    s = mctx.status()[4]
    return [f for f in mctx.subset if f in s]

153

154

def ignored(mctx, x):
    """``ignored()``
    File that is ignored according to status. These files will only be
    considered if this predicate is used.
    """
    # x is the argument node; anything but an empty list is rejected
    getargs(x, 0, 0, _("ignored takes no arguments"))
    # entry 5 of the status sequence is used as the ignored files;
    # getfileset only asks status for them when this predicate appears
    s = mctx.status()[5]
    return [f for f in mctx.subset if f in s]

162

163

def clean(mctx, x):
    """``clean()``
    File that is clean according to status.
    """
    # x is the argument node; anything but an empty list is rejected
    getargs(x, 0, 0, _("clean takes no arguments"))
    # entry 6 of the status sequence is used as the clean files
    s = mctx.status()[6]
    return [f for f in mctx.subset if f in s]

170

171

def func(mctx, a, b):
    """Evaluate a 'func' node: call predicate a with argument node b.

    a must be a 'symbol' node naming an entry of the symbols table;
    otherwise error.ParseError is raised.
    """
    if a[0] == 'symbol' and a[1] in symbols:
        return symbols[a[1]](mctx, b)
    raise error.ParseError(_("not a function: %s") % a[1])

175

176

def getlist(x):
    """Flatten a left-nested 'list' parse node into a Python list.

    An empty node gives [], and any node that is not a 'list' gives a
    one-element list holding that node.
    """
    if not x:
        return []
    if x[0] == 'list':
        # ('list', earlier-items, last-item): recurse into the left side
        return getlist(x[1]) + [x[2]]
    return [x]

182

183

def getargs(x, min, max, err):
    """Return the argument nodes of x as a list, checking their count.

    Raises error.ParseError(err) unless min <= number of arguments <= max.
    """
    l = getlist(x)
    if len(l) < min or len(l) > max:
        raise error.ParseError(err)
    return l

188

189

def binary(mctx, x):
    """``binary()``
    File that appears to be binary (contains NUL bytes).
    """
    # x is the argument node; anything but an empty list is rejected
    getargs(x, 0, 0, _("binary takes no arguments"))
    # every candidate's content is read and handed to util.binary()
    return [f for f in mctx.subset if util.binary(mctx.ctx[f].data())]

195

196

def exec_(mctx, x):
    """``exec()``
    File that is marked as executable.
    """
    # named exec_ because exec is a reserved word; registered as 'exec'
    getargs(x, 0, 0, _("exec takes no arguments"))
    return [f for f in mctx.subset if mctx.ctx.flags(f) == 'x']

202

203

def symlink(mctx, x):
    """``symlink()``
    File that is marked as a symlink.
    """
    # x is the argument node; anything but an empty list is rejected
    getargs(x, 0, 0, _("symlink takes no arguments"))
    return [f for f in mctx.subset if mctx.ctx.flags(f) == 'l']

209

210

def resolved(mctx, x):
    """``resolved()``
    File that is marked resolved according to the resolve state.
    """
    getargs(x, 0, 0, _("resolved takes no arguments"))
    # a context with a revision number gets an empty result; presumably
    # rev() is None only for the working directory -- TODO confirm
    if mctx.ctx.rev() is not None:
        return []
    ms = merge.mergestate(mctx.ctx._repo)
    return [f for f in mctx.subset if f in ms and ms[f] == 'r']

219

220

def unresolved(mctx, x):
    """``unresolved()``
    File that is marked unresolved according to the resolve state.
    """
    getargs(x, 0, 0, _("unresolved takes no arguments"))
    # a context with a revision number gets an empty result; presumably
    # rev() is None only for the working directory -- TODO confirm
    if mctx.ctx.rev() is not None:
        return []
    ms = merge.mergestate(mctx.ctx._repo)
    return [f for f in mctx.subset if f in ms and ms[f] == 'u']

229

230

def hgignore(mctx, x):
    """``hgignore()``
    File that matches the active .hgignore pattern.
    """
    # x is the argument node; anything but an empty list is rejected
    getargs(x, 0, 0, _("hgignore takes no arguments"))
    # uses the dirstate's ignore callable directly, independent of status
    ignore = mctx.ctx._repo.dirstate._ignore
    return [f for f in mctx.subset if ignore(f)]

237

238

def grep(mctx, x):
    """``grep(regex)``
    File contains the given regular expression.
    """
    pat = getstring(x, _("grep requires a pattern"))
    try:
        r = re.compile(pat)
    except re.error:
        # report a bad user-supplied regex as a parse error instead of
        # letting the raw re.error escape
        raise error.ParseError(_("invalid match pattern: %s") % pat)
    # the whole content of every candidate file is read and searched
    return [f for f in mctx.subset if r.search(mctx.ctx[f].data())]

245

246

# size suffix -> multiplier in bytes, used by _sizetoint and _sizetomax.
# NOTE(review): k/kB/M/MB/G/GB are powers of 1024 while kiB/MiB/GiB are
# powers of 1000, the reverse of the usual IEC meaning of "KiB". The
# size() help text documents exactly this mapping, so the values are
# kept as they are -- confirm whether that is intended.
_units = dict(k=2**10, K=2**10, kB=2**10, KB=2**10,
              M=2**20, MB=2**20, G=2**30, GB=2**30,
              kiB=10**3, MiB=10**6, GiB=10**9)

249

250

def _sizetoint(s):
    """Convert a size string such as '20k', '.5MiB' or '512' to bytes.

    A recognised _units suffix scales the (possibly fractional) number in
    front of it; without a suffix the string must be a plain integer.

    Raises error.ParseError when the number cannot be parsed.
    """
    try:
        s = s.strip()
        for k, v in _units.items():
            if s.endswith(k):
                return int(float(s[:-len(k)]) * v)
        return int(s)
    except ValueError:
        # the original re-raised here, which left its ParseError
        # unreachable and leaked a raw ValueError to the caller
        raise error.ParseError(_("couldn't parse size: %s") % s)

260

261

def _sizetomax(s):
    """Return the largest byte count that still reads as size string s.

    With a _units suffix the last written digit is bumped by one and one
    byte subtracted; without a suffix the value is exact.

    Raises error.ParseError when the number cannot be parsed.
    """
    try:
        s = s.strip()
        for k, v in _units.items():
            if s.endswith(k):
                # max(4k) = 5k - 1, max(4.5k) = 4.6k - 1
                n = s[:-len(k)]
                inc = 1.0
                if "." in n:
                    # one unit in the last decimal place that was written
                    inc /= 10 ** len(n.split(".")[1])
                return int((float(n) + inc) * v) - 1
        # no extension, this is a precise value
        return int(s)
    except ValueError:
        # the original re-raised here, which left its ParseError
        # unreachable and leaked a raw ValueError to the caller
        raise error.ParseError(_("couldn't parse size: %s") % s)

277

278

def size(mctx, x):
    """``size(expression)``
    File size matches the given expression. Examples:

    - 1k (files from 1024 to 2047 bytes)
    - 1.0kiB (files from 1000 to 1100 bytes)
    - < 20k (files less than 20480 bytes)
    - >= .5MiB (files at least 500000 bytes)
    - 4k - 1MB (files from 4096 bytes to 1048576 bytes)
    """

    expr = getstring(x, _("size requires an expression")).strip()
    if '-' in expr: # do we have a range?
        a, b = expr.split('-', 1)
        a = _sizetoint(a)
        b = _sizetoint(b)
        m = lambda n: n >= a and n <= b
    elif expr.startswith("<="):
        # two-character operators are tested before their one-character
        # prefixes so "<=" is not read as "<" followed by "="
        a = _sizetoint(expr[2:])
        m = lambda n: n <= a
    elif expr.startswith("<"):
        a = _sizetoint(expr[1:])
        m = lambda n: n < a
    elif expr.startswith(">="):
        a = _sizetoint(expr[2:])
        m = lambda n: n >= a
    elif expr.startswith(">"):
        a = _sizetoint(expr[1:])
        m = lambda n: n > a
    elif expr[:1].isdigit() or expr[:1] == '.':
        # a bare size matches everything that reads as that size.
        # expr[:1] (not expr[0]) keeps an empty expression from raising
        # IndexError, and isdigit is now actually called
        a = _sizetoint(expr)
        b = _sizetomax(expr)
        m = lambda n: n >= a and n <= b
    else:
        raise error.ParseError(_("couldn't parse size: %s") % expr)

    return [f for f in mctx.subset if m(mctx.ctx[f].size())]

315

246

# predicate name (as written in a fileset expression) -> implementation;
# func() dispatches through this table
symbols = {
    'added': added,
    'binary': binary,
    'clean': clean,
    'deleted': deleted,
    'exec': exec_,
    'grep': grep,
    'ignored': ignored,
    'hgignore': hgignore,
    'modified': modified,
    'removed': removed,
    'resolved': resolved,
    'size': size,
    'symlink': symlink,
    'unknown': unknown,
    'unresolved': unresolved,
}

262

333

263

# parse node type -> evaluator; getset() dispatches through this table
methods = {
    'string': stringset,
    'symbol': stringset,
    'and': andset,
    'or': orset,
    'list': listset,
    'group': getset,
    'not': notset,
    'func': func,
}

273

344

274

class matchctx(object):
    """Evaluation state for a fileset query.

    Holds the context being queried (ctx), the candidate files still
    under consideration (subset) and the status data computed once by
    getfileset, or None when no status predicate is used.
    """

    def __init__(self, ctx, subset=None, status=None):
        self.ctx = ctx
        self.subset = subset
        self._status = status

    def status(self):
        """Return the status data given at construction time."""
        return self._status

    def matcher(self, patterns):
        """Return ctx.match(patterns)."""
        return self.ctx.match(patterns)

    def filter(self, files):
        """Return the entries of files that are in the current subset."""
        return [f for f in files if f in self.subset]

    def narrow(self, files):
        """Return a new matchctx whose subset is limited to files.

        ctx and status are shared with this instance.
        """
        return matchctx(self.ctx, self.filter(files), self._status)

287

358

288

def _intree(funcs, tree):
    """Return True if tree contains a call to any function named in funcs.

    Only ('func', ('symbol', name), ...) nodes count as calls; every
    tuple element after the node type is searched recursively.
    """
    if isinstance(tree, tuple):
        if tree[0] == 'func' and tree[1][0] == 'symbol':
            if tree[1][1] in funcs:
                return True
        for s in tree[1:]:
            if _intree(funcs, s):
                return True
    return False

297

368

298

def getfileset(ctx, expr):
    """Parse fileset expression expr and evaluate it against ctx.

    Returns whatever the top-level evaluator returns (the predicates in
    this file return lists of file names).

    Raises error.ParseError when the expression is not fully consumed by
    the parser, or from tokenizing/evaluation.
    """
    tree, pos = parse(expr)
    if (pos != len(expr)):
        raise error.ParseError(_("invalid token"), pos)

    # do we need status info?
    if _intree(['modified', 'added', 'removed', 'deleted',
                'unknown', 'ignored', 'clean'], tree):
        # unknown/ignored files are only requested when a predicate
        # asks for them
        unknown = _intree(['unknown'], tree)
        ignored = _intree(['ignored'], tree)

        r = ctx._repo
        status = r.status(ctx.p1(), ctx,
                          unknown=unknown, ignored=ignored, clean=True)
        # candidates are every file mentioned in any status category
        subset = []
        for c in status:
            subset.extend(c)
    else:
        status = None
        # subset is iterated and membership-tested several times during
        # evaluation, so it must be a real list and not a one-shot
        # iterator, whatever ctx.walk() happens to return
        subset = list(ctx.walk(ctx.match([])))

    return getset(matchctx(ctx, subset, status), tree)

320

391

321

# tell hggettext to extract docstrings from these functions:

392

# tell hggettext to extract docstrings from these functions:

322

i18nfunctions = symbols.values()

393

i18nfunctions = symbols.values()

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # fileset.py - file set queries for mercurial
             #
             # Copyright 2010 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             import parser, error, util, merge, re
             from i18n import _
             # Token table handed to parser.parser() below. Every token type maps to a
             # 3-tuple; presumably (binding strength, prefix action, infix action) as
             # interpreted by parser.py -- TODO confirm against that module.
             elements = {
                 "(": (20, ("group", 1, ")"), ("func", 1, ")")),
                 "-": (5, ("negate", 19), ("minus", 5)),
                 "not": (10, ("not", 10)),
                 "!": (10, ("not", 10)),
                 "and": (5, None, ("and", 5)),
                 "&": (5, None, ("and", 5)),
                 "or": (4, None, ("or", 4)),
                 "|": (4, None, ("or", 4)),
                 "+": (4, None, ("or", 4)),
                 ",": (2, None, ("list", 2)),
                 ")": (0, None, None),
                 "symbol": (0, ("symbol",), None),
                 "string": (0, ("string",), None),
                 "end": (0, None, None),
             }
             # words that tokenize() emits as operator tokens instead of 'symbol'
             keywords = set(['and', 'or', 'not'])
             # non-alphanumeric characters that tokenize() accepts inside a symbol
             globchars = ".*{}[]?/\\"
             def tokenize(program):
                 """Split a fileset expression into tokens for the parser.
                 Yields (type, value, position) tuples. Single-character operators
                 and keywords carry a value of None, 'string' and 'symbol' tokens
                 carry their text, and a final ('end', None, pos) token is always
                 produced. Raises error.ParseError on an unterminated string or on
                 a character that cannot start any token.
                 """
                 pos, l = 0, len(program)
                 while pos < l:
                     c = program[pos]
                     if c.isspace(): # skip inter-token whitespace
                         pass
                     elif c in "(),-|&+!": # handle simple operators
                         yield (c, None, pos)
                     elif (c in '"\'' or c == 'r' and
                           program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
                         if c == 'r':
                             # raw string: skip the prefix and keep backslashes verbatim
                             pos += 1
                             c = program[pos]
                             decode = lambda x: x
                         else:
                             decode = lambda x: x.decode('string-escape')
                         pos += 1
                         s = pos
                         while pos < l: # find closing quote
                             d = program[pos]
                             if d == '\\': # skip over escaped characters
                                 pos += 2
                                 continue
                             if d == c:
                                 yield ('string', decode(program[s:pos]), s)
                                 break
                             pos += 1
                         else:
                             # the scan ran off the end without meeting the closing quote
                             raise error.ParseError(_("unterminated string"), s)
                     elif c.isalnum() or c in globchars or ord(c) > 127:
                         # gather up a symbol/keyword
                         s = pos
                         pos += 1
                         while pos < l: # find end of symbol
                             d = program[pos]
                             if not (d.isalnum() or d in globchars or ord(d) > 127):
                                 break
                             pos += 1
                         sym = program[s:pos]
                         if sym in keywords: # operator keywords
                             yield (sym, None, s)
                         else:
                             yield ('symbol', sym, s)
                         # step back so the shared "pos += 1" below lands on the
                         # character that ended the symbol
                         pos -= 1
                     else:
                         raise error.ParseError(_("syntax error"), pos)
                     pos += 1
                 yield ('end', None, pos)
             # bound parse method of a parser built from tokenize and elements;
             # getfileset() unpacks its result as (tree, pos)
             parse = parser.parser(tokenize, elements).parse
             def getstring(x, err):
                 """Return the text carried by a 'string' or 'symbol' node.
                 Raises error.ParseError(err) when x is empty or is any other kind
                 of node.
                 """
                 if not x or x[0] not in ('string', 'symbol'):
                     raise error.ParseError(err)
                 return x[1]
             def getset(mctx, x):
                 """Evaluate parse tree node x against mctx.
                 The node type x[0] selects the handler from the methods table and
                 the remaining elements of x are passed on as its arguments.
                 Raises error.ParseError when x is empty.
                 """
                 if x:
                     handler = methods[x[0]]
                     return handler(mctx, *x[1:])
                 raise error.ParseError(_("missing argument"))
             def stringset(mctx, x):
                 """Return the files of mctx.subset accepted by a matcher for x."""
                 matchfn = mctx.matcher([x])
                 matched = []
                 for name in mctx.subset:
                     if matchfn(name):
                         matched.append(name)
                 return matched
             def andset(mctx, x, y):
                 """Evaluate y over only those files that x selected."""
                 left = getset(mctx, x)
                 narrowed = mctx.narrow(left)
                 return getset(narrowed, y)
             def orset(mctx, x, y):
                 """Return the files of x followed by the files of y not already in x."""
                 # needs optimizing
                 first = getset(mctx, x)
                 second = getset(mctx, y)
                 extra = [name for name in second if name not in first]
                 return first + extra
             def notset(mctx, x):
                 """Return the files of mctx.subset that x does not select."""
                 excluded = set(getset(mctx, x))
                 kept = []
                 for name in mctx.subset:
                     if name not in excluded:
                         kept.append(name)
                 return kept
             def listset(mctx, a, b):
                 """Reject a 'list' node that is evaluated as a file set.
                 Always raises error.ParseError; getlist() is what consumes 'list'
                 nodes when they appear as predicate arguments.
                 """
                 raise error.ParseError(_("can't use a list in this context"))
             def modified(mctx, x):
                 """``modified()``
                 File that is modified according to status.
                 """
                 getargs(x, 0, 0, _("modified takes no arguments"))
                 # entry 0 of the status result is what this predicate selects from
                 modifiedfiles = mctx.status()[0]
                 return [name for name in mctx.subset if name in modifiedfiles]
             def added(mctx, x):
                 """``added()``
                 File that is added according to status.
                 """
                 getargs(x, 0, 0, _("added takes no arguments"))
                 # entry 1 of the status result is what this predicate selects from
                 addedfiles = mctx.status()[1]
                 return [name for name in mctx.subset if name in addedfiles]
             def removed(mctx, x):
                 """``removed()``
                 File that is removed according to status.
                 """
                 getargs(x, 0, 0, _("removed takes no arguments"))
                 # entry 2 of the status result is what this predicate selects from
                 removedfiles = mctx.status()[2]
                 return [name for name in mctx.subset if name in removedfiles]
             def deleted(mctx, x):
                 """``deleted()``
                 File that is deleted according to status.
                 """
                 getargs(x, 0, 0, _("deleted takes no arguments"))
                 # entry 3 of the status result is what this predicate selects from
                 deletedfiles = mctx.status()[3]
                 return [name for name in mctx.subset if name in deletedfiles]
             def unknown(mctx, x):
                 """``unknown()``
                 File that is unknown according to status. These files will only be
                 considered if this predicate is used.
                 """
                 getargs(x, 0, 0, _("unknown takes no arguments"))
                 # entry 4 of the status result is what this predicate selects from
                 unknownfiles = mctx.status()[4]
                 return [name for name in mctx.subset if name in unknownfiles]
             def ignored(mctx, x):
                 """``ignored()``
                 File that is ignored according to status. These files will only be
                 considered if this predicate is used.
                 """
                 getargs(x, 0, 0, _("ignored takes no arguments"))
                 # entry 5 of the status result is what this predicate selects from
                 ignoredfiles = mctx.status()[5]
                 return [name for name in mctx.subset if name in ignoredfiles]
             def clean(mctx, x):
                 """``clean()``
                 File that is clean according to status.
                 """
                 getargs(x, 0, 0, _("clean takes no arguments"))
                 # entry 6 of the status result is what this predicate selects from
                 cleanfiles = mctx.status()[6]
                 return [name for name in mctx.subset if name in cleanfiles]
             def func(mctx, a, b):
                 """Call the predicate named by symbol node a with argument tree b.
                 Raises error.ParseError when a is not a symbol registered in the
                 symbols table.
                 """
                 if a[0] != 'symbol' or a[1] not in symbols:
                     raise error.ParseError(_("not a function: %s") % a[1])
                 predicate = symbols[a[1]]
                 return predicate(mctx, b)
             def getlist(x):
                 """Flatten a left-nested chain of 'list' nodes into a Python list.
                 An empty tree gives [], a single non-list node gives [node], and
                 ('list', rest, last) gives the items of rest followed by last.
                 """
                 items = []
                 node = x
                 # walk down the left spine, collecting the right-hand items
                 while node and node[0] == 'list':
                     items.append(node[2])
                     node = node[1]
                 if node:
                     items.append(node)
                 # items were gathered last-to-first
                 items.reverse()
                 return items
             def getargs(x, min, max, err):
                 """Return the argument list of x, checking its length.
                 Raises error.ParseError(err) unless the number of arguments lies
                 between min and max inclusive.
                 """
                 args = getlist(x)
                 if min <= len(args) <= max:
                     return args
                 raise error.ParseError(err)
             def binary(mctx, x):
                 """``binary()``
                 File that appears to be binary (contains NUL bytes).
                 """
                 getargs(x, 0, 0, _("binary takes no arguments"))
                 # util.binary() makes the decision from each file's full contents
                 return [f for f in mctx.subset if util.binary(mctx.ctx[f].data())]
             def exec_(mctx, x):
                 """``exec()``
                 File that is marked as executable.
                 """
                 getargs(x, 0, 0, _("exec takes no arguments"))
                 # selected when the context reports exactly the 'x' flag
                 flagsof = mctx.ctx.flags
                 return [name for name in mctx.subset if flagsof(name) == 'x']
             def symlink(mctx, x):
                 """``symlink()``
                 File that is marked as a symlink.
                 """
                 getargs(x, 0, 0, _("symlink takes no arguments"))
                 # selected when the context reports exactly the 'l' flag
                 flagsof = mctx.ctx.flags
                 return [name for name in mctx.subset if flagsof(name) == 'l']
             def resolved(mctx, x):
                 """``resolved()``
                 File that is marked resolved according to the resolve state.
                 """
                 getargs(x, 0, 0, _("resolved takes no arguments"))
                 context = mctx.ctx
                 # a context with a revision number never matches; presumably only
                 # the working directory has rev() of None -- TODO confirm
                 if context.rev() is not None:
                     return []
                 mergestate = merge.mergestate(context._repo)
                 return [name for name in mctx.subset
                         if name in mergestate and mergestate[name] == 'r']
             def unresolved(mctx, x):
                 """``unresolved()``
                 File that is marked unresolved according to the resolve state.
                 """
                 getargs(x, 0, 0, _("unresolved takes no arguments"))
                 context = mctx.ctx
                 # a context with a revision number never matches; presumably only
                 # the working directory has rev() of None -- TODO confirm
                 if context.rev() is not None:
                     return []
                 mergestate = merge.mergestate(context._repo)
                 return [name for name in mctx.subset
                         if name in mergestate and mergestate[name] == 'u']
             def hgignore(mctx, x):
                 """``hgignore()``
                 File that matches the active .hgignore pattern.
                 """
                 getargs(x, 0, 0, _("hgignore takes no arguments"))
                 # the dirstate's ignore matcher is called once per candidate file
                 ignore = mctx.ctx._repo.dirstate._ignore
                 return [f for f in mctx.subset if ignore(f)]
             def grep(mctx, x):
                 """``grep(regex)``
                 File contains the given regular expression.
                 """
                 pat = getstring(x, _("grep requires a pattern"))
                 try:
                     r = re.compile(pat)
                 except re.error:
                     # report a malformed pattern as a parse error, like every
                     # other bad predicate argument, instead of a raw re.error
                     raise error.ParseError(_("invalid match pattern: %s") % pat)
                 return [f for f in mctx.subset if r.search(mctx.ctx[f].data())]
+            # Unit suffix -> byte multiplier used by _sizetoint() and _sizetomax().
+            # NOTE(review): the plain and "B" suffixes are binary (2**10, ...) while
+            # the "iB" suffixes are decimal (10**3, ...), the reverse of the usual
+            # IEC meaning of kiB/MiB/GiB. The size() docstring documents it this
+            # way, so confirm the intent before changing it.
+            _units = dict(k=2**10, K=2**10, kB=2**10, KB=2**10,
+                          M=2**20, MB=2**20, G=2**30, GB=2**30,
+                          kiB=10**3, MiB=10**6, GiB=10**9)
+            def _sizetoint(s):
+                try:
+                    s = s.strip()
+                    for k, v in _units.items():
+                        if s.endswith(k):
+                            return int(float(s[:-len(k)]) * v)
+                    return int(s)
+                except ValueError:
+                    raise
+                    raise error.ParseError(_("couldn't parse size"), s)
+            def _sizetomax(s):
+                try:
+                    s = s.strip()
+                    for k, v in _units.items():
+                        if s.endswith(k):
+                            # max(4k) = 5k - 1, max(4.5k) = 4.6k - 1
+                            n = s[:-len(k)]
+                            inc = 1.0
+                            if "." in n:
+                                inc /= 10 ** len(n.split(".")[1])
+                            return int((float(n) + inc) * v) - 1
+                    # no extension, this is a precise value
+                    return int(s)
+                except ValueError:
+                    raise
+                    raise error.ParseError(_("couldn't parse size"), s)
+            def size(mctx, x):
+                """``size(expression)``
+                File size matches the given expression. Examples:
+                - 1k (files from 1024 to 2047 bytes)
+                - 1.0kiB (files from 1000 to 1100 bytes)
+                - < 20k (files less than 20480 bytes)
+                - >= .5MiB (files at least 500000 bytes)
+                - 4k - 1MB (files from 4096 bytes to 1048576 bytes)
+                """
+                expr = getstring(x, _("grep requires a pattern")).strip()
+                if '-' in expr: # do we have a range?
+                    a, b = expr.split('-', 1)
+                    a = _sizetoint(a)
+                    b = _sizetoint(b)
+                    m = lambda x: x >= a and x <= b
+                elif expr.startswith("<="):
+                    a = _sizetoint(expr[2:])
+                    m = lambda x: x <= a
+                elif expr.startswith("<"):
+                    a = _sizetoint(expr[1:])
+                    m = lambda x: x < a
+                elif expr.startswith(">="):
+                    a = _sizetoint(expr[2:])
+                    m = lambda x: x >= a
+                elif expr.startswith(">"):
+                    a = _sizetoint(expr[1:])
+                    m = lambda x: x > a
+                elif expr[0].isdigit or expr[0] == '.':
+                    a = _sizetoint(expr)
+                    b = _sizetomax(expr)
+                    m = lambda x: x >=a and x <= b
+                else:
+                    raise error.ParseError(_("couldn't parse size"), expr)
+                return [f for f in mctx.subset if m(mctx.ctx[f].size())]
             # predicate name -> implementation; func() resolves function calls in
             # an expression through this table
             symbols = {
                 'added': added,
                 'binary': binary,
                 'clean': clean,
                 'deleted': deleted,
                 'exec': exec_,
                 'grep': grep,
                 'ignored': ignored,
                 'hgignore': hgignore,
                 'modified': modified,
                 'removed': removed,
                 'resolved': resolved,
+                'size': size,
                 'symlink': symlink,
                 'unknown': unknown,
                 'unresolved': unresolved,
             }
             # parse tree node type -> evaluator, dispatched on x[0] by getset()
             methods = {
                 'string': stringset,
                 'symbol': stringset,
                 'and': andset,
                 'or': orset,
                 'list': listset,
                 'group': getset,
                 'not': notset,
                 'func': func,
             }
             class matchctx(object):
                 """Evaluation state for a fileset expression.
                 Bundles the context being queried, the candidate files still under
                 consideration (subset) and an optional precomputed status result.
                 """
                 def __init__(self, ctx, subset=None, status=None):
                     self._status = status
                     self.subset = subset
                     self.ctx = ctx
                 def status(self):
                     """Return the status result given at construction time."""
                     return self._status
                 def matcher(self, patterns):
                     """Build a matcher for patterns through the underlying context."""
                     return self.ctx.match(patterns)
                 def filter(self, files):
                     """Return the entries of files that are in the current subset."""
                     kept = []
                     for name in files:
                         if name in self.subset:
                             kept.append(name)
                     return kept
                 def narrow(self, files):
                     """Return a new matchctx restricted to files within this subset."""
                     remaining = self.filter(files)
                     return matchctx(self.ctx, remaining, self._status)
             def _intree(funcs, tree):
                 if isinstance(tree, tuple):
                     if tree[0] == 'func' and tree[1][0] == 'symbol':
                         if tree[1][1] in funcs:
                             return True
                     for s in tree[1:]:
                         if _intree(funcs, s):
                             return True
                 return False
             def getfileset(ctx, expr):
                 """Evaluate the fileset expression expr against ctx.
                 Returns whatever getset() produces for the parsed tree, starting
                 from a matchctx over the candidate files of ctx.
                 Raises error.ParseError when the parser stops before the end of
                 expr.
                 """
                 tree, pos = parse(expr)
                 if pos != len(expr):
                     # wrapped in _() like every other parse error in this module
                     raise error.ParseError(_("invalid token"), pos)
                 # do we need status info?
                 if _intree(['modified', 'added', 'removed', 'deleted',
                             'unknown', 'ignored', 'clean'], tree):
                     # unknown and ignored files are only requested from status
                     # when the expression actually uses those predicates
                     unknown = _intree(['unknown'], tree)
                     ignored = _intree(['ignored'], tree)
                     r = ctx._repo
                     status = r.status(ctx.p1(), ctx,
                                       unknown=unknown, ignored=ignored, clean=True)
                     # the candidate set is every file named in any status list
                     subset = []
                     for c in status:
                         subset.extend(c)
                 else:
                     status = None
                     subset = ctx.walk(ctx.match([]))
                 return getset(matchctx(ctx, subset, status), tree)
             # tell hggettext to extract docstrings from these functions:
             # (every predicate registered in the symbols table)
             i18nfunctions = symbols.values()