upstream/mercurial-mirror Commit - r21112:03782d2f

1

# match.py - filename matching

1

# match.py - filename matching

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

import re

8

import re

9

import util, pathutil

9

import util, pathutil

10

from i18n import _

10

from i18n import _

11

12

def _rematcher(regex):
    '''compile the regexp with the best available regexp engine and return a
    matcher function

    The pattern is compiled with util.compilere(). If the compiled object
    has a test_match attribute, that is returned; otherwise the compiled
    object's match attribute is returned.'''
    m = util.compilere(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        return m.test_match
    except AttributeError:
        return m.match

21

22

def _expandsets(kindpats, ctx):

22

def _expandsets(kindpats, ctx):

23

'''Returns the kindpats list with the 'set' patterns expanded.'''

23

'''Returns the kindpats list with the 'set' patterns expanded.'''

24

fset = set()

24

fset = set()

25

other = []

25

other = []

26

27

for kind, pat in kindpats:

27

for kind, pat in kindpats:

28

if kind == 'set':

28

if kind == 'set':

29

if not ctx:

29

if not ctx:

30

raise util.Abort("fileset expression with no context")

30

raise util.Abort("fileset expression with no context")

31

s = ctx.getfileset(pat)

31

s = ctx.getfileset(pat)

32

fset.update(s)

32

fset.update(s)

33

continue

33

continue

34

other.append((kind, pat))

34

other.append((kind, pat))

35

return fset, other

35

return fset, other

36

37

class match(object):
    def __init__(self, root, cwd, patterns, include=[], exclude=[],
                 default='glob', exact=False, auditor=None, ctx=None):
        """build an object to match a set of file patterns

        arguments:
        root - the canonical root of the tree you're matching against
        cwd - the current working directory, if relevant
        patterns - patterns to find
        include - patterns to include (unless they are excluded)
        exclude - patterns to exclude (even if they are included)
        default - if a pattern in patterns has no explicit type, assume this one
        exact - patterns are actually filenames (include/exclude still apply)
        auditor - passed through to _normalize() for each pattern list
        ctx - passed to _buildmatch() (needed there for 'set:' patterns)

        a pattern is one of:
        'glob:<glob>' - a glob relative to cwd
        're:<regexp>' - a regular expression
        'path:<path>' - a path relative to repository root
        'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
        'relpath:<path>' - a path relative to cwd
        'relre:<regexp>' - a regexp that needn't match the start of a name
        'set:<fileset>' - a fileset expression
        '<something>' - a pattern of the specified default type
        """

        # NOTE: the include/exclude list defaults are only read in this
        # method, never mutated, so sharing them between calls is harmless.
        self._root = root
        self._cwd = cwd
        self._files = [] # exact files and roots of patterns
        self._anypats = bool(include or exclude)
        self._ctx = ctx
        self._always = False

        # im, em and pm are the include, exclude and pattern match functions;
        # each is only bound when the corresponding argument is non-empty.
        if include:
            kindpats = _normalize(include, 'glob', root, cwd, auditor)
            self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)')
        if exclude:
            kindpats = _normalize(exclude, 'glob', root, cwd, auditor)
            self.excludepat, em = _buildmatch(ctx, kindpats, '(?:/|$)')
        if exact:
            if isinstance(patterns, list):
                self._files = patterns
            else:
                self._files = list(patterns)
            pm = self.exact
        elif patterns:
            kindpats = _normalize(patterns, default, root, cwd, auditor)
            self._files = _roots(kindpats)
            self._anypats = self._anypats or _anypats(kindpats)
            self.patternspat, pm = _buildmatch(ctx, kindpats, '$')

        # Combine the available functions into a single predicate, referencing
        # only the ones that were bound above.
        if patterns or exact:
            if include:
                if exclude:
                    m = lambda f: im(f) and not em(f) and pm(f)
                else:
                    m = lambda f: im(f) and pm(f)
            else:
                if exclude:
                    m = lambda f: not em(f) and pm(f)
                else:
                    m = pm
        else:
            if include:
                if exclude:
                    m = lambda f: im(f) and not em(f)
                else:
                    m = im
            else:
                if exclude:
                    m = lambda f: not em(f)
                else:
                    # no patterns, includes or excludes: match everything
                    m = lambda f: True
                    self._always = True

        self.matchfn = m
        # set copy of _files for the membership test in exact()
        self._fmap = set(self._files)

    def __call__(self, fn):
        '''Return the result of the combined match function for fn.'''
        return self.matchfn(fn)
    def __iter__(self):
        '''Iterate over the entries of .files().'''
        for f in self._files:
            yield f

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''
        pass

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def missing(self, f):
        '''Callback hook taking a file name; does nothing by default.'''
        pass

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fmap

    def anypats(self):
        '''Matcher uses patterns or include/exclude.'''
        return self._anypats

    def always(self):
        '''Matcher will match everything and .files() will be empty
        - optimization might be possible and necessary.'''
        return self._always

162

163

class exact(match):
    '''Matcher built with exact=True: files are treated as literal file
    names rather than patterns.'''
    def __init__(self, root, cwd, files):
        match.__init__(self, root, cwd, files, exact=True)

166

167

class always(match):
    '''Matcher built with an empty pattern list and no include/exclude.'''
    def __init__(self, root, cwd):
        match.__init__(self, root, cwd, [])
        # set explicitly so .always() is True regardless of what the base
        # constructor computed
        self._always = True

171

172

class narrowmatcher(match):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = narrowmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> m2.rel('b.txt')
    'b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        # match.__init__ is deliberately not called: all state is copied or
        # derived from the wrapped matcher.
        self._root = matcher._root
        self._cwd = matcher._cwd
        self._path = path
        self._matcher = matcher
        self._always = matcher._always

        # keep only the wrapped matcher's files below path, with the
        # "path/" prefix stripped
        self._files = [f[len(path) + 1:] for f in matcher._files
                       if f.startswith(path + "/")]
        self._anypats = matcher._anypats
        # re-add the prefix before delegating to the wrapped match function
        self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)
        self._fmap = set(self._files)

    def bad(self, f, msg):
        '''Forward to the wrapped matcher's bad() with path prepended to f.'''
        self._matcher.bad(self._path + "/" + f, msg)

215

216

def patkind(pattern, default=None):
    '''If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise return default.'''
    return _patsplit(pattern, default)[0]

219

220

def _patsplit(pattern, default):

220

def _patsplit(pattern, default):

221

"""Split a string into the optional pattern kind prefix and the actual

221

"""Split a string into the optional pattern kind prefix and the actual

222

pattern."""

222

pattern."""

223

if ':' in pattern:

223

if ':' in pattern:

224

kind, pat = pattern.split(':', 1)

224

kind, pat = pattern.split(':', 1)

225

if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',

225

if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',

226

'listfile', 'listfile0', 'set'):

226

'listfile', 'listfile0', 'set'):

227

return kind, pat

227

return kind, pat

228

return default, pattern

228

return default, pattern

229

230

def _globre(pat):

230

def _globre(pat):

231

'''Convert an extended glob string to a regexp string.~~'''~~

231

r'''Convert an extended glob string to a regexp string.

232

233

>>> print _globre(r'?')

234

.

235

>>> print _globre(r'*')

236

[^/]*

237

>>> print _globre(r'**')

238

.*

239

>>> print _globre(r'[a*?!^][^b][!c]')

240

[a*?!^][\^b][^c]

241

>>> print _globre(r'{a,b}')

242

(?:a|b)

243

>>> print _globre(r'.\*\?')

244

\.\*\?

245

'''

232

i, n = 0, len(pat)

246

i, n = 0, len(pat)

233

res = ''

247

res = ''

234

group = 0

248

group = 0

235

escape = re.escape

249

escape = re.escape

236

def peek():

250

def peek():

237

return i < n and pat[i]

251

return i < n and pat[i]

238

while i < n:

252

while i < n:

239

c = pat[i]

253

c = pat[i]

240

i += 1

254

i += 1

241

if c not in '*?[{},\\':

255

if c not in '*?[{},\\':

242

res += escape(c)

256

res += escape(c)

243

elif c == '*':

257

elif c == '*':

244

if peek() == '*':

258

if peek() == '*':

245

i += 1

259

i += 1

246

res += '.*'

260

res += '.*'

247

else:

261

else:

248

res += '[^/]*'

262

res += '[^/]*'

249

elif c == '?':

263

elif c == '?':

250

res += '.'

264

res += '.'

251

elif c == '[':

265

elif c == '[':

252

j = i

266

j = i

253

if j < n and pat[j] in '!]':

267

if j < n and pat[j] in '!]':

254

j += 1

268

j += 1

255

while j < n and pat[j] != ']':

269

while j < n and pat[j] != ']':

256

j += 1

270

j += 1

257

if j >= n:

271

if j >= n:

258

res += '\\['

272

res += '\\['

259

else:

273

else:

260

stuff = pat[i:j].replace('\\','\\\\')

274

stuff = pat[i:j].replace('\\','\\\\')

261

i = j + 1

275

i = j + 1

262

if stuff[0] == '!':

276

if stuff[0] == '!':

263

stuff = '^' + stuff[1:]

277

stuff = '^' + stuff[1:]

264

elif stuff[0] == '^':

278

elif stuff[0] == '^':

265

stuff = '\\' + stuff

279

stuff = '\\' + stuff

266

res = '%s[%s]' % (res, stuff)

280

res = '%s[%s]' % (res, stuff)

267

elif c == '{':

281

elif c == '{':

268

group += 1

282

group += 1

269

res += '(?:'

283

res += '(?:'

270

elif c == '}' and group:

284

elif c == '}' and group:

271

res += ')'

285

res += ')'

272

group -= 1

286

group -= 1

273

elif c == ',' and group:

287

elif c == ',' and group:

274

res += '|'

288

res += '|'

275

elif c == '\\':

289

elif c == '\\':

276

p = peek()

290

p = peek()

277

if p:

291

if p:

278

i += 1

292

i += 1

279

res += escape(p)

293

res += escape(p)

280

else:

294

else:

281

res += escape(c)

295

res += escape(c)

282

else:

296

else:

283

res += escape(c)

297

res += escape(c)

284

return res

298

return res

285

299

286

def _regex(kind, pat, globsuffix):

300

def _regex(kind, pat, globsuffix):

287

'''Convert a (normalized) pattern of any kind into a regular expression.

301

'''Convert a (normalized) pattern of any kind into a regular expression.

288

globsuffix is appended to the regexp of globs.'''

302

globsuffix is appended to the regexp of globs.'''

289

if not pat:

303

if not pat:

290

return ''

304

return ''

291

if kind == 're':

305

if kind == 're':

292

return pat

306

return pat

293

if kind == 'path':

307

if kind == 'path':

294

return '^' + re.escape(pat) + '(?:/|$)'

308

return '^' + re.escape(pat) + '(?:/|$)'

295

if kind == 'relglob':

309

if kind == 'relglob':

296

return '(?:|.*/)' + _globre(pat) + globsuffix

310

return '(?:|.*/)' + _globre(pat) + globsuffix

297

if kind == 'relpath':

311

if kind == 'relpath':

298

return re.escape(pat) + '(?:/|$)'

312

return re.escape(pat) + '(?:/|$)'

299

if kind == 'relre':

313

if kind == 'relre':

300

if pat.startswith('^'):

314

if pat.startswith('^'):

301

return pat

315

return pat

302

return '.*' + pat

316

return '.*' + pat

303

return _globre(pat) + globsuffix

317

return _globre(pat) + globsuffix

304

318

305

def _buildmatch(ctx, kindpats, globsuffix):
    '''Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs.

    'set' patterns are expanded through ctx into a set of names; the
    returned function matches a name that is in that set or that matches
    the regexp built from the remaining patterns. If only 'set' patterns
    (or no patterns) were given, the regexp string is empty.'''
    fset, kindpats = _expandsets(kindpats, ctx)
    if not kindpats:
        return "", fset.__contains__

    regex, mf = _buildregexmatch(kindpats, globsuffix)
    if fset:
        return regex, lambda f: f in fset or mf(f)
    return regex, mf

316

330

317

def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Raises util.Abort if one of the patterns is not a valid regexp, and
    re-raises OverflowError if a single pattern is still too large."""
    try:
        regex = '(?:%s)' % '|'.join([_regex(k, p, globsuffix)
                                     for (k, p) in kindpats])
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        l = len(kindpats)
        if l < 2:
            raise
        a = _buildregexmatch(kindpats[:l//2], globsuffix)[1]
        b = _buildregexmatch(kindpats[l//2:], globsuffix)[1]
        # regex is always bound here: it is assigned before anything in
        # the try block can raise OverflowError. (This used to return the
        # undefined name 'pat', turning the fallback into a NameError.)
        return regex, lambda s: a(s) or b(s)
    except re.error:
        # compile the patterns one by one to report the offending one
        for k, p in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
        raise util.Abort(_("invalid pattern"))

343

357

344

def _normalize(patterns, default, root, cwd, auditor):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Returns a list of (kind, pat) tuples. Raises util.Abort if a
    listfile/listfile0 file cannot be read.'''
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in ('glob', 'relpath'):
            pat = pathutil.canonpath(root, cwd, pat, auditor)
        elif kind in ('relglob', 'path'):
            pat = util.normpath(pat)
        elif kind in ('listfile', 'listfile0'):
            try:
                files = util.readfile(pat)
                # listfile0 entries are NUL-separated, listfile entries are
                # one per line
                if kind == 'listfile0':
                    files = files.split('\0')
                else:
                    files = files.splitlines()
                # drop empty entries
                files = [f for f in files if f]
            except EnvironmentError:
                raise util.Abort(_("unable to read file list (%s)") % pat)
            # each entry is itself a pattern; normalize them recursively
            kindpats += _normalize(files, default, root, cwd, auditor)
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat))
    return kindpats

368

382

369

def _roots(kindpats):

383

def _roots(kindpats):

370

'''return roots and exact explicitly listed files from patterns

384

'''return roots and exact explicitly listed files from patterns

371

385

372

>>> _roots([('glob', 'g/*'), ('glob', 'g'), ('glob', 'g*')])

386

>>> _roots([('glob', 'g/*'), ('glob', 'g'), ('glob', 'g*')])

373

['g', 'g', '.']

387

['g', 'g', '.']

374

>>> _roots([('relpath', 'r'), ('path', 'p/p'), ('path', '')])

388

>>> _roots([('relpath', 'r'), ('path', 'p/p'), ('path', '')])

375

['r', 'p/p', '.']

389

['r', 'p/p', '.']

376

>>> _roots([('relglob', 'rg*'), ('re', 're/'), ('relre', 'rr')])

390

>>> _roots([('relglob', 'rg*'), ('re', 're/'), ('relre', 'rr')])

377

['.', '.', '.']

391

['.', '.', '.']

378

'''

392

'''

379

r = []

393

r = []

380

for kind, pat in kindpats:

394

for kind, pat in kindpats:

381

if kind == 'glob': # find the non-glob prefix

395

if kind == 'glob': # find the non-glob prefix

382

root = []

396

root = []

383

for p in pat.split('/'):

397

for p in pat.split('/'):

384

if '[' in p or '{' in p or '*' in p or '?' in p:

398

if '[' in p or '{' in p or '*' in p or '?' in p:

385

break

399

break

386

root.append(p)

400

root.append(p)

387

r.append('/'.join(root) or '.')

401

r.append('/'.join(root) or '.')

388

elif kind in ('relpath', 'path'):

402

elif kind in ('relpath', 'path'):

389

r.append(pat or '.')

403

r.append(pat or '.')

390

else: # relglob, re, relre

404

else: # relglob, re, relre

391

r.append('.')

405

r.append('.')

392

return r

406

return r

393

407

394

def _anypats(kindpats):

408

def _anypats(kindpats):

395

for kind, pat in kindpats:

409

for kind, pat in kindpats:

396

if kind in ('glob', 're', 'relglob', 'relre', 'set'):

410

if kind in ('glob', 're', 'relglob', 'relre', 'set'):

397

return True

411

return True

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # match.py - filename matching
             #
             #  Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             import re
             import util, pathutil
             from i18n import _
             def _rematcher(regex):
                 '''Compile regex through util.compilere and return a callable that
                 matches a string against it.

                 The compiled object's test_match attribute is used when it exists
                 (re2 bindings provide it and it is slightly faster); otherwise the
                 ordinary match method is returned.'''
                 compiled = util.compilere(regex)
                 try:
                     matchfn = compiled.test_match
                 except AttributeError:
                     # plain compiled regexps have no test_match
                     matchfn = compiled.match
                 return matchfn
             def _expandsets(kindpats, ctx):
                 '''Separate the 'set' patterns of kindpats from all the others.

                 Returns a pair (fset, other): fset is the set of everything returned
                 by ctx.getfileset() for each 'set' pattern, other is the list of the
                 remaining (kind, pat) tuples in their original order.

                 Raises util.Abort if a 'set' pattern is met while ctx is false.'''
                 expanded = set()
                 remaining = []

                 for kind, pat in kindpats:
                     if kind != 'set':
                         remaining.append((kind, pat))
                     elif not ctx:
                         raise util.Abort("fileset expression with no context")
                     else:
                         expanded.update(ctx.getfileset(pat))
                 return expanded, remaining
             class match(object):
                 def __init__(self, root, cwd, patterns, include=[], exclude=[],
                              default='glob', exact=False, auditor=None, ctx=None):
                     """Create a matcher for a set of file patterns.

                     arguments:
                     root - the canonical root of the tree you're matching against
                     cwd - the current working directory, if relevant
                     patterns - patterns to find
                     include - patterns to include (unless they are excluded)
                     exclude - patterns to exclude (even if they are included)
                     default - kind assumed for a pattern without an explicit kind
                     exact - patterns are actually filenames (include/exclude still apply)

                     a pattern is one of:
                     'glob:<glob>' - a glob relative to cwd
                     're:<regexp>' - a regular expression
                     'path:<path>' - a path relative to repository root
                     'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
                     'relpath:<path>' - a path relative to cwd
                     'relre:<regexp>' - a regexp that needn't match the start of a name
                     'set:<fileset>' - a fileset expression
                     '<something>' - a pattern of the specified default type
                     """
                     self._root = root
                     self._cwd = cwd
                     self._ctx = ctx
                     self._always = False
                     self._files = [] # exact files and roots of patterns
                     self._anypats = bool(include or exclude)

                     # im, em and pm are the include, exclude and pattern matchers; each
                     # one is only bound when the corresponding input was given
                     if include:
                         inckindpats = _normalize(include, 'glob', root, cwd, auditor)
                         self.includepat, im = _buildmatch(ctx, inckindpats, '(?:/|$)')
                     if exclude:
                         exckindpats = _normalize(exclude, 'glob', root, cwd, auditor)
                         self.excludepat, em = _buildmatch(ctx, exckindpats, '(?:/|$)')
                     if exact:
                         if not isinstance(patterns, list):
                             self._files = list(patterns)
                         else:
                             # a list is stored as is, without copying
                             self._files = patterns
                         pm = self.exact
                     elif patterns:
                         patkindpats = _normalize(patterns, default, root, cwd, auditor)
                         self._files = _roots(patkindpats)
                         self._anypats = self._anypats or _anypats(patkindpats)
                         self.patternspat, pm = _buildmatch(ctx, patkindpats, '$')

                     # combine the available matchers into the final match function
                     haspatterns = patterns or exact
                     if include and exclude:
                         if haspatterns:
                             m = lambda f: im(f) and not em(f) and pm(f)
                         else:
                             m = lambda f: im(f) and not em(f)
                     elif include:
                         if haspatterns:
                             m = lambda f: im(f) and pm(f)
                         else:
                             m = im
                     elif exclude:
                         if haspatterns:
                             m = lambda f: not em(f) and pm(f)
                         else:
                             m = lambda f: not em(f)
                     elif haspatterns:
                         m = pm
                     else:
                         # nothing restricts the match at all
                         m = lambda f: True
                         self._always = True

                     self.matchfn = m
                     self._fmap = set(self._files)

                 def __call__(self, fn):
                     '''Apply the match function to fn.'''
                     return self.matchfn(fn)

                 def __iter__(self):
                     '''Iterate over the entries of .files().'''
                     for name in self._files:
                         yield name

                 # Callbacks related to how the matcher is used by dirstate.walk.
                 # Subscribers to these events must monkeypatch the matcher object.
                 def bad(self, f, msg):
                     '''Callback from dirstate.walk for each explicit file that can't be
                     found/accessed, with an error message. Does nothing by default.'''
                     pass

                 # If an explicitdir is set, it will be called when an explicitly listed
                 # directory is visited.
                 explicitdir = None

                 # If a traversedir is set, it will be called when a directory discovered
                 # by recursive traversal is visited.
                 traversedir = None

                 def missing(self, f):
                     '''Callback hook; does nothing by default.'''
                     pass

                 def rel(self, f):
                     '''Convert repo path back to path that is relative to cwd of matcher.'''
                     return util.pathto(self._root, self._cwd, f)

                 def files(self):
                     '''Explicitly listed files or patterns or roots:
                     if no patterns or .always(): empty list,
                     if exact: list exact files,
                     if not .anypats(): list all files and dirs,
                     else: optimal roots'''
                     return self._files

                 def exact(self, f):
                     '''Returns True if f is in .files().'''
                     return f in self._fmap

                 def anypats(self):
                     '''Matcher uses patterns or include/exclude.'''
                     return self._anypats

                 def always(self):
                     '''Matcher will match everything and .files() will be empty
                     - optimization might be possible and necessary.'''
                     return self._always
             class exact(match):
                 '''Matcher whose patterns are a plain list of file names.'''
                 def __init__(self, root, cwd, files):
                     # exact=True makes the base class treat files as literal names
                     super(exact, self).__init__(root, cwd, files, exact=True)
             class always(match):
                 '''Matcher built without any pattern and flagged as always matching.'''
                 def __init__(self, root, cwd):
                     super(always, self).__init__(root, cwd, [])
                     self._always = True
             class narrowmatcher(match):
                 """Adapt a matcher to work on a subdirectory only.

                 The paths are remapped to remove/insert the path as needed:

                 >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
                 >>> m2 = narrowmatcher('sub', m1)
                 >>> bool(m2('a.txt'))
                 False
                 >>> bool(m2('b.txt'))
                 True
                 >>> bool(m2.matchfn('a.txt'))
                 False
                 >>> bool(m2.matchfn('b.txt'))
                 True
                 >>> m2.files()
                 ['b.txt']
                 >>> m2.exact('b.txt')
                 True
                 >>> m2.rel('b.txt')
                 'b.txt'
                 >>> def bad(f, msg):
                 ...     print "%s: %s" % (f, msg)
                 >>> m1.bad = bad
                 >>> m2.bad('x.txt', 'No such file')
                 sub/x.txt: No such file
                 """

                 def __init__(self, path, matcher):
                     '''Wrap matcher so that names are taken relative to path.'''
                     prefix = path + "/"
                     self._path = path
                     self._matcher = matcher
                     # state copied unchanged from the wrapped matcher
                     self._root = matcher._root
                     self._cwd = matcher._cwd
                     self._always = matcher._always
                     self._anypats = matcher._anypats
                     # keep only the files below path, with the prefix stripped
                     narrowed = []
                     for name in matcher._files:
                         if name.startswith(prefix):
                             narrowed.append(name[len(prefix):])
                     self._files = narrowed
                     self._fmap = set(narrowed)
                     # put the prefix back before asking the wrapped matcher
                     self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)

                 def bad(self, f, msg):
                     '''Forward to the wrapped matcher's bad() with path prepended to f.'''
                     self._matcher.bad(self._path + "/" + f, msg)
             def patkind(pattern, default=None):
                 '''Return the kind of pattern: the 'kind' prefix of 'kind:pat' when it
                 is a known kind, default otherwise.'''
                 kind, unusedpat = _patsplit(pattern, default)
                 return kind
             def _patsplit(pattern, default):
                 """Split pattern into a (kind, pat) pair.

                 When pattern has the form 'kind:pat' and kind is one of the known
                 pattern kinds, that kind and the text after the first ':' are
                 returned. In every other case the result is (default, pattern)."""
                 knownkinds = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
                               'listfile', 'listfile0', 'set')
                 pieces = pattern.split(':', 1)
                 # two pieces means there was a ':' in pattern
                 if len(pieces) == 2 and pieces[0] in knownkinds:
                     return pieces[0], pieces[1]
                 return default, pattern
             def _globre(pat):
                 r'''Convert an extended glob string to a regexp string.

                 Supported syntax: '?' (any single character), '*' (anything but
                 '/'), '**' (anything), '[...]' character classes where a leading
                 '!' negates, '{a,b}' alternation and backslash escapes. Any other
                 character is escaped with re.escape.

                 >>> print _globre(r'?')
                 .
                 >>> print _globre(r'*')
                 [^/]*
                 >>> print _globre(r'**')
                 .*
                 >>> print _globre(r'[a*?!^][^b][!c]')
                 [a*?!^][\^b][^c]
                 >>> print _globre(r'{a,b}')
                 (?:a|b)
                 >>> print _globre(r'.\*\?')
                 \.\*\?
                 '''
                 i, n = 0, len(pat)
                 parts = []
                 group = 0 # number of currently open '{' groups
                 escape = re.escape
                 def peek():
                     # next unread character, or False at the end of pat
                     return i < n and pat[i]
                 while i < n:
                     c = pat[i]
                     i += 1
                     if c not in '*?[{},\\':
                         parts.append(escape(c))
                     elif c == '*':
                         if peek() == '*':
                             i += 1
                             parts.append('.*')
                         else:
                             parts.append('[^/]*')
                     elif c == '?':
                         parts.append('.')
                     elif c == '[':
                         # look for the closing bracket; a ']' (or '!') directly
                         # after '[' is part of the class
                         j = i
                         if j < n and pat[j] in '!]':
                             j += 1
                         while j < n and pat[j] != ']':
                             j += 1
                         if j >= n:
                             # unterminated class: treat '[' literally
                             parts.append('\\[')
                         else:
                             stuff = pat[i:j].replace('\\','\\\\')
                             i = j + 1
                             if stuff[0] == '!':
                                 # glob negation becomes regexp negation
                                 stuff = '^' + stuff[1:]
                             elif stuff[0] == '^':
                                 # a leading '^' is literal in a glob class
                                 stuff = '\\' + stuff
                             parts.append('[%s]' % stuff)
                     elif c == '{':
                         group += 1
                         parts.append('(?:')
                     elif c == '}' and group:
                         parts.append(')')
                         group -= 1
                     elif c == ',' and group:
                         parts.append('|')
                     elif c == '\\':
                         p = peek()
                         if p:
                             i += 1
                             parts.append(escape(p))
                         else:
                             # trailing backslash stands for itself
                             parts.append(escape(c))
                     else:
                         # '}' or ',' outside of any group
                         parts.append(escape(c))
                 return ''.join(parts)
             def _regex(kind, pat, globsuffix):
                 '''Translate one (normalized) pattern into a regular expression string.

                 kind selects the translation; any kind not handled explicitly is
                 treated as a glob. globsuffix is appended to the regexp produced for
                 globs ('relglob' included). An empty pat yields an empty string.'''
                 if not pat:
                     return ''
                 pathend = '(?:/|$)' # a path matches itself and everything below it
                 if kind == 're':
                     regex = pat
                 elif kind == 'relre':
                     if pat.startswith('^'):
                         regex = pat
                     else:
                         # unanchored: allow anything in front
                         regex = '.*' + pat
                 elif kind == 'path':
                     regex = '^' + re.escape(pat) + pathend
                 elif kind == 'relpath':
                     regex = re.escape(pat) + pathend
                 elif kind == 'relglob':
                     regex = '(?:|.*/)' + _globre(pat) + globsuffix
                 else:
                     regex = _globre(pat) + globsuffix
                 return regex
             def _buildmatch(ctx, kindpats, globsuffix):
                 '''Build a matcher for kindpats and return (regexp string, matcher).

                 'set' patterns are expanded into a set of files through ctx; the
                 other patterns are compiled into a regexp. globsuffix is appended to
                 the regexp of globs.'''
                 fileset, rest = _expandsets(kindpats, ctx)
                 if rest:
                     regex, regexmatch = _buildregexmatch(rest, globsuffix)
                     if not fileset:
                         return regex, regexmatch
                     # a file matches when it is in the fileset or matches the regexp
                     return regex, lambda f: f in fileset or regexmatch(f)
                 # only 'set' patterns (or nothing at all): membership test is enough
                 return "", fileset.__contains__
             def _buildregexmatch(kindpats, globsuffix):
                 """Build a match function from a list of (kind, pat) tuples and
                 return (regexp string, matcher function).

                 All patterns are joined into a single alternation. When that regexp
                 is longer than 20000 characters, or compiling it raises
                 OverflowError, the pattern list is split in two halves that are
                 built separately and the returned matcher tries both; the regexp
                 string returned is still the complete alternation.

                 Raises util.Abort when a pattern is not a valid regexp, and lets
                 OverflowError through when a single pattern alone overflows."""
                 try:
                     regex = '(?:%s)' % '|'.join([_regex(k, p, globsuffix)
                                                  for (k, p) in kindpats])
                     if len(regex) > 20000:
                         raise OverflowError
                     return regex, _rematcher(regex)
                 except OverflowError:
                     # We're using a Python with a tiny regex engine and we
                     # made it explode, so we'll divide the pattern list in two
                     # until it works
                     l = len(kindpats)
                     if l < 2:
                         raise
                     a = _buildregexmatch(kindpats[:l//2], globsuffix)[1]
                     b = _buildregexmatch(kindpats[l//2:], globsuffix)[1]
                     # return the regexp built above; this used to reference an
                     # undefined name and failed with NameError
                     return regex, lambda s: a(s) or b(s)
                 except re.error:
                     # compile the patterns one by one to report the offending one
                     for k, p in kindpats:
                         try:
                             _rematcher('(?:%s)' % _regex(k, p, globsuffix))
                         except re.error:
                             raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
                     raise util.Abort(_("invalid pattern"))
             def _normalize(patterns, default, root, cwd, auditor):
                 '''Turn the 'kind:pat' strings of patterns into a list of (kind, pat)
                 tuples with normalized and rooted patterns.

                 Patterns without a kind get default. 'listfile' and 'listfile0'
                 patterns are replaced by the normalized patterns read from the named
                 file (one per line, or NUL-separated for 'listfile0').

                 Raises util.Abort when a list file cannot be read.'''
                 normalized = []
                 splitpats = [_patsplit(p, default) for p in patterns]
                 for kind, pat in splitpats:
                     if kind in ('listfile', 'listfile0'):
                         try:
                             data = util.readfile(pat)
                         except EnvironmentError:
                             raise util.Abort(_("unable to read file list (%s)") % pat)
                         if kind == 'listfile0':
                             entries = data.split('\0')
                         else:
                             entries = data.splitlines()
                         # drop empty entries, then normalize the listed patterns
                         entries = [e for e in entries if e]
                         normalized.extend(_normalize(entries, default, root, cwd,
                                                      auditor))
                         continue
                     if kind in ('glob', 'relpath'):
                         pat = pathutil.canonpath(root, cwd, pat, auditor)
                     elif kind in ('relglob', 'path'):
                         pat = util.normpath(pat)
                     # else: re or relre - which cannot be normalized
                     normalized.append((kind, pat))
                 return normalized
             def _roots(kindpats):
                 '''return roots and exact explicitly listed files from patterns

                 One entry is produced per pattern: the leading path components of a
                 glob that contain no glob character, the path itself for 'path' and
                 'relpath', and '.' for every other kind or whenever the result would
                 be empty.

                 >>> _roots([('glob', 'g/*'), ('glob', 'g'), ('glob', 'g*')])
                 ['g', 'g', '.']
                 >>> _roots([('relpath', 'r'), ('path', 'p/p'), ('path', '')])
                 ['r', 'p/p', '.']
                 >>> _roots([('relglob', 'rg*'), ('re', 're/'), ('relre', 'rr')])
                 ['.', '.', '.']
                 '''
                 globchars = '[{*?'
                 roots = []
                 for kind, pat in kindpats:
                     if kind in ('relpath', 'path'):
                         roots.append(pat or '.')
                     elif kind == 'glob':
                         # collect components up to the first one using glob syntax
                         prefix = []
                         for segment in pat.split('/'):
                             if [c for c in globchars if c in segment]:
                                 break
                             prefix.append(segment)
                         roots.append('/'.join(prefix) or '.')
                     else: # relglob, re, relre
                         roots.append('.')
                 return roots
             def _anypats(kindpats):
                 '''Return True if at least one of the (kind, pat) tuples in kindpats
                 has a kind that is a real pattern (glob, re, relglob, relre or set),
                 False otherwise.'''
                 for kind, pat in kindpats:
                     if kind in ('glob', 're', 'relglob', 'relre', 'set'):
                         return True
                 # explicit boolean instead of falling off the end with None
                 return False