upstream/mercurial-mirror Commit - r24789:0b1577c8

1

# match.py - filename matching

1

# match.py - filename matching

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

import re

8

import re

9

import util, pathutil

9

import util, pathutil

10

from i18n import _

10

from i18n import _

11

12

propertycache = util.propertycache

12

propertycache = util.propertycache

13

14

def _rematcher(regex):
    '''compile the regexp with the best available regexp engine and return a
    matcher function

    The pattern is compiled through util.re. If the compiled object exposes
    a test_match attribute, that is returned; otherwise its match method is
    returned.'''
    m = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        return m.test_match
    except AttributeError:
        # compiled object has no test_match: fall back to match
        return m.match

23

24

def _expandsets(kindpats, ctx):

24

def _expandsets(kindpats, ctx):

25

'''Returns the kindpats list with the 'set' patterns expanded.'''

25

'''Returns the kindpats list with the 'set' patterns expanded.'''

26

fset = set()

26

fset = set()

27

other = []

27

other = []

28

29

for kind, pat in kindpats:

29

for kind, pat in kindpats:

30

if kind == 'set':

30

if kind == 'set':

31

if not ctx:

31

if not ctx:

32

raise util.Abort("fileset expression with no context")

32

raise util.Abort("fileset expression with no context")

33

s = ctx.getfileset(pat)

33

s = ctx.getfileset(pat)

34

fset.update(s)

34

fset.update(s)

35

continue

35

continue

36

other.append((kind, pat))

36

other.append((kind, pat))

37

return fset, other

37

return fset, other

38

39

def _kindpatsalwaysmatch(kindpats):

39

def _kindpatsalwaysmatch(kindpats):

40

""""Checks whether the kindspats match everything, as e.g.

40

""""Checks whether the kindspats match everything, as e.g.

41

'relpath:.' does.

41

'relpath:.' does.

42

"""

42

"""

43

for kind, pat in kindpats:

43

for kind, pat in kindpats:

44

if pat != '' or kind not in ['relpath', 'glob']:

44

if pat != '' or kind not in ['relpath', 'glob']:

45

return False

45

return False

46

return True

46

return True

47

48

class match(object):
    '''Callable deciding whether a file name is selected by a set of
    patterns, optionally combined with include and exclude patterns.'''

    def __init__(self, root, cwd, patterns, include=[], exclude=[],
                 default='glob', exact=False, auditor=None, ctx=None):
        """build an object to match a set of file patterns

        arguments:
        root - the canonical root of the tree you're matching against
        cwd - the current working directory, if relevant
        patterns - patterns to find
        include - patterns to include (unless they are excluded)
        exclude - patterns to exclude (even if they are included)
        default - if a pattern in patterns has no explicit type, assume this one
        exact - patterns are actually filenames (include/exclude still apply)

        a pattern is one of:
        'glob:<glob>' - a glob relative to cwd
        're:<regexp>' - a regular expression
        'path:<path>' - a path relative to repository root
        'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
        'relpath:<path>' - a path relative to cwd
        'relre:<regexp>' - a regexp that needn't match the start of a name
        'set:<fileset>' - a fileset expression
        '<something>' - a pattern of the specified default type
        """

        self._root = root
        self._cwd = cwd
        self._files = [] # exact files and roots of patterns
        self._anypats = bool(include or exclude)
        self._always = False
        self._pathrestricted = bool(include or exclude or patterns)

        # every function collected here must accept a file for it to match
        matchfns = []
        if include:
            kindpats = self._normalize(include, 'glob', root, cwd, auditor)
            self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)')
            matchfns.append(im)
        if exclude:
            kindpats = self._normalize(exclude, 'glob', root, cwd, auditor)
            self.excludepat, em = _buildmatch(ctx, kindpats, '(?:/|$)')
            matchfns.append(lambda f: not em(f))
        if exact:
            if isinstance(patterns, list):
                self._files = patterns
            else:
                self._files = list(patterns)
            matchfns.append(self.exact)
        elif patterns:
            kindpats = self._normalize(patterns, default, root, cwd, auditor)
            # patterns that match everything add no restriction at all
            if not _kindpatsalwaysmatch(kindpats):
                self._files = _roots(kindpats)
                self._anypats = self._anypats or _anypats(kindpats)
                self.patternspat, pm = _buildmatch(ctx, kindpats, '$')
                matchfns.append(pm)

        if not matchfns:
            m = util.always
            self._always = True
        elif len(matchfns) == 1:
            m = matchfns[0]
        else:
            def m(f):
                for matchfn in matchfns:
                    if not matchfn(f):
                        return False
                return True

        self.matchfn = m
        self._fmap = set(self._files)

    def __call__(self, fn):
        '''Return the result of the combined match function for fn.'''
        return self.matchfn(fn)

    def __iter__(self):
        '''Iterate over the entries of .files().'''
        for f in self._files:
            yield f

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''
        pass

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def abs(self, f):
        '''Convert a repo path back to path that is relative to the root of the
        matcher.'''
        return f

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path.  If patterns or -I/-X were used
        to create this matcher, the display path will be relative to cwd.
        Otherwise it is relative to the root of the repo.'''
        return (self._pathrestricted and self.rel(f)) or self.abs(f)

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _dirs(self):
        # directories derived from the listed files, plus '.'
        return set(util.dirs(self._fmap)) | set(['.'])

    def visitdir(self, dir):
        '''Helps while traversing a directory tree. Returns the string 'all' if
        the given directory and all subdirectories should be visited. Otherwise
        returns True or False indicating whether the given directory should be
        visited. If 'all' is returned, calling this method on a subdirectory
        gives an undefined result.'''
        if not self._fmap or self.exact(dir):
            return 'all'
        return dir in self._dirs

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fmap

    def anypats(self):
        '''Matcher uses patterns or include/exclude.'''
        return self._anypats

    def always(self):
        '''Matcher will match everything and .files() will be empty
        - optimization might be possible and necessary.'''
        return self._always

    def isexact(self):
        '''Return whether the match function is the exact() method itself.'''
        return self.matchfn == self.exact

    def _normalize(self, patterns, default, root, cwd, auditor):
        '''Convert 'kind:pat' from the patterns list to tuples with kind and
        normalized and rooted patterns and with listfiles expanded.'''
        kindpats = []
        for kind, pat in [_patsplit(p, default) for p in patterns]:
            if kind in ('glob', 'relpath'):
                pat = pathutil.canonpath(root, cwd, pat, auditor)
            elif kind in ('relglob', 'path'):
                pat = util.normpath(pat)
            elif kind in ('listfile', 'listfile0'):
                try:
                    files = util.readfile(pat)
                    if kind == 'listfile0':
                        files = files.split('\0')
                    else:
                        files = files.splitlines()
                    # drop empty entries
                    files = [f for f in files if f]
                except EnvironmentError:
                    raise util.Abort(_("unable to read file list (%s)") % pat)
                # the listed entries are patterns themselves: normalize them
                kindpats += self._normalize(files, default, root, cwd, auditor)
                continue
            # else: re or relre - which cannot be normalized
            kindpats.append((kind, pat))
        return kindpats

216

192

def exact(root, cwd, files):
    '''Return a matcher built with exact=True, so the given files are
    treated as file names rather than patterns.'''
    return match(root, cwd, files, exact=True)

194

219

195

def always(root, cwd):
    '''Return a matcher built from an empty pattern list.'''
    return match(root, cwd, [])

197

222

198

class narrowmatcher(match):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = narrowmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> util.pconvert(m2.rel('b.txt'))
    'sub/b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    >>> m2.abs('c.txt')
    'sub/c.txt'
    """

    def __init__(self, path, matcher):
        '''Wrap matcher so that names are interpreted relative to path.

        path - the subdirectory to narrow to
        matcher - the matcher being adapted'''
        self._root = matcher._root
        self._cwd = matcher._cwd
        self._path = path
        self._matcher = matcher
        self._always = matcher._always
        self._pathrestricted = matcher._pathrestricted

        # keep only the wrapped matcher's files below path, with the
        # "path/" prefix stripped
        self._files = [f[len(path) + 1:] for f in matcher._files
                       if f.startswith(path + "/")]
        self._anypats = matcher._anypats
        self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)
        self._fmap = set(self._files)

    def abs(self, f):
        '''Prefix f with the narrowed path and delegate to the wrapped
        matcher's abs().'''
        return self._matcher.abs(self._path + "/" + f)

    def bad(self, f, msg):
        '''Prefix f with the narrowed path and forward to the wrapped
        matcher's bad() callback.'''
        self._matcher.bad(self._path + "/" + f, msg)

    def rel(self, f):
        '''Prefix f with the narrowed path and delegate to the wrapped
        matcher's rel().'''
        return self._matcher.rel(self._path + "/" + f)

250

275

251

def patkind(pattern, default=None):
    '''If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise default is returned.'''
    return _patsplit(pattern, default)[0]

254

279

255

def _patsplit(pattern, default):

280

def _patsplit(pattern, default):

256

"""Split a string into the optional pattern kind prefix and the actual

281

"""Split a string into the optional pattern kind prefix and the actual

257

pattern."""

282

pattern."""

258

if ':' in pattern:

283

if ':' in pattern:

259

kind, pat = pattern.split(':', 1)

284

kind, pat = pattern.split(':', 1)

260

if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',

285

if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',

261

'listfile', 'listfile0', 'set'):

286

'listfile', 'listfile0', 'set'):

262

return kind, pat

287

return kind, pat

263

return default, pattern

288

return default, pattern

264

289

265

def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> print _globre(r'?')
    .
    >>> print _globre(r'*')
    [^/]*
    >>> print _globre(r'**')
    .*
    >>> print _globre(r'**/a')
    (?:.*/)?a
    >>> print _globre(r'a/**/b')
    a\/(?:.*/)?b
    >>> print _globre(r'[a*?!^][^b][!c]')
    [a*?!^][\^b][^c]
    >>> print _globre(r'{a,b}')
    (?:a|b)
    >>> print _globre(r'.\*\?')
    \.\*\?
    '''
    i, n = 0, len(pat)
    res = ''
    group = 0  # nesting depth of currently open '{' groups
    escape = util.re.escape
    def peek():
        # next unread character, or False at the end of the pattern
        return i < n and pat[i]
    while i < n:
        c = pat[i]
        i += 1
        if c not in '*?[{},\\':
            res += escape(c)
        elif c == '*':
            if peek() == '*':
                i += 1
                if peek() == '/':
                    # '**/' also matches an empty directory prefix
                    i += 1
                    res += '(?:.*/)?'
                else:
                    res += '.*'
            else:
                res += '[^/]*'
        elif c == '?':
            res += '.'
        elif c == '[':
            # look for the closing ']'; a leading '!' or ']' belongs to the
            # character class itself
            j = i
            if j < n and pat[j] in '!]':
                j += 1
            while j < n and pat[j] != ']':
                j += 1
            if j >= n:
                # unterminated class: treat '[' as a literal
                res += '\\['
            else:
                stuff = pat[i:j].replace('\\','\\\\')
                i = j + 1
                if stuff[0] == '!':
                    stuff = '^' + stuff[1:]
                elif stuff[0] == '^':
                    stuff = '\\' + stuff
                res = '%s[%s]' % (res, stuff)
        elif c == '{':
            group += 1
            res += '(?:'
        elif c == '}' and group:
            res += ')'
            group -= 1
        elif c == ',' and group:
            res += '|'
        elif c == '\\':
            p = peek()
            if p:
                # backslash escapes the following character
                i += 1
                res += escape(p)
            else:
                res += escape(c)
        else:
            # '}' or ',' outside of any group
            res += escape(c)
    return res

342

367

343

def _regex(kind, pat, globsuffix):

368

def _regex(kind, pat, globsuffix):

344

'''Convert a (normalized) pattern of any kind into a regular expression.

369

'''Convert a (normalized) pattern of any kind into a regular expression.

345

globsuffix is appended to the regexp of globs.'''

370

globsuffix is appended to the regexp of globs.'''

346

if not pat:

371

if not pat:

347

return ''

372

return ''

348

if kind == 're':

373

if kind == 're':

349

return pat

374

return pat

350

if kind == 'path':

375

if kind == 'path':

351

return '^' + util.re.escape(pat) + '(?:/|$)'

376

return '^' + util.re.escape(pat) + '(?:/|$)'

352

if kind == 'relglob':

377

if kind == 'relglob':

353

return '(?:|.*/)' + _globre(pat) + globsuffix

378

return '(?:|.*/)' + _globre(pat) + globsuffix

354

if kind == 'relpath':

379

if kind == 'relpath':

355

return util.re.escape(pat) + '(?:/|$)'

380

return util.re.escape(pat) + '(?:/|$)'

356

if kind == 'relre':

381

if kind == 'relre':

357

if pat.startswith('^'):

382

if pat.startswith('^'):

358

return pat

383

return pat

359

return '.*' + pat

384

return '.*' + pat

360

return _globre(pat) + globsuffix

385

return _globre(pat) + globsuffix

361

386

362

def _buildmatch(ctx, kindpats, globsuffix):
    '''Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs.

    'set' patterns are expanded through ctx first. When nothing but 'set'
    patterns was given, the regexp string is empty and matching is plain
    membership in the expanded file set.'''
    fset, kindpats = _expandsets(kindpats, ctx)
    if not kindpats:
        return "", fset.__contains__

    regex, mf = _buildregexmatch(kindpats, globsuffix)
    if fset:
        # a file matches if it is in the fileset or matches the regexp
        return regex, lambda f: f in fset or mf(f)
    return regex, mf

373

398

374

def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Raises util.Abort when a pattern does not compile. An OverflowError
    is re-raised when it occurs with fewer than two patterns left to
    split."""
    try:
        regex = '(?:%s)' % '|'.join([_regex(k, p, globsuffix)
                                     for (k, p) in kindpats])
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        l = len(kindpats)
        if l < 2:
            raise
        _regexa, a = _buildregexmatch(kindpats[:l//2], globsuffix)
        _regexb, b = _buildregexmatch(kindpats[l//2:], globsuffix)
        # the regexp string returned is still the full combined one
        return regex, lambda s: a(s) or b(s)
    except re.error:
        # compile each pattern on its own to report the offending one
        for k, p in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
        raise util.Abort(_("invalid pattern"))

400

425

401

def _normalize(patterns, default, root, cwd, auditor):

402

'''Convert 'kind:pat' from the patterns list to tuples with kind and

403

normalized and rooted patterns and with listfiles expanded.'''

404

kindpats = []

405

for kind, pat in [_patsplit(p, default) for p in patterns]:

406

if kind in ('glob', 'relpath'):

407

pat = pathutil.canonpath(root, cwd, pat, auditor)

408

elif kind in ('relglob', 'path'):

409

pat = util.normpath(pat)

410

elif kind in ('listfile', 'listfile0'):

411

try:

412

files = util.readfile(pat)

413

if kind == 'listfile0':

414

files = files.split('\0')

415

else:

416

files = files.splitlines()

417

files = [f for f in files if f]

418

except EnvironmentError:

419

raise util.Abort(_("unable to read file list (%s)") % pat)

420

kindpats += _normalize(files, default, root, cwd, auditor)

421

continue

422

# else: re or relre - which cannot be normalized

423

kindpats.append((kind, pat))

424

return kindpats

425

426

def _roots(kindpats):

426

def _roots(kindpats):

427

'''return roots and exact explicitly listed files from patterns

427

'''return roots and exact explicitly listed files from patterns

428

429

>>> _roots([('glob', 'g/*'), ('glob', 'g'), ('glob', 'g*')])

429

>>> _roots([('glob', 'g/*'), ('glob', 'g'), ('glob', 'g*')])

430

['g', 'g', '.']

430

['g', 'g', '.']

431

>>> _roots([('relpath', 'r'), ('path', 'p/p'), ('path', '')])

431

>>> _roots([('relpath', 'r'), ('path', 'p/p'), ('path', '')])

432

['r', 'p/p', '.']

432

['r', 'p/p', '.']

433

>>> _roots([('relglob', 'rg*'), ('re', 're/'), ('relre', 'rr')])

433

>>> _roots([('relglob', 'rg*'), ('re', 're/'), ('relre', 'rr')])

434

['.', '.', '.']

434

['.', '.', '.']

435

'''

435

'''

436

r = []

436

r = []

437

for kind, pat in kindpats:

437

for kind, pat in kindpats:

438

if kind == 'glob': # find the non-glob prefix

438

if kind == 'glob': # find the non-glob prefix

439

root = []

439

root = []

440

for p in pat.split('/'):

440

for p in pat.split('/'):

441

if '[' in p or '{' in p or '*' in p or '?' in p:

441

if '[' in p or '{' in p or '*' in p or '?' in p:

442

break

442

break

443

root.append(p)

443

root.append(p)

444

r.append('/'.join(root) or '.')

444

r.append('/'.join(root) or '.')

445

elif kind in ('relpath', 'path'):

445

elif kind in ('relpath', 'path'):

446

r.append(pat or '.')

446

r.append(pat or '.')

447

else: # relglob, re, relre

447

else: # relglob, re, relre

448

r.append('.')

448

r.append('.')

449

return r

449

return r

450

451

def _anypats(kindpats):

451

def _anypats(kindpats):

452

for kind, pat in kindpats:

452

for kind, pat in kindpats:

453

if kind in ('glob', 're', 'relglob', 'relre', 'set'):

453

if kind in ('glob', 're', 'relglob', 'relre', 'set'):

454

return True

454

return True

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # match.py - filename matching
             #
             #  Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             import re
             import util, pathutil
             from i18n import _
             propertycache = util.propertycache
             def _rematcher(regex):
                 '''Compile regex through util.re and return a callable matcher.

                 The compiled object's test_match attribute is returned when it
                 exists (described in this file as slightly faster and provided by
                 facebook's re2 bindings); otherwise its match method is returned.'''
                 compiled = util.re.compile(regex)
                 try:
                     matcher = compiled.test_match
                 except AttributeError:
                     # no test_match on this engine's pattern objects
                     matcher = compiled.match
                 return matcher
             def _expandsets(kindpats, ctx):
                 '''Returns the kindpats list with the 'set' patterns expanded.'''
                 fset = set()
                 other = []
                 for kind, pat in kindpats:
                     if kind == 'set':
                         if not ctx:
                             raise util.Abort("fileset expression with no context")
                         s = ctx.getfileset(pat)
                         fset.update(s)
                         continue
                     other.append((kind, pat))
                 return fset, other
             def _kindpatsalwaysmatch(kindpats):
                 """"Checks whether the kindspats match everything, as e.g.
                 'relpath:.' does.
                 """
                 for kind, pat in kindpats:
                     if pat != '' or kind not in ['relpath', 'glob']:
                         return False
                 return True
             class match(object):
                 def __init__(self, root, cwd, patterns, include=[], exclude=[],
                              default='glob', exact=False, auditor=None, ctx=None):
                     """build an object to match a set of file patterns
                     arguments:
                     root - the canonical root of the tree you're matching against
                     cwd - the current working directory, if relevant
                     patterns - patterns to find
                     include - patterns to include (unless they are excluded)
                     exclude - patterns to exclude (even if they are included)
                     default - if a pattern in patterns has no explicit type, assume this one
                     exact - patterns are actually filenames (include/exclude still apply)
                     a pattern is one of:
                     'glob:<glob>' - a glob relative to cwd
                     're:<regexp>' - a regular expression
                     'path:<path>' - a path relative to repository root
                     'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
                     'relpath:<path>' - a path relative to cwd
                     'relre:<regexp>' - a regexp that needn't match the start of a name
                     'set:<fileset>' - a fileset expression
                     '<something>' - a pattern of the specified default type
                     """
                     self._root = root
                     self._cwd = cwd
                     self._files = [] # exact files and roots of patterns
                     self._anypats = bool(include or exclude)
                     self._always = False
                     self._pathrestricted = bool(include or exclude or patterns)
                     matchfns = []
                     if include:
-                        kindpats = _normalize(include, 'glob', root, cwd, auditor)
+                        kindpats = self._normalize(include, 'glob', root, cwd, auditor)
                         self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)')
                         matchfns.append(im)
                     if exclude:
-                        kindpats = _normalize(exclude, 'glob', root, cwd, auditor)
+                        kindpats = self._normalize(exclude, 'glob', root, cwd, auditor)
                         self.excludepat, em = _buildmatch(ctx, kindpats, '(?:/|$)')
                         matchfns.append(lambda f: not em(f))
                     if exact:
                         if isinstance(patterns, list):
                             self._files = patterns
                         else:
                             self._files = list(patterns)
                         matchfns.append(self.exact)
                     elif patterns:
-                        kindpats = _normalize(patterns, default, root, cwd, auditor)
+                        kindpats = self._normalize(patterns, default, root, cwd, auditor)
                         if not _kindpatsalwaysmatch(kindpats):
                             self._files = _roots(kindpats)
                             self._anypats = self._anypats or _anypats(kindpats)
                             self.patternspat, pm = _buildmatch(ctx, kindpats, '$')
                             matchfns.append(pm)
                     if not matchfns:
                         m = util.always
                         self._always = True
                     elif len(matchfns) == 1:
                         m = matchfns[0]
                     else:
                         def m(f):
                             for matchfn in matchfns:
                                 if not matchfn(f):
                                     return False
                             return True
                     self.matchfn = m
                     self._fmap = set(self._files)
                 def __call__(self, fn):
                     return self.matchfn(fn)
                 def __iter__(self):
                     for f in self._files:
                         yield f
                 # Callbacks related to how the matcher is used by dirstate.walk.
                 # Subscribers to these events must monkeypatch the matcher object.
                 def bad(self, f, msg):
                     '''Callback from dirstate.walk for each explicit file that can't be
                     found/accessed, with an error message.'''
                     pass
                 # If an explicitdir is set, it will be called when an explicitly listed
                 # directory is visited.
                 explicitdir = None
                 # If a traversedir is set, it will be called when a directory discovered
                 # by recursive traversal is visited.
                 traversedir = None
                 def abs(self, f):
                     '''Convert a repo path back to path that is relative to the root of the
                     matcher.'''
                     return f
                 def rel(self, f):
                     '''Convert repo path back to path that is relative to cwd of matcher.'''
                     return util.pathto(self._root, self._cwd, f)
                 def uipath(self, f):
                     '''Convert repo path to a display path.  If patterns or -I/-X were used
                     to create this matcher, the display path will be relative to cwd.
                     Otherwise it is relative to the root of the repo.'''
                     return (self._pathrestricted and self.rel(f)) or self.abs(f)
                 def files(self):
                     '''Explicitly listed files or patterns or roots:
                     if no patterns or .always(): empty list,
                     if exact: list exact files,
                     if not .anypats(): list all files and dirs,
                     else: optimal roots'''
                     return self._files
                 @propertycache
                 def _dirs(self):
                     return set(util.dirs(self._fmap)) | set(['.'])
                 def visitdir(self, dir):
                     '''Helps while traversing a directory tree. Returns the string 'all' if
                     the given directory and all subdirectories should be visited. Otherwise
                     returns True or False indicating whether the given directory should be
                     visited. If 'all' is returned, calling this method on a subdirectory
                     gives an undefined result.'''
                     if not self._fmap or self.exact(dir):
                         return 'all'
                     return dir in self._dirs
                 def exact(self, f):
                     '''Returns True if f is in .files().'''
                     return f in self._fmap
                 def anypats(self):
                     '''Matcher uses patterns or include/exclude.'''
                     return self._anypats
                 def always(self):
                     '''Matcher will match everything and .files() will be empty
                     - optimization might be possible and necessary.'''
                     return self._always
                 def isexact(self):
                     return self.matchfn == self.exact
+                def _normalize(self, patterns, default, root, cwd, auditor):
+                    '''Convert 'kind:pat' from the patterns list to tuples with kind and
+                    normalized and rooted patterns and with listfiles expanded.'''
+                    kindpats = []
+                    for kind, pat in [_patsplit(p, default) for p in patterns]:
+                        if kind in ('glob', 'relpath'):
+                            pat = pathutil.canonpath(root, cwd, pat, auditor)
+                        elif kind in ('relglob', 'path'):
+                            pat = util.normpath(pat)
+                        elif kind in ('listfile', 'listfile0'):
+                            try:
+                                files = util.readfile(pat)
+                                if kind == 'listfile0':
+                                    files = files.split('\0')
+                                else:
+                                    files = files.splitlines()
+                                files = [f for f in files if f]
+                            except EnvironmentError:
+                                raise util.Abort(_("unable to read file list (%s)") % pat)
+                            kindpats += self._normalize(files, default, root, cwd, auditor)
+                            continue
+                        # else: re or relre - which cannot be normalized
+                        kindpats.append((kind, pat))
+                    return kindpats
             def exact(root, cwd, files):
                 '''Return a match object for root and cwd built with exact=True, so
                 that files are taken as filenames rather than patterns.'''
                 exactmatcher = match(root, cwd, files, exact=True)
                 return exactmatcher
             def always(root, cwd):
                 '''Return a match object for root and cwd built from an empty
                 pattern list.'''
                 nopatterns = []
                 return match(root, cwd, nopatterns)
             class narrowmatcher(match):
                 """Adapt a matcher to work on a subdirectory only.

                 Paths handed to this object are relative to the subdirectory; the
                 subdirectory path is inserted before delegating to the wrapped
                 matcher and removed from the wrapped matcher's file list:

                 >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
                 >>> m2 = narrowmatcher('sub', m1)
                 >>> bool(m2('a.txt'))
                 False
                 >>> bool(m2('b.txt'))
                 True
                 >>> bool(m2.matchfn('a.txt'))
                 False
                 >>> bool(m2.matchfn('b.txt'))
                 True
                 >>> m2.files()
                 ['b.txt']
                 >>> m2.exact('b.txt')
                 True
                 >>> util.pconvert(m2.rel('b.txt'))
                 'sub/b.txt'
                 >>> def bad(f, msg):
                 ...     print "%s: %s" % (f, msg)
                 >>> m1.bad = bad
                 >>> m2.bad('x.txt', 'No such file')
                 sub/x.txt: No such file
                 >>> m2.abs('c.txt')
                 'sub/c.txt'
                 """
                 def __init__(self, path, matcher):
                     # state taken over unchanged from the wrapped matcher
                     self._root = matcher._root
                     self._cwd = matcher._cwd
                     self._always = matcher._always
                     self._pathrestricted = matcher._pathrestricted
                     self._anypats = matcher._anypats
                     self._path = path
                     self._matcher = matcher
                     # keep only the files below path, without the "path/" prefix
                     prefix = path + "/"
                     striplen = len(path) + 1
                     narrowed = []
                     for f in matcher._files:
                         if f.startswith(prefix):
                             narrowed.append(f[striplen:])
                     self._files = narrowed
                     self._fmap = set(narrowed)
                     def narrowedmatchfn(fn):
                         # self._path is read at call time, not captured here
                         return matcher.matchfn(self._path + "/" + fn)
                     self.matchfn = narrowedmatchfn
                 def abs(self, f):
                     widened = self._path + "/" + f
                     return self._matcher.abs(widened)
                 def bad(self, f, msg):
                     widened = self._path + "/" + f
                     self._matcher.bad(widened, msg)
                 def rel(self, f):
                     widened = self._path + "/" + f
                     return self._matcher.rel(widened)
             def patkind(pattern, default=None):
                 '''Return the kind of pattern: the 'kind' of a 'kind:pat' string when
                 that kind is a known one, otherwise default.'''
                 kind, unusedpat = _patsplit(pattern, default)
                 return kind
             def _patsplit(pattern, default):
                 """Split a string into the optional pattern kind prefix and the actual
                 pattern."""
                 if ':' in pattern:
                     kind, pat = pattern.split(':', 1)
                     if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
                                 'listfile', 'listfile0', 'set'):
                         return kind, pat
                 return default, pattern
             def _globre(pat):
                 r'''Convert an extended glob string to a regexp string.

                 The pattern is scanned one character at a time and translated:
                 '*' becomes '[^/]*', '**' becomes '.*', '**/' becomes '(?:.*/)?',
                 '?' becomes '.', '[...]' becomes a regexp character class (a
                 leading '!' is turned into '^', a leading '^' is backslash-quoted),
                 '{', ',' and '}' become '(?:', '|' and ')', and a backslash quotes
                 the character that follows it.  All remaining characters are passed
                 through util.re.escape.

                 >>> print _globre(r'?')
                 .
                 >>> print _globre(r'*')
                 [^/]*
                 >>> print _globre(r'**')
                 .*
                 >>> print _globre(r'**/a')
                 (?:.*/)?a
                 >>> print _globre(r'a/**/b')
                 a\/(?:.*/)?b
                 >>> print _globre(r'[a*?!^][^b][!c]')
                 [a*?!^][\^b][^c]
                 >>> print _globre(r'{a,b}')
                 (?:a|b)
                 >>> print _globre(r'.\*\?')
                 \.\*\?
                 '''
                 # i is the index of the next unread character, n the pattern length
                 i, n = 0, len(pat)
                 res = ''
                 # number of '{' groups currently open
                 group = 0
                 escape = util.re.escape
                 def peek():
                     # next unread character, or False once the pattern is exhausted
                     return i < n and pat[i]
                 while i < n:
                     c = pat[i]
                     i += 1
                     if c not in '*?[{},\\':
                         # not a glob metacharacter: emit it quoted
                         res += escape(c)
                     elif c == '*':
                         if peek() == '*':
                             i += 1
                             if peek() == '/':
                                 # '**/': the slash is consumed as part of the token
                                 i += 1
                                 res += '(?:.*/)?'
                             else:
                                 res += '.*'
                         else:
                             res += '[^/]*'
                     elif c == '?':
                         res += '.'
                     elif c == '[':
                         # look for the closing ']'; a '!' or ']' directly after the
                         # '[' is skipped first, so it cannot end the class
                         j = i
                         if j < n and pat[j] in '!]':
                             j += 1
                         while j < n and pat[j] != ']':
                             j += 1
                         if j >= n:
                             # no closing ']': emit the '[' as a literal
                             res += '\\['
                         else:
                             # double the backslashes inside the class
                             stuff = pat[i:j].replace('\\','\\\\')
                             i = j + 1
                             if stuff[0] == '!':
                                 stuff = '^' + stuff[1:]
                             elif stuff[0] == '^':
                                 stuff = '\\' + stuff
                             res = '%s[%s]' % (res, stuff)
                     elif c == '{':
                         group += 1
                         res += '(?:'
                     elif c == '}' and group:
                         res += ')'
                         group -= 1
                     elif c == ',' and group:
                         res += '|'
                     elif c == '\\':
                         p = peek()
                         if p:
                             # quote the character after the backslash
                             i += 1
                             res += escape(p)
                         else:
                             # trailing backslash: emit the backslash itself quoted
                             res += escape(c)
                     else:
                         # '}' or ',' outside of any '{' group: emit it quoted
                         res += escape(c)
                 return res
             def _regex(kind, pat, globsuffix):
                 '''Convert a (normalized) pattern of any kind into a regular expression.
                 globsuffix is appended to the regexp of globs.'''
                 if not pat:
                     return ''
                 if kind == 're':
                     return pat
                 if kind == 'path':
                     return '^' + util.re.escape(pat) + '(?:/|$)'
                 if kind == 'relglob':
                     return '(?:|.*/)' + _globre(pat) + globsuffix
                 if kind == 'relpath':
                     return util.re.escape(pat) + '(?:/|$)'
                 if kind == 'relre':
                     if pat.startswith('^'):
                         return pat
                     return '.*' + pat
                 return _globre(pat) + globsuffix
             def _buildmatch(ctx, kindpats, globsuffix):
                 '''Return a (regexp string, matcher function) pair for kindpats.

                 The 'set' patterns are expanded with _expandsets; the remaining
                 patterns are compiled by _buildregexmatch, with globsuffix appended
                 to the regexp of globs.  If nothing but 'set' patterns was given, the
                 regexp string is "" and the matcher is a membership test on the
                 expanded set.'''
                 fileset, regexpats = _expandsets(kindpats, ctx)
                 if regexpats:
                     regex, regexmatch = _buildregexmatch(regexpats, globsuffix)
                     if not fileset:
                         return regex, regexmatch
                     def setorregexmatch(f):
                         return f in fileset or regexmatch(f)
                     return regex, setorregexmatch
                 return "", fileset.__contains__
             def _buildregexmatch(kindpats, globsuffix):
                 """Build a matcher from a list of (kind, pat) tuples and return a
                 (regexp string, matcher function) pair.

                 All patterns are joined into one alternation.  If that regexp is
                 longer than 20000 characters, or compiling it raises OverflowError,
                 the list is split in half and a matcher that tries both halves is
                 returned instead; the returned string is still the full regexp.  A
                 regexp error is reported as util.Abort, naming the first pattern
                 that fails to compile on its own when there is one."""
                 try:
                     alternatives = [_regex(k, p, globsuffix) for (k, p) in kindpats]
                     regex = '(?:%s)' % '|'.join(alternatives)
                     if len(regex) > 20000:
                         raise OverflowError
                     return regex, _rematcher(regex)
                 except OverflowError:
                     # We're using a Python with a tiny regex engine and we
                     # made it explode, so we'll divide the pattern list in two
                     # until it works
                     count = len(kindpats)
                     if count < 2:
                         # a single pattern cannot be divided any further
                         raise
                     half = count // 2
                     a = _buildregexmatch(kindpats[:half], globsuffix)[1]
                     b = _buildregexmatch(kindpats[half:], globsuffix)[1]
                     def eitherhalf(s):
                         return a(s) or b(s)
                     return regex, eitherhalf
                 except re.error:
                     # compile the patterns one by one to find the offending one
                     for k, p in kindpats:
                         single = '(?:%s)' % _regex(k, p, globsuffix)
                         try:
                             _rematcher(single)
                         except re.error:
                             raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
                     raise util.Abort(_("invalid pattern"))
-            def _normalize(patterns, default, root, cwd, auditor):
-                '''Convert 'kind:pat' from the patterns list to tuples with kind and
-                normalized and rooted patterns and with listfiles expanded.'''
-                kindpats = []
-                for kind, pat in [_patsplit(p, default) for p in patterns]:
-                    if kind in ('glob', 'relpath'):
-                        pat = pathutil.canonpath(root, cwd, pat, auditor)
-                    elif kind in ('relglob', 'path'):
-                        pat = util.normpath(pat)
-                    elif kind in ('listfile', 'listfile0'):
-                        try:
-                            files = util.readfile(pat)
-                            if kind == 'listfile0':
-                                files = files.split('\0')
-                            else:
-                                files = files.splitlines()
-                            files = [f for f in files if f]
-                        except EnvironmentError:
-                            raise util.Abort(_("unable to read file list (%s)") % pat)
-                        kindpats += _normalize(files, default, root, cwd, auditor)
-                        continue
-                    # else: re or relre - which cannot be normalized
-                    kindpats.append((kind, pat))
-                return kindpats
             def _roots(kindpats):
                 '''return roots and exact explicitly listed files from patterns
                 >>> _roots([('glob', 'g/*'), ('glob', 'g'), ('glob', 'g*')])
                 ['g', 'g', '.']
                 >>> _roots([('relpath', 'r'), ('path', 'p/p'), ('path', '')])
                 ['r', 'p/p', '.']
                 >>> _roots([('relglob', 'rg*'), ('re', 're/'), ('relre', 'rr')])
                 ['.', '.', '.']
                 '''
                 r = []
                 for kind, pat in kindpats:
                     if kind == 'glob': # find the non-glob prefix
                         root = []
                         for p in pat.split('/'):
                             if '[' in p or '{' in p or '*' in p or '?' in p:
                                 break
                             root.append(p)
                         r.append('/'.join(root) or '.')
                     elif kind in ('relpath', 'path'):
                         r.append(pat or '.')
                     else: # relglob, re, relre
                         r.append('.')
                 return r
             def _anypats(kindpats):
                 for kind, pat in kindpats:
                     if kind in ('glob', 're', 'relglob', 'relre', 'set'):
                         return True