upstream/mercurial-mirror Commit - r21815:a4b67bf1

1

# match.py - filename matching

1

# match.py - filename matching

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

import re

8

import re

9

import util, pathutil

9

import util, pathutil

10

from i18n import _

10

from i18n import _

11

12

def _rematcher(regex):
    '''compile the regexp with the best available regexp engine and return a
    matcher function

    The pattern is compiled with util.compilere(). If the compiled object
    has a test_match attribute it is returned, otherwise the object's
    match method is returned.'''
    m = util.compilere(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        return m.test_match
    except AttributeError:
        return m.match

21

22

def _expandsets(kindpats, ctx):

22

def _expandsets(kindpats, ctx):

23

'''Returns the kindpats list with the 'set' patterns expanded.'''

23

'''Returns the kindpats list with the 'set' patterns expanded.'''

24

fset = set()

24

fset = set()

25

other = []

25

other = []

26

27

for kind, pat in kindpats:

27

for kind, pat in kindpats:

28

if kind == 'set':

28

if kind == 'set':

29

if not ctx:

29

if not ctx:

30

raise util.Abort("fileset expression with no context")

30

raise util.Abort("fileset expression with no context")

31

s = ctx.getfileset(pat)

31

s = ctx.getfileset(pat)

32

fset.update(s)

32

fset.update(s)

33

continue

33

continue

34

other.append((kind, pat))

34

other.append((kind, pat))

35

return fset, other

35

return fset, other

36

37

class match(object):
    def __init__(self, root, cwd, patterns, include=None, exclude=None,
                 default='glob', exact=False, auditor=None, ctx=None):
        """build an object to match a set of file patterns

        arguments:
        root - the canonical root of the tree you're matching against
        cwd - the current working directory, if relevant
        patterns - patterns to find
        include - patterns to include (unless they are excluded)
        exclude - patterns to exclude (even if they are included)
        default - if a pattern in patterns has no explicit type, assume this one
        exact - patterns are actually filenames (include/exclude still apply)
        auditor - handed to _normalize together with root and cwd
        ctx - handed to _buildmatch; also stored as self._ctx

        a pattern is one of:
        'glob:<glob>' - a glob relative to cwd
        're:<regexp>' - a regular expression
        'path:<path>' - a path relative to repository root
        'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
        'relpath:<path>' - a path relative to cwd
        'relre:<regexp>' - a regexp that needn't match the start of a name
        'set:<fileset>' - a fileset expression
        '<something>' - a pattern of the specified default type
        """

        self._root = root
        self._cwd = cwd
        self._files = [] # exact files and roots of patterns
        self._anypats = bool(include or exclude)
        self._ctx = ctx
        self._always = False

        # im / em / pm are the include, exclude and pattern match functions;
        # each one is only bound when the corresponding input is non-empty.
        if include:
            kindpats = _normalize(include, 'glob', root, cwd, auditor)
            self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)')
        if exclude:
            kindpats = _normalize(exclude, 'glob', root, cwd, auditor)
            self.excludepat, em = _buildmatch(ctx, kindpats, '(?:/|$)')
        if exact:
            if isinstance(patterns, list):
                self._files = patterns
            else:
                self._files = list(patterns)
            pm = self.exact
        elif patterns:
            kindpats = _normalize(patterns, default, root, cwd, auditor)
            self._files = _roots(kindpats)
            self._anypats = self._anypats or _anypats(kindpats)
            self.patternspat, pm = _buildmatch(ctx, kindpats, '$')

        # Combine only the functions that exist, so the common cases avoid
        # an extra lambda layer.
        if patterns or exact:
            if include:
                if exclude:
                    m = lambda f: im(f) and not em(f) and pm(f)
                else:
                    m = lambda f: im(f) and pm(f)
            else:
                if exclude:
                    m = lambda f: not em(f) and pm(f)
                else:
                    m = pm
        else:
            if include:
                if exclude:
                    m = lambda f: im(f) and not em(f)
                else:
                    m = im
            else:
                if exclude:
                    m = lambda f: not em(f)
                else:
                    # no patterns, no include, no exclude: match everything
                    m = lambda f: True
                    self._always = True

        self.matchfn = m
        self._fmap = set(self._files)

    def __call__(self, fn):
        return self.matchfn(fn)
    def __iter__(self):
        for f in self._files:
            yield f

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''
        pass

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fmap

    def anypats(self):
        '''Matcher uses patterns or include/exclude.'''
        return self._anypats

    def always(self):
        '''Matcher will match everything and .files() will be empty
        - optimization might be possible and necessary.'''
        return self._always

159

160

class exact(match):
    '''Matcher for a list of file names: match built with exact=True.'''
    def __init__(self, root, cwd, files):
        match.__init__(self, root, cwd, files, exact=True)

163

164

class always(match):
    '''Matcher built from an empty pattern list, with _always forced True.'''
    def __init__(self, root, cwd):
        match.__init__(self, root, cwd, [])
        self._always = True

168

169

class narrowmatcher(match):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = narrowmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> m2.rel('b.txt')
    'b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        # match.__init__ is not called; state is copied from the wrapped
        # matcher instead.
        self._root = matcher._root
        self._cwd = matcher._cwd
        self._path = path
        self._matcher = matcher
        self._always = matcher._always

        # keep only the wrapped matcher's files below path, with the
        # "path/" prefix stripped
        self._files = [f[len(path) + 1:] for f in matcher._files
                       if f.startswith(path + "/")]
        self._anypats = matcher._anypats
        # re-add the prefix before delegating to the wrapped match function
        self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)
        self._fmap = set(self._files)

    def bad(self, f, msg):
        '''Forward to the wrapped matcher's bad(), with the path prefix
        put back on f.'''
        self._matcher.bad(self._path + "/" + f, msg)

212

213

def patkind(pattern, default=None):
    '''If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise return default.'''
    return _patsplit(pattern, default)[0]

216

217

def _patsplit(pattern, default):

217

def _patsplit(pattern, default):

218

"""Split a string into the optional pattern kind prefix and the actual

218

"""Split a string into the optional pattern kind prefix and the actual

219

pattern."""

219

pattern."""

220

if ':' in pattern:

220

if ':' in pattern:

221

kind, pat = pattern.split(':', 1)

221

kind, pat = pattern.split(':', 1)

222

if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',

222

if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',

223

'listfile', 'listfile0', 'set'):

223

'listfile', 'listfile0', 'set'):

224

return kind, pat

224

return kind, pat

225

return default, pattern

225

return default, pattern

226

227

def _globre(pat):

227

def _globre(pat):

228

r'''Convert an extended glob string to a regexp string.

228

r'''Convert an extended glob string to a regexp string.

229

230

>>> print _globre(r'?')

230

>>> print _globre(r'?')

231

.

231

.

232

>>> print _globre(r'*')

232

>>> print _globre(r'*')

233

[^/]*

233

[^/]*

234

>>> print _globre(r'**')

234

>>> print _globre(r'**')

235

.*

235

.*

236

>>> print _globre(r'**/a')

237

(?:.*/)?a

238

>>> print _globre(r'a/**/b')

239

a\/(?:.*/)?b

236

>>> print _globre(r'[a*?!^][^b][!c]')

240

>>> print _globre(r'[a*?!^][^b][!c]')

237

[a*?!^][\^b][^c]

241

[a*?!^][\^b][^c]

238

>>> print _globre(r'{a,b}')

242

>>> print _globre(r'{a,b}')

239

(?:a|b)

243

(?:a|b)

240

>>> print _globre(r'.\*\?')

244

>>> print _globre(r'.\*\?')

241

\.\*\?

245

\.\*\?

242

'''

246

'''

243

i, n = 0, len(pat)

247

i, n = 0, len(pat)

244

res = ''

248

res = ''

245

group = 0

249

group = 0

246

escape = re.escape

250

escape = re.escape

247

def peek():

251

def peek():

248

return i < n and pat[i]

252

return i < n and pat[i]

249

while i < n:

253

while i < n:

250

c = pat[i]

254

c = pat[i]

251

i += 1

255

i += 1

252

if c not in '*?[{},\\':

256

if c not in '*?[{},\\':

253

res += escape(c)

257

res += escape(c)

254

elif c == '*':

258

elif c == '*':

255

if peek() == '*':

259

if peek() == '*':

256

i += 1

260

i += 1

257

~~res~~ += '.*'

261

if peek() == '/':

262

i += 1

263

res += '(?:.*/)?'

264

else:

265

res += '.*'

258

else:

266

else:

259

res += '[^/]*'

267

res += '[^/]*'

260

elif c == '?':

268

elif c == '?':

261

res += '.'

269

res += '.'

262

elif c == '[':

270

elif c == '[':

263

j = i

271

j = i

264

if j < n and pat[j] in '!]':

272

if j < n and pat[j] in '!]':

265

j += 1

273

j += 1

266

while j < n and pat[j] != ']':

274

while j < n and pat[j] != ']':

267

j += 1

275

j += 1

268

if j >= n:

276

if j >= n:

269

res += '\\['

277

res += '\\['

270

else:

278

else:

271

stuff = pat[i:j].replace('\\','\\\\')

279

stuff = pat[i:j].replace('\\','\\\\')

272

i = j + 1

280

i = j + 1

273

if stuff[0] == '!':

281

if stuff[0] == '!':

274

stuff = '^' + stuff[1:]

282

stuff = '^' + stuff[1:]

275

elif stuff[0] == '^':

283

elif stuff[0] == '^':

276

stuff = '\\' + stuff

284

stuff = '\\' + stuff

277

res = '%s[%s]' % (res, stuff)

285

res = '%s[%s]' % (res, stuff)

278

elif c == '{':

286

elif c == '{':

279

group += 1

287

group += 1

280

res += '(?:'

288

res += '(?:'

281

elif c == '}' and group:

289

elif c == '}' and group:

282

res += ')'

290

res += ')'

283

group -= 1

291

group -= 1

284

elif c == ',' and group:

292

elif c == ',' and group:

285

res += '|'

293

res += '|'

286

elif c == '\\':

294

elif c == '\\':

287

p = peek()

295

p = peek()

288

if p:

296

if p:

289

i += 1

297

i += 1

290

res += escape(p)

298

res += escape(p)

291

else:

299

else:

292

res += escape(c)

300

res += escape(c)

293

else:

301

else:

294

res += escape(c)

302

res += escape(c)

295

return res

303

return res

296

304

297

def _regex(kind, pat, globsuffix):

305

def _regex(kind, pat, globsuffix):

298

'''Convert a (normalized) pattern of any kind into a regular expression.

306

'''Convert a (normalized) pattern of any kind into a regular expression.

299

globsuffix is appended to the regexp of globs.'''

307

globsuffix is appended to the regexp of globs.'''

300

if not pat:

308

if not pat:

301

return ''

309

return ''

302

if kind == 're':

310

if kind == 're':

303

return pat

311

return pat

304

if kind == 'path':

312

if kind == 'path':

305

return '^' + re.escape(pat) + '(?:/|$)'

313

return '^' + re.escape(pat) + '(?:/|$)'

306

if kind == 'relglob':

314

if kind == 'relglob':

307

return '(?:|.*/)' + _globre(pat) + globsuffix

315

return '(?:|.*/)' + _globre(pat) + globsuffix

308

if kind == 'relpath':

316

if kind == 'relpath':

309

return re.escape(pat) + '(?:/|$)'

317

return re.escape(pat) + '(?:/|$)'

310

if kind == 'relre':

318

if kind == 'relre':

311

if pat.startswith('^'):

319

if pat.startswith('^'):

312

return pat

320

return pat

313

return '.*' + pat

321

return '.*' + pat

314

return _globre(pat) + globsuffix

322

return _globre(pat) + globsuffix

315

323

316

def _buildmatch(ctx, kindpats, globsuffix):
    '''Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs.

    'set' patterns are expanded through ctx into a set of file names; the
    returned function accepts a name that is in that set or that matches
    the regexp built from the remaining patterns.'''
    fset, kindpats = _expandsets(kindpats, ctx)
    if not kindpats:
        # only filesets (or nothing at all): plain set membership
        return "", fset.__contains__

    regex, mf = _buildregexmatch(kindpats, globsuffix)
    if fset:
        return regex, lambda f: f in fset or mf(f)
    return regex, mf

327

335

328

def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Raises util.Abort when a pattern does not compile, and re-raises
    OverflowError when a single pattern is still too large."""
    try:
        regex = '(?:%s)' % '|'.join([_regex(k, p, globsuffix)
                                     for (k, p) in kindpats])
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        l = len(kindpats)
        if l < 2:
            raise
        # only the matcher halves are used; the regexp string returned
        # below is still the full, undivided one
        regexa, a = _buildregexmatch(kindpats[:l//2], globsuffix)
        regexb, b = _buildregexmatch(kindpats[l//2:], globsuffix)
        return regex, lambda s: a(s) or b(s)
    except re.error:
        # compile the patterns one by one to name the offending one
        for k, p in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
        raise util.Abort(_("invalid pattern"))

354

362

355

def _normalize(patterns, default, root, cwd, auditor):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    'glob' and 'relpath' patterns go through pathutil.canonpath, 'relglob'
    and 'path' patterns through util.normpath. 'listfile' and 'listfile0'
    patterns name a file whose non-empty entries (split on lines or on NUL
    respectively) are normalized recursively with the same default kind.

    Raises util.Abort when a list file cannot be read.'''
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in ('glob', 'relpath'):
            pat = pathutil.canonpath(root, cwd, pat, auditor)
        elif kind in ('relglob', 'path'):
            pat = util.normpath(pat)
        elif kind in ('listfile', 'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == 'listfile0':
                    files = files.split('\0')
                else:
                    files = files.splitlines()
                files = [f for f in files if f]
            except EnvironmentError:
                raise util.Abort(_("unable to read file list (%s)") % pat)
            kindpats += _normalize(files, default, root, cwd, auditor)
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat))
    return kindpats

379

387

380

def _roots(kindpats):

388

def _roots(kindpats):

381

'''return roots and exact explicitly listed files from patterns

389

'''return roots and exact explicitly listed files from patterns

382

390

383

>>> _roots([('glob', 'g/*'), ('glob', 'g'), ('glob', 'g*')])

391

>>> _roots([('glob', 'g/*'), ('glob', 'g'), ('glob', 'g*')])

384

['g', 'g', '.']

392

['g', 'g', '.']

385

>>> _roots([('relpath', 'r'), ('path', 'p/p'), ('path', '')])

393

>>> _roots([('relpath', 'r'), ('path', 'p/p'), ('path', '')])

386

['r', 'p/p', '.']

394

['r', 'p/p', '.']

387

>>> _roots([('relglob', 'rg*'), ('re', 're/'), ('relre', 'rr')])

395

>>> _roots([('relglob', 'rg*'), ('re', 're/'), ('relre', 'rr')])

388

['.', '.', '.']

396

['.', '.', '.']

389

'''

397

'''

390

r = []

398

r = []

391

for kind, pat in kindpats:

399

for kind, pat in kindpats:

392

if kind == 'glob': # find the non-glob prefix

400

if kind == 'glob': # find the non-glob prefix

393

root = []

401

root = []

394

for p in pat.split('/'):

402

for p in pat.split('/'):

395

if '[' in p or '{' in p or '*' in p or '?' in p:

403

if '[' in p or '{' in p or '*' in p or '?' in p:

396

break

404

break

397

root.append(p)

405

root.append(p)

398

r.append('/'.join(root) or '.')

406

r.append('/'.join(root) or '.')

399

elif kind in ('relpath', 'path'):

407

elif kind in ('relpath', 'path'):

400

r.append(pat or '.')

408

r.append(pat or '.')

401

else: # relglob, re, relre

409

else: # relglob, re, relre

402

r.append('.')

410

r.append('.')

403

return r

411

return r

404

412

405

def _anypats(kindpats):

413

def _anypats(kindpats):

406

for kind, pat in kindpats:

414

for kind, pat in kindpats:

407

if kind in ('glob', 're', 'relglob', 'relre', 'set'):

415

if kind in ('glob', 're', 'relglob', 'relre', 'set'):

408

return True

416

return True

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # match.py - filename matching
             #
             #  Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             import re
             import util, pathutil
             from i18n import _
             def _rematcher(regex):
                 '''Compile regex through util.compilere and return a callable that
                 matches a string against it.
                 The compiled object's test_match attribute is preferred when it
                 exists (noted upstream as slightly faster and provided by facebook's
                 re2 bindings); otherwise its match attribute is returned.'''
                 compiled = util.compilere(regex)
                 try:
                     matcher = compiled.test_match
                 except AttributeError:
                     # plain regexp objects only offer match
                     matcher = compiled.match
                 return matcher
             def _expandsets(kindpats, ctx):
                 '''Returns the kindpats list with the 'set' patterns expanded.'''
                 fset = set()
                 other = []
                 for kind, pat in kindpats:
                     if kind == 'set':
                         if not ctx:
                             raise util.Abort("fileset expression with no context")
                         s = ctx.getfileset(pat)
                         fset.update(s)
                         continue
                     other.append((kind, pat))
                 return fset, other
             class match(object):
                 def __init__(self, root, cwd, patterns, include=[], exclude=[],
                              default='glob', exact=False, auditor=None, ctx=None):
                     """Build a matcher for a set of file patterns.
                     arguments:
                     root - the canonical root of the tree you're matching against
                     cwd - the current working directory, if relevant
                     patterns - patterns to find
                     include - patterns to include (unless they are excluded)
                     exclude - patterns to exclude (even if they are included)
                     default - kind assumed for a pattern in patterns without one
                     exact - patterns are actually filenames (include/exclude still apply)
                     auditor - handed to _normalize together with the patterns
                     ctx - handed to _buildmatch and kept as self._ctx
                     a pattern is one of:
                     'glob:<glob>' - a glob relative to cwd
                     're:<regexp>' - a regular expression
                     'path:<path>' - a path relative to repository root
                     'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
                     'relpath:<path>' - a path relative to cwd
                     'relre:<regexp>' - a regexp that needn't match the start of a name
                     'set:<fileset>' - a fileset expression
                     '<something>' - a pattern of the specified default type
                     """
                     self._root = root
                     self._cwd = cwd
                     self._files = [] # exact files and roots of patterns
                     self._anypats = bool(include or exclude)
                     self._ctx = ctx
                     self._always = False
                     # include/exclude patterns default to 'glob' and also match
                     # everything below a matched directory
                     if include:
                         kindpats = _normalize(include, 'glob', root, cwd, auditor)
                         self.includepat, incmatch = _buildmatch(ctx, kindpats,
                                                                 '(?:/|$)')
                     if exclude:
                         kindpats = _normalize(exclude, 'glob', root, cwd, auditor)
                         self.excludepat, excmatch = _buildmatch(ctx, kindpats,
                                                                 '(?:/|$)')
                     if exact:
                         files = patterns
                         if not isinstance(files, list):
                             files = list(files)
                         self._files = files
                         patmatch = self.exact
                     elif patterns:
                         kindpats = _normalize(patterns, default, root, cwd, auditor)
                         self._files = _roots(kindpats)
                         self._anypats = self._anypats or _anypats(kindpats)
                         self.patternspat, patmatch = _buildmatch(ctx, kindpats, '$')
                     # combine whichever of the three matchers exist into one function
                     if patterns or exact:
                         if include and exclude:
                             matchfn = lambda f: (incmatch(f) and not excmatch(f)
                                                  and patmatch(f))
                         elif include:
                             matchfn = lambda f: incmatch(f) and patmatch(f)
                         elif exclude:
                             matchfn = lambda f: not excmatch(f) and patmatch(f)
                         else:
                             matchfn = patmatch
                     elif include and exclude:
                         matchfn = lambda f: incmatch(f) and not excmatch(f)
                     elif include:
                         matchfn = incmatch
                     elif exclude:
                         matchfn = lambda f: not excmatch(f)
                     else:
                         # nothing restricts the match at all
                         matchfn = lambda f: True
                         self._always = True
                     self.matchfn = matchfn
                     self._fmap = set(self._files)
                 def __call__(self, fn):
                     '''Apply the combined match function to fn.'''
                     return self.matchfn(fn)
                 def __iter__(self):
                     '''Iterate over the entries of .files().'''
                     for name in self._files:
                         yield name
                 # Callbacks related to how the matcher is used by dirstate.walk.
                 # Subscribers to these events must monkeypatch the matcher object.
                 def bad(self, f, msg):
                     '''Callback from dirstate.walk for each explicit file that can't be
                     found/accessed, with an error message. Does nothing by default.'''
                     pass
                 # If an explicitdir is set, it will be called when an explicitly listed
                 # directory is visited.
                 explicitdir = None
                 # If a traversedir is set, it will be called when a directory discovered
                 # by recursive traversal is visited.
                 traversedir = None
                 def rel(self, f):
                     '''Convert repo path f back to a path relative to the matcher's
                     cwd.'''
                     return util.pathto(self._root, self._cwd, f)
                 def files(self):
                     '''Explicitly listed files or patterns or roots:
                     if no patterns or .always(): empty list,
                     if exact: list exact files,
                     if not .anypats(): list all files and dirs,
                     else: optimal roots'''
                     return self._files
                 def exact(self, f):
                     '''Tell whether f is one of the entries of .files().'''
                     return f in self._fmap
                 def anypats(self):
                     '''Tell whether the matcher uses patterns or include/exclude.'''
                     return self._anypats
                 def always(self):
                     '''Tell whether the matcher matches everything (.files() is then
                     empty) - optimization might be possible and necessary.'''
                     return self._always
             class exact(match):
                 '''Matcher built from a list of file names that are used as they are
                 instead of being parsed as patterns.'''
                 def __init__(self, root, cwd, files):
                     match.__init__(self, root, cwd, patterns=files, exact=True)
             class always(match):
                 '''Matcher built without any patterns and flagged as matching
                 everything.'''
                 def __init__(self, root, cwd):
                     match.__init__(self, root, cwd, patterns=[])
                     self._always = True
             class narrowmatcher(match):
                 """Restrict an existing matcher to a single subdirectory.
                 Names given to this matcher are relative to the subdirectory: the
                 subdirectory path is put back in front before the wrapped matcher is
                 asked, and it is stripped from the wrapped matcher's file list:
                 >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
                 >>> m2 = narrowmatcher('sub', m1)
                 >>> bool(m2('a.txt'))
                 False
                 >>> bool(m2('b.txt'))
                 True
                 >>> bool(m2.matchfn('a.txt'))
                 False
                 >>> bool(m2.matchfn('b.txt'))
                 True
                 >>> m2.files()
                 ['b.txt']
                 >>> m2.exact('b.txt')
                 True
                 >>> m2.rel('b.txt')
                 'b.txt'
                 >>> def bad(f, msg):
                 ...     print "%s: %s" % (f, msg)
                 >>> m1.bad = bad
                 >>> m2.bad('x.txt', 'No such file')
                 sub/x.txt: No such file
                 """
                 def __init__(self, path, matcher):
                     prefix = path + "/"
                     self._path = path
                     self._matcher = matcher
                     # state copied straight from the wrapped matcher
                     self._root = matcher._root
                     self._cwd = matcher._cwd
                     self._always = matcher._always
                     self._anypats = matcher._anypats
                     # keep only the files below path, with the prefix removed
                     self._files = [name[len(prefix):] for name in matcher._files
                                    if name.startswith(prefix)]
                     self._fmap = set(self._files)
                     self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)
                 def bad(self, f, msg):
                     '''Forward the report to the wrapped matcher with the
                     subdirectory path put back in front of f.'''
                     self._matcher.bad(self._path + "/" + f, msg)
             def patkind(pattern, default=None):
                 '''Return the kind of pattern when it is 'kind:pat' with a known
                 kind, and default otherwise.'''
                 kind, _pat = _patsplit(pattern, default)
                 return kind
             def _patsplit(pattern, default):
                 """Split a string into the optional pattern kind prefix and the actual
                 pattern."""
                 if ':' in pattern:
                     kind, pat = pattern.split(':', 1)
                     if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
                                 'listfile', 'listfile0', 'set'):
                         return kind, pat
                 return default, pattern
             def _globre(pat):
                 r'''Convert an extended glob string to a regexp string.
                 >>> print _globre(r'?')
                 .
                 >>> print _globre(r'*')
                 [^/]*
                 >>> print _globre(r'**')
                 .*
+                >>> print _globre(r'**/a')
+                (?:.*/)?a
+                >>> print _globre(r'a/**/b')
+                a\/(?:.*/)?b
                 >>> print _globre(r'[a*?!^][^b][!c]')
                 [a*?!^][\^b][^c]
                 >>> print _globre(r'{a,b}')
                 (?:a|b)
                 >>> print _globre(r'.\*\?')
                 \.\*\?
                 '''
                 i, n = 0, len(pat)
                 res = ''
                 group = 0
                 escape = re.escape
                 def peek():
                     return i < n and pat[i]
                 while i < n:
                     c = pat[i]
                     i += 1
                     if c not in '*?[{},\\':
                         res += escape(c)
                     elif c == '*':
                         if peek() == '*':
                             i += 1
-                            res += '.*'
+                            if peek() == '/':
+                                i += 1
+                                res += '(?:.*/)?'
+                            else:
+                                res += '.*'
                         else:
                             res += '[^/]*'
                     elif c == '?':
                         res += '.'
                     elif c == '[':
                         j = i
                         if j < n and pat[j] in '!]':
                             j += 1
                         while j < n and pat[j] != ']':
                             j += 1
                         if j >= n:
                             res += '\\['
                         else:
                             stuff = pat[i:j].replace('\\','\\\\')
                             i = j + 1
                             if stuff[0] == '!':
                                 stuff = '^' + stuff[1:]
                             elif stuff[0] == '^':
                                 stuff = '\\' + stuff
                             res = '%s[%s]' % (res, stuff)
                     elif c == '{':
                         group += 1
                         res += '(?:'
                     elif c == '}' and group:
                         res += ')'
                         group -= 1
                     elif c == ',' and group:
                         res += '|'
                     elif c == '\\':
                         p = peek()
                         if p:
                             i += 1
                             res += escape(p)
                         else:
                             res += escape(c)
                     else:
                         res += escape(c)
                 return res
             def _regex(kind, pat, globsuffix):
                 '''Convert a (normalized) pattern of any kind into a regular expression.
                 globsuffix is appended to the regexp of globs.'''
                 if not pat:
                     return ''
                 if kind == 're':
                     return pat
                 if kind == 'path':
                     return '^' + re.escape(pat) + '(?:/|$)'
                 if kind == 'relglob':
                     return '(?:|.*/)' + _globre(pat) + globsuffix
                 if kind == 'relpath':
                     return re.escape(pat) + '(?:/|$)'
                 if kind == 'relre':
                     if pat.startswith('^'):
                         return pat
                     return '.*' + pat
                 return _globre(pat) + globsuffix
             def _buildmatch(ctx, kindpats, globsuffix):
                 '''Build the regexp string and the matcher function for kindpats.
                 'set' patterns are expanded through ctx into a set of file names; the
                 remaining patterns are compiled into one regexp, with globsuffix
                 appended to the regexp of globs. When only 'set' patterns are given
                 the regexp string is empty.'''
                 fileset, remaining = _expandsets(kindpats, ctx)
                 if remaining:
                     regex, rematch = _buildregexmatch(remaining, globsuffix)
                     if not fileset:
                         return regex, rematch
                     # either source of matches is good enough
                     return regex, lambda f: f in fileset or rematch(f)
                 return "", fileset.__contains__
             def _buildregexmatch(kindpats, globsuffix):
                 """Compile kindpats into a single alternation.
                 Returns the regexp string and a matcher function. Raises util.Abort
                 when a pattern is not a valid regular expression."""
                 try:
                     pieces = [_regex(k, p, globsuffix) for (k, p) in kindpats]
                     regex = '(?:%s)' % '|'.join(pieces)
                     if len(regex) > 20000:
                         raise OverflowError
                     return regex, _rematcher(regex)
                 except OverflowError:
                     # We're using a Python with a tiny regex engine and we
                     # made it explode, so we'll divide the pattern list in two
                     # until it works
                     count = len(kindpats)
                     if count < 2:
                         raise
                     half = count // 2
                     # only the matcher functions of the two halves are needed
                     first = _buildregexmatch(kindpats[:half], globsuffix)[1]
                     second = _buildregexmatch(kindpats[half:], globsuffix)[1]
                     return regex, lambda s: first(s) or second(s)
                 except re.error:
                     # compile the patterns one by one to name the offending one
                     for k, p in kindpats:
                         try:
                             _rematcher('(?:%s)' % _regex(k, p, globsuffix))
                         except re.error:
                             raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
                     raise util.Abort(_("invalid pattern"))
             def _normalize(patterns, default, root, cwd, auditor):
                 '''Turn the 'kind:pat' strings of patterns into (kind, pat) tuples.
                 glob and relpath patterns go through pathutil.canonpath, relglob and
                 path patterns through util.normpath, and listfile/listfile0 patterns
                 are replaced by the normalized patterns read from the named file.
                 Raises util.Abort when a list file cannot be read.'''
                 result = []
                 for kind, pat in [_patsplit(p, default) for p in patterns]:
                     if kind in ('listfile', 'listfile0'):
                         try:
                             content = util.readfile(pat)
                             if kind == 'listfile0':
                                 names = content.split('\0')
                             else:
                                 names = content.splitlines()
                             names = [name for name in names if name]
                         except EnvironmentError:
                             raise util.Abort(_("unable to read file list (%s)") % pat)
                         # entries of the list file are patterns themselves
                         result.extend(_normalize(names, default, root, cwd, auditor))
                         continue
                     if kind in ('glob', 'relpath'):
                         pat = pathutil.canonpath(root, cwd, pat, auditor)
                     elif kind in ('relglob', 'path'):
                         pat = util.normpath(pat)
                     # else: re or relre - which cannot be normalized
                     result.append((kind, pat))
                 return result
             def _roots(kindpats):
                 '''return roots and exact explicitly listed files from patterns
                 >>> _roots([('glob', 'g/*'), ('glob', 'g'), ('glob', 'g*')])
                 ['g', 'g', '.']
                 >>> _roots([('relpath', 'r'), ('path', 'p/p'), ('path', '')])
                 ['r', 'p/p', '.']
                 >>> _roots([('relglob', 'rg*'), ('re', 're/'), ('relre', 'rr')])
                 ['.', '.', '.']
                 '''
                 r = []
                 for kind, pat in kindpats:
                     if kind == 'glob': # find the non-glob prefix
                         root = []
                         for p in pat.split('/'):
                             if '[' in p or '{' in p or '*' in p or '?' in p:
                                 break
                             root.append(p)
                         r.append('/'.join(root) or '.')
                     elif kind in ('relpath', 'path'):
                         r.append(pat or '.')
                     else: # relglob, re, relre
                         r.append('.')
                 return r
             def _anypats(kindpats):
                 for kind, pat in kindpats:
                     if kind in ('glob', 're', 'relglob', 'relre', 'set'):
                         return True

               $ hg init
             Issue562: .hgignore requires newline at end:
               $ touch foo
               $ touch bar
               $ touch baz
               $ cat > makeignore.py <<EOF
               > f = open(".hgignore", "w")
               > f.write("ignore\n")
               > f.write("foo\n")
               > # No EOL here
               > f.write("bar")
               > f.close()
               > EOF
               $ python makeignore.py
             Should display baz only:
               $ hg status
               ? baz
               $ rm foo bar baz .hgignore makeignore.py
               $ touch a.o
               $ touch a.c
               $ touch syntax
               $ mkdir dir
               $ touch dir/a.o
               $ touch dir/b.o
               $ touch dir/c.o
               $ hg add dir/a.o
               $ hg commit -m 0
               $ hg add dir/b.o
               $ hg status
               A dir/b.o
               ? a.c
               ? a.o
               ? dir/c.o
               ? syntax
               $ echo "*.o" > .hgignore
               $ hg status
               abort: $TESTTMP/.hgignore: invalid pattern (relre): *.o (glob)
               [255]
               $ echo ".*\.o" > .hgignore
               $ hg status
               A dir/b.o
               ? .hgignore
               ? a.c
               ? syntax
             Check it does not ignore the current directory '.':
               $ echo "^\." > .hgignore
               $ hg status
               A dir/b.o
               ? a.c
               ? a.o
               ? dir/c.o
               ? syntax
               $ echo "glob:**.o" > .hgignore
               $ hg status
               A dir/b.o
               ? .hgignore
               ? a.c
               ? syntax
               $ echo "glob:*.o" > .hgignore
               $ hg status
               A dir/b.o
               ? .hgignore
               ? a.c
               ? syntax
               $ echo "syntax: glob" > .hgignore
               $ echo "re:.*\.o" >> .hgignore
               $ hg status
               A dir/b.o
               ? .hgignore
               ? a.c
               ? syntax
               $ echo "syntax: invalid" > .hgignore
               $ hg status
               $TESTTMP/.hgignore: ignoring invalid syntax 'invalid' (glob)
               A dir/b.o
               ? .hgignore
               ? a.c
               ? a.o
               ? dir/c.o
               ? syntax
               $ echo "syntax: glob" > .hgignore
               $ echo "*.o" >> .hgignore
               $ hg status
               A dir/b.o
               ? .hgignore
               ? a.c
               ? syntax
               $ echo "relglob:syntax*" > .hgignore
               $ hg status
               A dir/b.o
               ? .hgignore
               ? a.c
               ? a.o
               ? dir/c.o
               $ echo "relglob:*" > .hgignore
               $ hg status
               A dir/b.o
               $ cd dir
               $ hg status .
               A b.o
               $ hg debugignore
               (?:(?:|.*/)[^/]*(?:/|$))
               $ cd ..
             Check patterns that match only the directory
               $ echo "^dir\$" > .hgignore
               $ hg status
               A dir/b.o
               ? .hgignore
               ? a.c
               ? a.o
               ? syntax
+            Check recursive glob pattern matches no directories (dir/**/c.o matches dir/c.o)
+              $ echo "syntax: glob" > .hgignore
+              $ echo "dir/**/c.o" >> .hgignore
+              $ touch dir/c.o
+              $ mkdir dir/subdir
+              $ touch dir/subdir/c.o
+              $ hg status
+              A dir/b.o
+              ? .hgignore
+              ? a.c
+              ? a.o
+              ? syntax