upstream/mercurial-mirror Commit - r51286:81c7d04f

1

# match.py - filename matching

1

# match.py - filename matching

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

9

import bisect

9

import bisect

10

import copy

10

import copy

11

import itertools

11

import itertools

12

import os

12

import os

13

import re

13

import re

14

15

from .i18n import _

15

from .i18n import _

16

from .pycompat import open

16

from .pycompat import open

17

from . import (

17

from . import (

18

encoding,

18

encoding,

19

error,

19

error,

20

pathutil,

20

pathutil,

21

policy,

21

policy,

22

pycompat,

22

pycompat,

23

util,

23

util,

24

)

24

)

25

from .utils import stringutil

25

from .utils import stringutil

26

27

# Result of asking the policy module for the Rust 'dirstate' implementation.
rustmod = policy.importrust('dirstate')

# Pattern kind prefixes, i.e. the part before ':' in a 'kind:pattern' string
# (see the docstring of match() for what most of them mean).
allpatternkinds = (
    b're',
    b'glob',
    b'path',
    b'relglob',
    b'relpath',
    b'relre',
    b'rootglob',
    b'listfile',
    b'listfile0',
    b'set',
    b'include',
    b'subinclude',
    b'rootfilesin',
)
# Kinds whose pattern is canonicalized against cwd by _donormalize().
cwdrelativepatternkinds = (b'relpath', b'glob')

# Short alias used as a decorator by the matcher classes below.
propertycache = util.propertycache

47

48

49

def _rematcher(regex):
    """Compile the regexp with the best available regexp engine and return
    a matcher function.

    The pattern is compiled through ``util.re``. If the compiled object has
    a ``test_match`` attribute it is returned; otherwise the compiled
    object's ``match`` method is returned.
    """
    m = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        return m.test_match
    except AttributeError:
        return m.match

58

59

60

def _expandsets(cwd, kindpats, ctx=None, listsubrepos=False, badfn=None):
    """Return the kindpats list with the 'set' patterns expanded to matchers.

    Returns a ``(matchers, other)`` pair: ``matchers`` holds one fileset
    matcher per 'set' pattern (plus, when ``listsubrepos`` is true, one
    prefixdirmatcher per entry of ``ctx.substate``), and ``other`` holds the
    remaining ``(kind, pat, source)`` tuples in their original order.

    Raises error.ProgrammingError if a 'set' pattern is found while ``ctx``
    is None.
    """
    matchers = []
    other = []

    for kind, pat, source in kindpats:
        if kind == b'set':
            if ctx is None:
                raise error.ProgrammingError(
                    b"fileset expression with no context"
                )
            matchers.append(ctx.matchfileset(cwd, pat, badfn=badfn))

            if listsubrepos:
                # evaluate the same fileset in every subrepo and re-root
                # each result under the subrepo's path
                for subpath in ctx.substate:
                    sm = ctx.sub(subpath).matchfileset(cwd, pat, badfn=badfn)
                    pm = prefixdirmatcher(subpath, sm, badfn=badfn)
                    matchers.append(pm)

            continue
        other.append((kind, pat, source))
    return matchers, other

82

83

84

def _expandsubinclude(kindpats, root):
    """Return the list of subinclude matcher args and the kindpats without
    the subincludes in it.

    The result is a ``(relmatchers, other)`` pair. Each element of
    ``relmatchers`` is ``(prefix, matcherargs)`` where ``matcherargs`` is the
    tuple ``(newroot, b'', [], [b'include:<path>'])`` and ``prefix`` is
    ``newroot`` canonicalized against ``root`` with a trailing ``/`` (or
    empty). ``other`` keeps every non-'subinclude' tuple unchanged.
    """
    relmatchers = []
    other = []

    for kind, pat, source in kindpats:
        if kind == b'subinclude':
            # the pattern is resolved against the directory of the file
            # that contained it
            sourceroot = pathutil.dirname(util.normpath(source))
            pat = util.pconvert(pat)
            path = pathutil.join(sourceroot, pat)

            newroot = pathutil.dirname(path)
            matcherargs = (newroot, b'', [], [b'include:%s' % path])

            prefix = pathutil.canonpath(root, root, newroot)
            if prefix:
                prefix += b'/'
            relmatchers.append((prefix, matcherargs))
        else:
            other.append((kind, pat, source))

    return relmatchers, other

107

108

109

def _kindpatsalwaysmatch(kindpats):

109

def _kindpatsalwaysmatch(kindpats):

110

"""Checks whether the kindspats match everything, as e.g.

110

"""Checks whether the kindspats match everything, as e.g.

111

'relpath:.' does.

111

'relpath:.' does.

112

"""

112

"""

113

for kind, pat, source in kindpats:

113

for kind, pat, source in kindpats:

114

if pat != b'' or kind not in [b'relpath', b'glob']:

114

if pat != b'' or kind not in [b'relpath', b'glob']:

115

return False

115

return False

116

return True

116

return True

117

118

119

def _buildkindpatsmatcher(
    matchercls,
    root,
    cwd,
    kindpats,
    ctx=None,
    listsubrepos=False,
    badfn=None,
):
    """Build a single matcher for ``kindpats``.

    'set' patterns are first split off with _expandsets(); what remains (if
    anything) is handed to ``matchercls(root, kindpats, badfn=badfn)``.

    Returns a nevermatcher when there is nothing to match with, the sole
    matcher when there is exactly one, and a unionmatcher over all of them
    otherwise.
    """
    matchers = []
    fms, kindpats = _expandsets(
        cwd,
        kindpats,
        ctx=ctx,
        listsubrepos=listsubrepos,
        badfn=badfn,
    )
    if kindpats:
        m = matchercls(root, kindpats, badfn=badfn)
        matchers.append(m)
    if fms:
        matchers.extend(fms)
    if not matchers:
        return nevermatcher(badfn=badfn)
    if len(matchers) == 1:
        return matchers[0]
    return unionmatcher(matchers)

146

147

148

def match(
    root,
    cwd,
    patterns=None,
    include=None,
    exclude=None,
    default=b'glob',
    auditor=None,
    ctx=None,
    listsubrepos=False,
    warn=None,
    badfn=None,
    icasefs=False,
):
    r"""build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - optional path auditor
    ctx - optional changecontext
    listsubrepos - if True, recurse into subrepositories
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    >>> def _match(root, *args, **kwargs):
    ...     return match(util.localpath(root), *args, **kwargs)

    Usually a patternmatcher is returned (the compiled expression is elided
    from the expected output here):
    >>> _match(b'/foo', b'.', [br're:.*\.c$', b'path:foo/a', b'*.py'])
    ... # doctest: +ELLIPSIS
    <patternmatcher ...>

    Combining 'patterns' with 'include' (resp. 'exclude') gives an
    intersectionmatcher (resp. a differencematcher):
    >>> type(_match(b'/foo', b'.', [br're:.*\.c$'], include=[b'path:lib']))
    ... # doctest: +ELLIPSIS
    <class '...intersectionmatcher'>
    >>> type(_match(b'/foo', b'.', [br're:.*\.c$'], exclude=[b'path:build']))
    ... # doctest: +ELLIPSIS
    <class '...differencematcher'>

    Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
    >>> _match(b'/foo', b'.', [])
    <alwaysmatcher>

    The 'default' argument determines which kind of pattern is assumed if a
    pattern has no prefix:
    >>> _match(b'/foo', b'.', [br'.*\.c$'], default=b're')
    ... # doctest: +ELLIPSIS
    <patternmatcher ...>
    >>> _match(b'/foo', b'.', [b'main.py'], default=b'relpath')
    ... # doctest: +ELLIPSIS
    <patternmatcher ...>
    >>> _match(b'/foo', b'.', [b'main.py'], default=b're')
    ... # doctest: +ELLIPSIS
    <patternmatcher ...>

    The primary use of matchers is to check whether a value (usually a file
    name) matches against one of the patterns given at initialization. There
    are two ways of doing this check.

    >>> m = _match(b'/foo', b'', [br're:.*\.c$', b'relpath:a'])

    1. Calling the matcher with a file name returns True if any pattern
    matches that file name:
    >>> m(b'a')
    True
    >>> m(b'main.c')
    True
    >>> m(b'test.py')
    False

    2. Using the exact() method only returns True if the file name matches one
    of the exact patterns (i.e. not re: or glob: patterns):
    >>> m.exact(b'a')
    True
    >>> m.exact(b'main.c')
    False
    """
    assert os.path.isabs(root)
    cwd = os.path.join(root, util.localpath(cwd))
    normalize = _donormalize
    if icasefs:
        # NOTE(review): this path dereferences ctx, so callers passing
        # icasefs=True are expected to also pass a ctx.
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            kp = _donormalize(patterns, default, root, cwd, auditor, warn)
            kindpats = []
            for kind, pats, source in kp:
                if kind not in (b're', b'relre'):  # regex can't be normalized
                    p = pats
                    pats = dsnormalize(pats)

                    # Preserve the original to handle a case only rename.
                    if p != pats and p in dirstate:
                        kindpats.append((kind, p, source))

                kindpats.append((kind, pats, source))
            return kindpats

    if patterns:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(badfn)
        else:
            m = _buildkindpatsmatcher(
                patternmatcher,
                root,
                cwd,
                kindpats,
                ctx=ctx,
                listsubrepos=listsubrepos,
                badfn=badfn,
            )
    else:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using nevermatcher).
        m = alwaysmatcher(badfn)

    # The include and exclude matchers are built with badfn=None, i.e.
    # without the caller's bad() callback, and include/exclude patterns
    # without an explicit kind are always treated as 'glob'.
    if include:
        kindpats = normalize(include, b'glob', root, cwd, auditor, warn)
        im = _buildkindpatsmatcher(
            includematcher,
            root,
            cwd,
            kindpats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )
        m = intersectmatchers(m, im)
    if exclude:
        kindpats = normalize(exclude, b'glob', root, cwd, auditor, warn)
        em = _buildkindpatsmatcher(
            includematcher,
            root,
            cwd,
            kindpats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )
        m = differencematcher(m, em)
    return m

310

311

312

def exact(files, badfn=None):
    """Return an exactmatcher built from ``files``, with ``badfn`` as its
    optional bad() callback."""
    return exactmatcher(files, badfn=badfn)

314

315

316

def always(badfn=None):
    """Return an alwaysmatcher (matches everything), with ``badfn`` as its
    optional bad() callback."""
    return alwaysmatcher(badfn)

318

319

320

def never(badfn=None):
    """Return a nevermatcher (matches nothing), with ``badfn`` as its
    optional bad() callback."""
    return nevermatcher(badfn)

322

323

324

def badmatch(match, badfn):
    """Make a copy of the given matcher, replacing its bad method with the
    given one.

    The copy is shallow (``copy.copy``); the matcher passed in is left
    untouched and the new matcher is returned.
    """
    m = copy.copy(match)
    m.bad = badfn
    return m

331

332

333

def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
    """Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Returns a list of ``(kind, pat, source)`` tuples. ``source`` is ``b''``
    for patterns given directly, the list file's path for patterns read via
    'listfile'/'listfile0', and the pattern file's path (unless a nested
    source is already set) for patterns read via 'include'.

    Raises error.Abort when a list file cannot be read, or when reading an
    included pattern file aborts. An included file that raises IOError is
    skipped, with a message sent to ``warn`` if one was given.
    """
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
        elif kind in (b'relglob', b'path', b'rootfilesin', b'rootglob'):
            pat = util.normpath(pat)
        elif kind in (b'listfile', b'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == b'listfile0':
                    files = files.split(b'\0')
                else:
                    files = files.splitlines()
                files = [f for f in files if f]
            except EnvironmentError:
                raise error.Abort(_(b"unable to read file list (%s)") % pat)
            # each entry of the list file is itself a pattern
            for k, p, source in _donormalize(
                files, default, root, cwd, auditor, warn
            ):
                kindpats.append((k, p, pat))
            continue
        elif kind == b'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(
                    includepats, default, root, cwd, auditor, warn
                ):
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                # prefix the message with the file that was being included
                raise error.Abort(
                    b'%s: %s'
                    % (
                        pat,
                        inst.message,
                    )
                )
            except IOError as inst:
                if warn:
                    warn(
                        _(b"skipping unreadable pattern file '%s': %s\n")
                        % (pat, stringutil.forcebytestr(inst.strerror))
                    )
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, b''))
    return kindpats

383

384

385

class basematcher:
    """Base class of the matchers in this module.

    On its own it matches nothing (matchfn() returns False) and lists no
    files; subclasses override the methods below.
    """

    def __init__(self, badfn=None):
        # only shadow the class-level bad() when a callback was supplied
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        """Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message."""

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        """Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots"""
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        """Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' (as bytes, b'all') if the given directory
        and all subdirectories should be visited. Otherwise returns True or
        False indicating whether the given directory should be visited.
        """
        return True

    def visitchildrenset(self, dir):
        """Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

             visitdir | visitchildrenset
            ----------+-------------------
             False    | set()
             'all'    | 'all'
             True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        """
        return b'this'

    def always(self):
        """Matcher will match everything and .files() will be empty --
        optimization might be possible."""
        return False

    def isexact(self):
        """Matcher will match exactly the list of files in .files() --
        optimization might be possible."""
        return False

    def prefix(self):
        """Matcher will match the paths in .files() recursively --
        optimization might be possible."""
        return False

    def anypats(self):
        """None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult."""
        return not self.always() and not self.isexact() and not self.prefix()

499

500

501

class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, badfn=None):
        super().__init__(badfn)

    def always(self):
        return True

    def matchfn(self, f):
        return True

    def visitdir(self, dir):
        # every directory, and everything below it, is of interest
        return b'all'

    def visitchildrenset(self, dir):
        return b'all'

    def __repr__(self):
        return r'<alwaysmatcher>'

521

522

523

class nevermatcher(basematcher):
    '''Matches nothing.'''

    def __init__(self, badfn=None):
        super().__init__(badfn)

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        # nothing can match, so no directory needs to be visited
        return False

    def visitchildrenset(self, dir):
        return set()

    def __repr__(self):
        return r'<nevermatcher>'

548

549

550

class predicatematcher(basematcher):
    """A matcher adapter for a simple boolean function"""

    def __init__(self, predfn, predrepr=None, badfn=None):
        """Wrap ``predfn`` as this matcher's ``matchfn``.

        ``predrepr`` is kept for use by ``__repr__``; ``badfn`` is
        forwarded to the base class.
        """
        super(predicatematcher, self).__init__(badfn)
        self.matchfn = predfn
        self._predrepr = predrepr

    @encoding.strmethod
    def __repr__(self):
        # Prefer the description built from the supplied predrepr; fall
        # back to the byte repr of the predicate when that is empty.
        described = stringutil.buildrepr(self._predrepr)
        if not described:
            described = pycompat.byterepr(self.matchfn)
        return b'<predicatenmatcher pred=%s>' % described

564

565

566

def path_or_parents_in_set(path, prefix_set):
    """Tell whether `path`, or any parent of `path`, is in `prefix_set`.

    Returns True or False.
    """
    count = len(prefix_set)
    if not count:
        return False
    if path in prefix_set:
        return True

    # With more than 5 entries in prefix_set it is *probably* cheaper to
    # walk up the directory hierarchy of `path` and do a hash lookup per
    # level, on the assumption that hierarchies are fairly shallow.
    if count > 5:
        for ancestor in pathutil.finddirs(path):
            if ancestor in prefix_set:
                return True
        return False

    # FIXME: Ideally we would never reach this point in that case - we
    # would have recognized ourselves as an 'always' matcher earlier.
    if b'' in prefix_set:
        return True

    slash = ord(b'/')

    # `path` is known not to be an exact member of prefix_set, so whenever
    # startswith() succeeds `path` is strictly longer than the prefix and
    # indexing just past the prefix cannot raise IndexError.
    for candidate in prefix_set:
        if path.startswith(candidate) and path[len(candidate)] == slash:
            return True
    return False

591

592

593

class patternmatcher(basematcher):
    r"""Matches a set of (kind, pat, source) against a 'root' directory.

    >>> kindpats = [
    ...     (b're', br'.*\.c$', b''),
    ...     (b'path', b'foo/a', b''),
    ...     (b'relpath', b'b', b''),
    ...     (b'glob', b'*.h', b''),
    ... ]
    >>> m = patternmatcher(b'foo', kindpats)
    >>> m(b'main.c')  # matches re:.*\.c$
    True
    >>> m(b'b.txt')
    False
    >>> m(b'foo/a')  # matches path:foo/a
    True
    >>> m(b'a')  # does not match path:b, since 'root' is 'foo'
    False
    >>> m(b'b')  # matches relpath:b, since 'root' is 'foo'
    True
    >>> m(b'lib.h')  # matches glob:*.h
    True

    >>> m.files()
    [b'', b'foo/a', b'', b'b']
    >>> m.exact(b'foo/a')
    True
    >>> m.exact(b'b')
    True
    >>> m.exact(b'lib.h')  # exact matches are for (rel)path kinds
    False
    """

    def __init__(self, root, kindpats, badfn=None):
        """Build the matcher from ``kindpats``.

        Note that ``kindpats`` is sorted in place before being used.
        """
        super(patternmatcher, self).__init__(badfn)
        kindpats.sort()

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        self._pats, self._matchfn = _buildmatch(kindpats, b'$', root)

    def matchfn(self, fn):
        """Accept ``fn`` if it is in the file set or the built matcher
        accepts it."""
        # A member of the file set matches outright; otherwise defer to
        # the function produced by _buildmatch.
        return fn in self._fileset or self._matchfn(fn)

    @propertycache
    def _dirs(self):
        """Set built from pathutil.dirs() over the file set (cached)."""
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        """Answer b'all', True or False for ``dir``."""
        if self._prefix and dir in self._fileset:
            return b'all'
        if dir in self._dirs:
            return True
        return path_or_parents_in_set(dir, self._fileset)

    def visitchildrenset(self, dir):
        """Translate the visitdir() answer into the visitchildrenset form.

        True becomes b'this', a false answer becomes an empty set, and
        b'all' is passed through.
        """
        verdict = self.visitdir(dir)
        if verdict is True:
            return b'this'
        if not verdict:
            return set()
        assert verdict == b'all'
        return b'all'

    def prefix(self):
        """Return the value computed by _prefix() at construction time."""
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return b'<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)

658

663

659

664

660

# This is basically a reimplementation of pathutil.dirs that stores the

665

# This is basically a reimplementation of pathutil.dirs that stores the

661

# children instead of just a count of them, plus a small optional optimization

666

# children instead of just a count of them, plus a small optional optimization

662

# to avoid some directories we don't need.

667

# to avoid some directories we don't need.

663

class _dirchildren:

668

class _dirchildren:

664

def __init__(self, paths, onlyinclude=None):

669

def __init__(self, paths, onlyinclude=None):

665

self._dirs = {}

670

self._dirs = {}

666

self._onlyinclude = onlyinclude or []

671

self._onlyinclude = onlyinclude or []

667

addpath = self.addpath

672

addpath = self.addpath

668

for f in paths:

673

for f in paths:

669

addpath(f)

674

addpath(f)

670

675

671

def addpath(self, path):

676

def addpath(self, path):

672

if path == b'':

677

if path == b'':

673

return

678

return

674

dirs = self._dirs

679

dirs = self._dirs

675

findsplitdirs = _dirchildren._findsplitdirs

680

findsplitdirs = _dirchildren._findsplitdirs

676

for d, b in findsplitdirs(path):

681

for d, b in findsplitdirs(path):

677

if d not in self._onlyinclude:

682

if d not in self._onlyinclude:

678

continue

683

continue

679

dirs.setdefault(d, set()).add(b)

684

dirs.setdefault(d, set()).add(b)

680

685

681

@staticmethod

686

@staticmethod

682

def _findsplitdirs(path):

687

def _findsplitdirs(path):

683

# yields (dirname, basename) tuples, walking back to the root. This is

688

# yields (dirname, basename) tuples, walking back to the root. This is

684

# very similar to pathutil.finddirs, except:

689

# very similar to pathutil.finddirs, except:

685

# - produces a (dirname, basename) tuple, not just 'dirname'

690

# - produces a (dirname, basename) tuple, not just 'dirname'

686

# Unlike manifest._splittopdir, this does not suffix `dirname` with a

691

# Unlike manifest._splittopdir, this does not suffix `dirname` with a

687

# slash.

692

# slash.

688

oldpos = len(path)

693

oldpos = len(path)

689

pos = path.rfind(b'/')

694

pos = path.rfind(b'/')

690

while pos != -1:

695

while pos != -1:

691

yield path[:pos], path[pos + 1 : oldpos]

696

yield path[:pos], path[pos + 1 : oldpos]

692

oldpos = pos

697

oldpos = pos

693

pos = path.rfind(b'/', 0, pos)

698

pos = path.rfind(b'/', 0, pos)

694

yield b'', path[:oldpos]

699

yield b'', path[:oldpos]

695

700

696

def get(self, path):

701

def get(self, path):

697

return self._dirs.get(path, set())

702

return self._dirs.get(path, set())

698

703

699

704

700

class includematcher(basematcher):
    """Matcher built from include patterns.

    The match function comes from _buildmatch(), and the directory-visit
    answers are derived from the roots, dirs and parents reported by
    _rootsdirsandparents().
    """

    def __init__(self, root, kindpats, badfn=None):
        super(includematcher, self).__init__(badfn)
        if rustmod is not None:
            # The patterns have to be handed to Rust, since they may
            # contain patterns coming from the user interface.
            self._kindpats = kindpats
        self._pats, self.matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
        self._prefix = _prefix(kindpats)
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots: directories that are included recursively.
        self._roots = set(roots)
        # dirs: directories that are included non-recursively.
        self._dirs = set(dirs)
        # parents: directories included non-recursively only because they
        # must be traversed to reach something in _dirs or _roots.
        self._parents = parents

    def visitdir(self, dir):
        """Answer b'all', True or False for ``dir``."""
        if self._prefix and dir in self._roots:
            return b'all'
        if dir in self._dirs:
            return True
        if dir in self._parents:
            return True
        return path_or_parents_in_set(dir, self._roots)

    @propertycache
    def _allparentschildren(self):
        # Feeding in dirs, roots *and* parents and then restricting the
        # result to parents may look odd. It covers this case:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # Had only self._parents been added, asking for the children of
        # 'foo' could not be answered with ['bar'].
        everything = itertools.chain(self._dirs, self._roots, self._parents)
        return _dirchildren(everything, onlyinclude=self._parents)

    def visitchildrenset(self, dir):
        """Answer b'all', b'this' or a set of child names for ``dir``."""
        if self._prefix and dir in self._roots:
            return b'all'
        # Note: the 'dir in self._parents' case of visitdir is deliberately
        # absent from this condition; it is dealt with further down.
        if (
            b'' in self._roots
            or dir in self._dirs
            or path_or_parents_in_set(dir, self._roots)
        ):
            return b'this'

        if dir not in self._parents:
            return set()
        return self._allparentschildren.get(dir) or set()

    @encoding.strmethod
    def __repr__(self):
        return b'<includematcher includes=%r>' % pycompat.bytestr(self._pats)

759

764

760

765

761

class exactmatcher(basematcher):
    r"""Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).

    >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
    >>> m(b'a.txt')
    True
    >>> m(b'b.txt')
    False

    Input files that would be matched are exactly those returned by .files()
    >>> m.files()
    ['a.txt', 're:.*\\.c$']

    So pattern 're:.*\.c$' is not considered as a regex, but as a file name
    >>> m(b'main.c')
    False
    >>> m(br're:.*\.c$')
    True
    """

    def __init__(self, files, badfn=None):
        """Remember ``files``; anything that is not a list is copied into
        one."""
        super(exactmatcher, self).__init__(badfn)

        self._files = files if isinstance(files, list) else list(files)

    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        """Set built from pathutil.dirs() over the file set (cached)."""
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        """Answer whether ``dir`` is one of the directories in _dirs."""
        return dir in self._dirs

    @propertycache
    def _visitchildrenset_candidates(self):
        """A memoized set of candidates for visitchildrenset."""
        # Set difference binds tighter than union: b'' is removed from the
        # directories only, then the files are added.
        return self._fileset | (self._dirs - {b''})

    @propertycache
    def _sorted_visitchildrenset_candidates(self):
        """A memoized sorted list of candidates for visitchildrenset."""
        return sorted(self._visitchildrenset_candidates)

    def visitchildrenset(self, dir):
        """Return the set of immediate child names to visit under ``dir``."""
        if not self._fileset or dir not in self._dirs:
            return set()

        if dir == b'':
            candidates = self._visitchildrenset_candidates
        else:
            ordered = self._sorted_visitchildrenset_candidates
            d = dir + b'/'
            # Bisect for the first element that could start with d (that
            # is, the first one >= d). At least one such element should
            # always exist; the assert at the end catches it otherwise.
            first = bisect.bisect_left(ordered, d)
            # An upper bound is needed: the first element greater than d
            # that does not start with d. Since dir was suffixed with `/`,
            # suffix it with whatever byte follows `/` instead (we could
            # probably assume that is `0`, but let's not).
            dnext = dir + encoding.strtolocal(chr(ord(b'/') + 1))
            # Bisect again for the first element >= dnext.
            last = bisect.bisect_left(ordered, dnext, lo=first)
            dlen = len(d)
            candidates = {c[dlen:] for c in ordered[first:last]}
        # self._dirs holds every directory recursively: when matching
        # foo/bar/baz.txt it contains '', 'foo' and 'foo/bar'. A candidate
        # that still has a '/' in it therefore belongs to a
        # subdir-of-a-subdir and can be dropped safely; the immediate
        # subdir is present without a slash.
        ret = {c for c in candidates if b'/' not in c}
        # ret is not expected to be empty: that would mean _dirs contains
        # something that has no file in _fileset.
        assert ret
        return ret

    def isexact(self):
        """This matcher is an exact matcher."""
        return True

    @encoding.strmethod
    def __repr__(self):
        return b'<exactmatcher files=%r>' % self._files

848

853

849

854

850

class differencematcher(basematcher):
    """Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.
    """

    def __init__(self, m1, m2):
        super(differencematcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        """Accept ``f`` when m1 accepts it and m2 does not."""
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        """Answer b'all', True or False for ``dir``."""
        # Ask m2 once; both branches below depend on the same answer.
        m2_visit = self._m2.visitdir(dir)
        if m2_visit == b'all':
            return False
        elif not m2_visit:
            # m2 does not match dir, we can return 'all' here if possible
            return self._m1.visitdir(dir)
        return bool(self._m1.visitdir(dir))

    def visitchildrenset(self, dir):
        """Answer b'all', b'this' or a set of child names for ``dir``."""
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == b'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in [b'all', b'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return b'this'
        # Possible values for m1:         set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        """The difference is exact whenever m1 is."""
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)

917

922

918

923

919

def intersectmatchers(m1, m2):
    """Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.
    """
    if m1 is None or m2 is None:
        return m1 or m2
    if m1.always():
        # m1 lets everything through, so a shallow copy of m2 is enough;
        # it only has to carry m1's bad/traversedir.
        combined = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        combined.bad = m1.bad
        combined.traversedir = m1.traversedir
        return combined
    if m2.always():
        # Symmetric case: the copy of m1 already has the right attributes.
        return copy.copy(m1)
    return intersectionmatcher(m1, m2)

937

942

938

943

939

class intersectionmatcher(basematcher):
    """Matcher that accepts a file only when both m1 and m2 accept it.

    The ``bad`` and ``traversedir`` attributes are taken from m1.
    """

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not
            # always files. For example, if intersecting a matcher
            # "-I glob:foo.txt" with matcher of "path:dir2", we don't want
            # to remove "dir2" from the set.
            return self._m1.files() + self._m2.files()
        # Enumerate the files of an exact side (m1 when it is exact,
        # otherwise m2) and keep those the other side accepts.
        exact, other = self._m1, self._m2
        if not exact.isexact():
            exact, other = other, exact
        return [f for f in exact.files() if other(f)]

    def matchfn(self, f):
        """Accept ``f`` when both matchers accept it."""
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        """Answer b'all', True or False for ``dir``."""
        visit1 = self._m1.visitdir(dir)
        if visit1 == b'all':
            return self._m2.visitdir(dir)
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(visit1 and self._m2.visitdir(dir))

    def visitchildrenset(self, dir):
        """Answer b'all', b'this' or a set of child names for ``dir``."""
        m1_set = self._m1.visitchildrenset(dir)
        if not m1_set:
            return set()
        m2_set = self._m2.visitchildrenset(dir)
        if not m2_set:
            return set()

        # When one side says 'all', the other side's answer decides.
        if m1_set == b'all':
            return m2_set
        if m2_set == b'all':
            return m1_set

        if m1_set == b'this' or m2_set == b'this':
            return b'this'

        # Only real sets are left at this point.
        assert isinstance(m1_set, set) and isinstance(m2_set, set)
        return m1_set.intersection(m2_set)

    def always(self):
        """True only when both matchers always match."""
        return self._m1.always() and self._m2.always()

    def isexact(self):
        """True when at least one of the two matchers is exact."""
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)

998

1003

999

1004

1000

class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> from . import pycompat
    >>> m1 = match(util.localpath(b'/root'), b'', [b'a.txt', b'sub/b.txt'], auditor=lambda name: None)
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'b.txt')
    True
    >>> m2.matchfn(b'a.txt')
    False
    >>> m2.matchfn(b'b.txt')
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        """Wrap ``matcher`` so that it operates relative to ``path``.

        ``path`` is the subdirectory (bytes, no trailing slash) and
        ``matcher`` is the matcher whose paths include that prefix.
        """
        super(subdirmatcher, self).__init__()
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the wrapped matcher's files that live below ``path``,
        # with the "path/" prefix stripped.
        prefix = path + b"/"
        self._files = [
            f[len(prefix) :] for f in matcher._files if f.startswith(prefix)
        ]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(f == path for f in matcher._files)

    def bad(self, f, msg):
        """Forward a bad-file report to the wrapped matcher, re-prefixed."""
        self._matcher.bad(self._path + b"/" + f, msg)

    def matchfn(self, f):
        """Match ``f`` (relative to the subdirectory) via the wrapped matcher."""
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._path + b"/" + f)

    def visitdir(self, dir):
        """Ask the wrapped matcher about ``dir`` translated to its namespace."""
        if dir == b'':
            # The subdirectory root itself.
            dir = self._path
        else:
            dir = self._path + b"/" + dir
        return self._matcher.visitdir(dir)

    def visitchildrenset(self, dir):
        """Like visitdir(), but delegating to ``visitchildrenset()``."""
        if dir == b'':
            dir = self._path
        else:
            dir = self._path + b"/" + dir
        return self._matcher.visitchildrenset(dir)

    def always(self):
        """Return the always-matches flag computed in ``__init__``."""
        return self._always

    def prefix(self):
        """Return True if the wrapped matcher is a prefix matcher and this
        matcher does not always match."""
        return self._matcher.prefix() and not self._always

    @encoding.strmethod
    def __repr__(self):
        return b'<subdirmatcher path=%r, matcher=%r>' % (
            self._path,
            self._matcher,
        )

1080

1085

1081

1086

1082

class prefixdirmatcher(basematcher):
    """Adapt a matcher to work on a parent directory.

    The matcher's non-matching-attributes (bad, traversedir) are ignored.

    The prefix path should usually be the relative path from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(util.localpath(b'/root/d/e'), b'f', [b'../a.txt', b'b.txt'], auditor=lambda name: None)
    >>> m2 = prefixdirmatcher(b'd/e', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'd/e/a.txt')
    True
    >>> m2(b'd/e/b.txt')
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, path, matcher, badfn=None):
        """Wrap ``matcher`` so its paths appear below ``path``.

        Raises error.ProgrammingError if ``path`` is empty.
        """
        super(prefixdirmatcher, self).__init__(badfn)
        if not path:
            raise error.ProgrammingError(b'prefix path must not be empty')
        self._path = path
        self._pathprefix = path + b'/'
        self._matcher = matcher

    @propertycache
    def _files(self):
        """The wrapped matcher's files, each prefixed with ``path/``."""
        return [self._pathprefix + f for f in self._matcher._files]

    def matchfn(self, f):
        """Match ``f`` only if it lies below the prefix, delegating the
        remainder of the path to the wrapped matcher."""
        if not f.startswith(self._pathprefix):
            return False
        return self._matcher.matchfn(f[len(self._pathprefix) :])

    @propertycache
    def _pathdirs(self):
        """Set of directories produced by ``pathutil.finddirs`` for the
        prefix path."""
        return set(pathutil.finddirs(self._path))

    def visitdir(self, dir):
        """Translate ``dir`` into the wrapped matcher's namespace when it is
        at or below the prefix; otherwise report whether ``dir`` is one of
        the prefix's own directories."""
        if dir == self._path:
            return self._matcher.visitdir(b'')
        if dir.startswith(self._pathprefix):
            return self._matcher.visitdir(dir[len(self._pathprefix) :])
        return dir in self._pathdirs

    def visitchildrenset(self, dir):
        """Like visitdir(), but for ``visitchildrenset()``: directories on
        the way to the prefix yield ``b'this'``, unrelated ones an empty
        set."""
        if dir == self._path:
            return self._matcher.visitchildrenset(b'')
        if dir.startswith(self._pathprefix):
            return self._matcher.visitchildrenset(dir[len(self._pathprefix) :])
        if dir in self._pathdirs:
            return b'this'
        return set()

    def isexact(self):
        return self._matcher.isexact()

    def prefix(self):
        return self._matcher.prefix()

    @encoding.strmethod
    def __repr__(self):
        return b'<prefixdirmatcher path=%r, matcher=%r>' % (
            pycompat.bytestr(self._path),
            self._matcher,
        )

1163

1168

1164

1169

1165

class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    The non-matching-attributes (bad, traversedir) are taken from the first
    matcher.
    """

    def __init__(self, matchers):
        """``matchers`` is a non-empty sequence; its first element supplies
        ``traversedir``."""
        m1 = matchers[0]
        super(unionmatcher, self).__init__()
        self.traversedir = m1.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        """Return True as soon as any wrapped matcher matches ``f``."""
        for match in self._matchers:
            if match(f):
                return True
        return False

    def visitdir(self, dir):
        """Return ``b'all'`` if any matcher says so, otherwise the OR of
        the individual answers."""
        r = False
        for m in self._matchers:
            v = m.visitdir(dir)
            if v == b'all':
                return v
            # NOTE(review): presumably v is a bool here; confirm that no
            # matcher returns another non-bool value from visitdir().
            r |= v
        return r

    def visitchildrenset(self, dir):
        """Combine the matchers' child sets.

        ``b'all'`` from any matcher wins immediately; otherwise ``b'this'``
        from any matcher wins; otherwise the union of all returned sets.
        """
        r = set()
        this = False
        for m in self._matchers:
            v = m.visitchildrenset(dir)
            if not v:
                continue
            if v == b'all':
                return v
            if this or v == b'this':
                this = True
                # don't break, we might have an 'all' in here.
                continue
            assert isinstance(v, set)
            r = r.union(v)
        if this:
            return b'this'
        return r

    @encoding.strmethod
    def __repr__(self):
        return b'<unionmatcher matchers=%r>' % self._matchers

1215

1220

1216

1221

1217

def patkind(pattern, default=None):
    r"""If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise ``default`` is returned.

    >>> patkind(br're:.*\.c$')
    're'
    >>> patkind(b'glob:*.c')
    'glob'
    >>> patkind(b'relpath:test.py')
    'relpath'
    >>> patkind(b'main.py')
    >>> patkind(b'main.py', default=b're')
    're'
    """
    return _patsplit(pattern, default)[0]

1231

1236

1232

1237

1233

def _patsplit(pattern, default):
    """Split a string into the optional pattern kind prefix and the actual
    pattern.

    Returns ``(kind, pat)`` when the text before the first ``:`` is one of
    ``allpatternkinds``; otherwise ``(default, pattern)`` with the pattern
    left untouched."""
    if b':' in pattern:
        kind, pat = pattern.split(b':', 1)
        if kind in allpatternkinds:
            return kind, pat
    return default, pattern

1241

1246

1242

1247

1243

def _globre(pat):
    r"""Convert an extended glob string to a regexp string.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    """
    i, n = 0, len(pat)
    res = b''
    group = 0  # current nesting depth of '{...}' alternations
    escape = util.stringutil.regexbytesescapemap.get

    def peek():
        # Next unread byte as a length-1 bytes object, or False at the end.
        return i < n and pat[i : i + 1]

    while i < n:
        c = pat[i : i + 1]
        i += 1
        if c not in b'*?[{},\\':
            res += escape(c, c)
        elif c == b'*':
            if peek() == b'*':
                i += 1
                if peek() == b'/':
                    # '**/' also matches zero directories
                    i += 1
                    res += b'(?:.*/)?'
                else:
                    res += b'.*'
            else:
                res += b'[^/]*'
        elif c == b'?':
            res += b'.'
        elif c == b'[':
            # Find the closing ']'; a leading '!' or ']' is part of the set.
            j = i
            if j < n and pat[j : j + 1] in b'!]':
                j += 1
            while j < n and pat[j : j + 1] != b']':
                j += 1
            if j >= n:
                # Unterminated set: treat '[' literally.
                res += b'\\['
            else:
                stuff = pat[i:j].replace(b'\\', b'\\\\')
                i = j + 1
                if stuff[0:1] == b'!':
                    stuff = b'^' + stuff[1:]
                elif stuff[0:1] == b'^':
                    stuff = b'\\' + stuff
                res = b'%s[%s]' % (res, stuff)
        elif c == b'{':
            group += 1
            res += b'(?:'
        elif c == b'}' and group:
            res += b')'
            group -= 1
        elif c == b',' and group:
            res += b'|'
        elif c == b'\\':
            # A backslash escapes the following byte, if there is one.
            p = peek()
            if p:
                i += 1
                res += escape(p, p)
            else:
                res += escape(c, c)
        else:
            # '}' or ',' outside of a group: literal.
            res += escape(c, c)
    return res

1325

1330

1326

1331

1327

# Matches a pattern that starts with an inline flag group such as "(?i)":
# group 1 is the flag letters, group 2 the remainder of the pattern.
FLAG_RE = util.re.compile(br'^\(\?([aiLmsux]+)\)(.*)')

1328

1333

1329

1334

1330

def _regex(kind, pat, globsuffix):
    """Convert a (normalized) pattern of any kind into a
    regular expression.
    globsuffix is appended to the regexp of globs.

    Raises error.ProgrammingError for a kind that has no regexp form."""
    if not pat and kind in (b'glob', b'relpath'):
        return b''
    if kind == b're':
        return pat
    if kind in (b'path', b'relpath'):
        if pat == b'.':
            return b''
        return util.stringutil.reescape(pat) + b'(?:/|$)'
    if kind == b'rootfilesin':
        if pat == b'.':
            escaped = b''
        else:
            # Pattern is a directory name.
            escaped = util.stringutil.reescape(pat) + b'/'
        # Anything after the pattern must be a non-directory.
        return escaped + b'[^/]+$'
    if kind == b'relglob':
        globre = _globre(pat)
        if globre.startswith(b'[^/]*'):
            # When pat has the form *XYZ (common), make the returned regex more
            # legible by returning the regex for **XYZ instead of **/*XYZ.
            return b'.*' + globre[len(b'[^/]*') :] + globsuffix
        return b'(?:|.*/)' + globre + globsuffix
    if kind == b'relre':
        flag = None
        m = FLAG_RE.match(pat)
        if m:
            # Split off what FLAG_RE captured so the unanchoring prefix
            # below is added to the remaining pattern only.
            flag, pat = m.groups()
        if not pat.startswith(b'^'):
            pat = b'.*' + pat
        if flag is not None:
            pat = br'(?%s:%s)' % (flag, pat)
        return pat
    if kind in (b'glob', b'rootglob'):
        return _globre(pat) + globsuffix
    raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))

1370

1375

1371

1376

1372

def _buildmatch(kindpats, globsuffix, root):
    """Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs."""
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # Matchers for subincludes are built lazily and cached per prefix.
        submatchers = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if f.startswith(prefix):
                    mf = submatchers.get(prefix)
                    if mf is None:
                        mf = match(*matcherargs)
                        submatchers[prefix] = mf

                    if mf(f[len(prefix) :]):
                        return True
            return False

        matchfuncs.append(matchsubinclude)

    regex = b''
    if kindpats:
        if all(k == b'rootfilesin' for k, p, s in kindpats):
            # Only 'rootfilesin' patterns: a set lookup on the file's
            # directory replaces the regexp.
            dirs = {p for k, p, s in kindpats}

            def mf(f):
                i = f.rfind(b'/')
                if i >= 0:
                    dir = f[:i]
                else:
                    dir = b'.'
                return dir in dirs

            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
            matchfuncs.append(mf)
        else:
            regex, mf = _buildregexmatch(kindpats, globsuffix)
            matchfuncs.append(mf)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]
    else:
        return regex, lambda f: any(mf(f) for mf in matchfuncs)

1418

1423

1419

1424

1420

# Upper bound, in bytes, on a single regexp (or joined group of regexps)
# handed to the regexp compiler by _buildregexmatch().
MAX_RE_SIZE = 20000

1421

1426

1422

1427

1423

def _joinregexes(regexps):
    """gather multiple regular expressions into a single one

    The expressions (bytes) are combined as alternatives separated by
    ``|``."""
    return b'|'.join(regexps)

1426

1431

1427

1432

1428

def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Raises error.Abort if a single pattern exceeds MAX_RE_SIZE or if a
    pattern fails to compile.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        allgroups = []
        regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        fullregexp = _joinregexes(regexps)

        # Split the regexps into consecutive groups whose joined size stays
        # within MAX_RE_SIZE.
        startidx = 0
        groupsize = 0
        for idx, r in enumerate(regexps):
            piecesize = len(r)
            if piecesize > MAX_RE_SIZE:
                msg = _(b"matcher pattern is too long (%d bytes)") % piecesize
                raise error.Abort(msg)
            elif (groupsize + piecesize) > MAX_RE_SIZE:
                group = regexps[startidx:idx]
                allgroups.append(_joinregexes(group))
                startidx = idx
                groupsize = 0
            # +1 accounts for the '|' separator added when joining
            groupsize += piecesize + 1

        if startidx == 0:
            # Everything fit in one group: compile the full regexp.
            matcher = _rematcher(fullregexp)
            func = lambda s: bool(matcher(s))
        else:
            group = regexps[startidx:]
            allgroups.append(_joinregexes(group))
            allmatchers = [_rematcher(g) for g in allgroups]
            func = lambda s: any(m(s) for m in allmatchers)
        return fullregexp, func
    except re.error:
        # Recompile patterns one by one to report the offending one.
        for k, p, s in kindpats:
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise error.Abort(
                        _(b"%s: invalid pattern (%s): %s") % (s, k, p)
                    )
                else:
                    raise error.Abort(_(b"invalid pattern (%s): %s") % (k, p))
        raise error.Abort(_(b"invalid pattern"))

1480

1485

1481

1486

1482

def _patternrootsanddirs(kindpats):
    """Returns roots and directories corresponding to each pattern.

    This calculates the roots and directories exactly matching the patterns and
    returns a tuple of (roots, dirs) for each. It does not return other
    directories which may also need to be considered, like the parent
    directories.

    ``kindpats`` is an iterable of ``(kind, pat, source)`` tuples; ``source``
    is not used here.
    """
    r = []
    d = []
    for kind, pat, source in kindpats:
        if kind in (b'glob', b'rootglob'):  # find the non-glob prefix
            root = []
            for p in pat.split(b'/'):
                if b'[' in p or b'{' in p or b'*' in p or b'?' in p:
                    break
                root.append(p)
            r.append(b'/'.join(root))
        elif kind in (b'relpath', b'path'):
            if pat == b'.':
                pat = b''
            r.append(pat)
        elif kind in (b'rootfilesin',):
            if pat == b'.':
                pat = b''
            d.append(pat)
        else:  # relglob, re, relre
            r.append(b'')
    return r, d

1511

1516

1512

1517

1513

def _roots(kindpats):
    '''Returns root directories to match recursively from the given patterns.'''
    # Only the roots are wanted; the second element (dirs) is dropped.
    roots, _dirs = _patternrootsanddirs(kindpats)
    return roots

1517

1522

1518

1523

1519

def _rootsdirsandparents(kindpats):

1524

def _rootsdirsandparents(kindpats):

1520

"""Returns roots and exact directories from patterns.

1525

"""Returns roots and exact directories from patterns.

1521

1526

1522

`roots` are directories to match recursively, `dirs` should

1527

`roots` are directories to match recursively, `dirs` should

1523

be matched non-recursively, and `parents` are the implicitly required

1528

be matched non-recursively, and `parents` are the implicitly required

1524

directories to walk to items in either roots or dirs.

1529

directories to walk to items in either roots or dirs.

1525

1530

1526

Returns a tuple of (roots, dirs, parents).

1531

Returns a tuple of (roots, dirs, parents).

1527

1532

1528

>>> r = _rootsdirsandparents(

1533

>>> r = _rootsdirsandparents(

1529

... [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),

1534

... [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),

1530

... (b'glob', b'g*', b'')])

1535

... (b'glob', b'g*', b'')])

1531

>>> print(r[0:2], sorted(r[2])) # the set has an unstable output

1536

>>> print(r[0:2], sorted(r[2])) # the set has an unstable output

1532

(['g/h', 'g/h', ''], []) ['', 'g']

1537

(['g/h', 'g/h', ''], []) ['', 'g']

1533

>>> r = _rootsdirsandparents(

1538

>>> r = _rootsdirsandparents(

1534

... [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])

1539

... [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])

1535

>>> print(r[0:2], sorted(r[2])) # the set has an unstable output

1540

>>> print(r[0:2], sorted(r[2])) # the set has an unstable output

1536

([], ['g/h', '']) ['', 'g']

1541

([], ['g/h', '']) ['', 'g']

1537

>>> r = _rootsdirsandparents(

1542

>>> r = _rootsdirsandparents(

1538

... [(b'relpath', b'r', b''), (b'path', b'p/p', b''),

1543

... [(b'relpath', b'r', b''), (b'path', b'p/p', b''),

1539

... (b'path', b'', b'')])

1544

... (b'path', b'', b'')])

1540

>>> print(r[0:2], sorted(r[2])) # the set has an unstable output

1545

>>> print(r[0:2], sorted(r[2])) # the set has an unstable output

1541

(['r', 'p/p', ''], []) ['', 'p']

1546

(['r', 'p/p', ''], []) ['', 'p']

1542

>>> r = _rootsdirsandparents(

1547

>>> r = _rootsdirsandparents(

1543

... [(b'relglob', b'rg*', b''), (b're', b're/', b''),

1548

... [(b'relglob', b'rg*', b''), (b're', b're/', b''),

1544

... (b'relre', b'rr', b'')])

1549

... (b'relre', b'rr', b'')])

1545

>>> print(r[0:2], sorted(r[2])) # the set has an unstable output

1550

>>> print(r[0:2], sorted(r[2])) # the set has an unstable output

1546

(['', '', ''], []) ['']

1551

(['', '', ''], []) ['']

1547

"""

1552

"""

1548

r, d = _patternrootsanddirs(kindpats)

1553

r, d = _patternrootsanddirs(kindpats)

1549

1554

1550

p = set()

1555

p = set()

1551

# Add the parents as non-recursive/exact directories, since they must be

1556

# Add the parents as non-recursive/exact directories, since they must be

1552

# scanned to get to either the roots or the other exact directories.

1557

# scanned to get to either the roots or the other exact directories.

1553

p.update(pathutil.dirs(d))

1558

p.update(pathutil.dirs(d))

1554

p.update(pathutil.dirs(r))

1559

p.update(pathutil.dirs(r))

1555

1560

1556

# FIXME: all uses of this function convert these to sets, do so before

1561

# FIXME: all uses of this function convert these to sets, do so before

1557

# returning.

1562

# returning.

1558

# FIXME: all uses of this function do not need anything in 'roots' and

1563

# FIXME: all uses of this function do not need anything in 'roots' and

1559

# 'dirs' to also be in 'parents', consider removing them before returning.

1564

# 'dirs' to also be in 'parents', consider removing them before returning.

1560

return r, d, p

1565

return r, d, p

1561

1566

1562

1567

1563

def _explicitfiles(kindpats):

1568

def _explicitfiles(kindpats):

1564

"""Returns the potential explicit filenames from the patterns.

1569

"""Returns the potential explicit filenames from the patterns.

1565

1570

1566

>>> _explicitfiles([(b'path', b'foo/bar', b'')])

1571

>>> _explicitfiles([(b'path', b'foo/bar', b'')])

1567

['foo/bar']

1572

['foo/bar']

1568

>>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])

1573

>>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])

1569

[]

1574

[]

1570

"""

1575

"""

1571

# Keep only the pattern kinds where one can specify filenames (vs only

1576

# Keep only the pattern kinds where one can specify filenames (vs only

1572

# directory names).

1577

# directory names).

1573

filable = [kp for kp in kindpats if kp[0] not in (b'rootfilesin',)]

1578

filable = [kp for kp in kindpats if kp[0] not in (b'rootfilesin',)]

1574

return _roots(filable)

1579

return _roots(filable)

1575

1580

1576

1581

1577

def _prefix(kindpats):

1582

def _prefix(kindpats):

1578

'''Whether all the patterns match a prefix (i.e. recursively)'''

1583

'''Whether all the patterns match a prefix (i.e. recursively)'''

1579

for kind, pat, source in kindpats:

1584

for kind, pat, source in kindpats:

1580

if kind not in (b'path', b'relpath'):

1585

if kind not in (b'path', b'relpath'):

1581

return False

1586

return False

1582

return True

1587

return True

1583

1588

1584

1589

1585

_commentre = None

1590

_commentre = None

1586

1591

1587

1592

1588

def readpatternfile(filepath, warn, sourceinfo=False):

1593

def readpatternfile(filepath, warn, sourceinfo=False):

1589

"""parse a pattern file, returning a list of

1594

"""parse a pattern file, returning a list of

1590

patterns. These patterns should be given to compile()

1595

patterns. These patterns should be given to compile()

1591

to be validated and converted into a match function.

1596

to be validated and converted into a match function.

1592

1597

1593

trailing white space is dropped.

1598

trailing white space is dropped.

1594

the escape character is backslash.

1599

the escape character is backslash.

1595

comments start with #.

1600

comments start with #.

1596

empty lines are skipped.

1601

empty lines are skipped.

1597

1602

1598

lines can be of the following formats:

1603

lines can be of the following formats:

1599

1604

1600

syntax: regexp # defaults following lines to non-rooted regexps

1605

syntax: regexp # defaults following lines to non-rooted regexps

1601

syntax: glob # defaults following lines to non-rooted globs

1606

syntax: glob # defaults following lines to non-rooted globs

1602

re:pattern # non-rooted regular expression

1607

re:pattern # non-rooted regular expression

1603

glob:pattern # non-rooted glob

1608

glob:pattern # non-rooted glob

1604

rootglob:pat # rooted glob (same root as ^ in regexps)

1609

rootglob:pat # rooted glob (same root as ^ in regexps)

1605

pattern # pattern of the current default type

1610

pattern # pattern of the current default type

1606

1611

1607

if sourceinfo is set, returns a list of tuples:

1612

if sourceinfo is set, returns a list of tuples:

1608

(pattern, lineno, originalline).

1613

(pattern, lineno, originalline).

1609

This is useful to debug ignore patterns.

1614

This is useful to debug ignore patterns.

1610

"""

1615

"""

1611

1616

1612

syntaxes = {

1617

syntaxes = {

1613

b're': b'relre:',

1618

b're': b'relre:',

1614

b'regexp': b'relre:',

1619

b'regexp': b'relre:',

1615

b'glob': b'relglob:',

1620

b'glob': b'relglob:',

1616

b'rootglob': b'rootglob:',

1621

b'rootglob': b'rootglob:',

1617

b'include': b'include',

1622

b'include': b'include',

1618

b'subinclude': b'subinclude',

1623

b'subinclude': b'subinclude',

1619

}

1624

}

1620

syntax = b'relre:'

1625

syntax = b'relre:'

1621

patterns = []

1626

patterns = []

1622

1627

1623

fp = open(filepath, b'rb')

1628

fp = open(filepath, b'rb')

1624

for lineno, line in enumerate(fp, start=1):

1629

for lineno, line in enumerate(fp, start=1):

1625

if b"#" in line:

1630

if b"#" in line:

1626

global _commentre

1631

global _commentre

1627

if not _commentre:

1632

if not _commentre:

1628

_commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')

1633

_commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')

1629

# remove comments prefixed by an even number of escapes

1634

# remove comments prefixed by an even number of escapes

1630

m = _commentre.search(line)

1635

m = _commentre.search(line)

1631

if m:

1636

if m:

1632

line = line[: m.end(1)]

1637

line = line[: m.end(1)]

1633

# fixup properly escaped comments that survived the above

1638

# fixup properly escaped comments that survived the above

1634

line = line.replace(b"\\#", b"#")

1639

line = line.replace(b"\\#", b"#")

1635

line = line.rstrip()

1640

line = line.rstrip()

1636

if not line:

1641

if not line:

1637

continue

1642

continue

1638

1643

1639

if line.startswith(b'syntax:'):

1644

if line.startswith(b'syntax:'):

1640

s = line[7:].strip()

1645

s = line[7:].strip()

1641

try:

1646

try:

1642

syntax = syntaxes[s]

1647

syntax = syntaxes[s]

1643

except KeyError:

1648

except KeyError:

1644

if warn:

1649

if warn:

1645

warn(

1650

warn(

1646

_(b"%s: ignoring invalid syntax '%s'\n") % (filepath, s)

1651

_(b"%s: ignoring invalid syntax '%s'\n") % (filepath, s)

1647

)

1652

)

1648

continue

1653

continue

1649

1654

1650

linesyntax = syntax

1655

linesyntax = syntax

1651

for s, rels in syntaxes.items():

1656

for s, rels in syntaxes.items():

1652

if line.startswith(rels):

1657

if line.startswith(rels):

1653

linesyntax = rels

1658

linesyntax = rels

1654

line = line[len(rels) :]

1659

line = line[len(rels) :]

1655

break

1660

break

1656

elif line.startswith(s + b':'):

1661

elif line.startswith(s + b':'):

1657

linesyntax = rels

1662

linesyntax = rels

1658

line = line[len(s) + 1 :]

1663

line = line[len(s) + 1 :]

1659

break

1664

break

1660

if sourceinfo:

1665

if sourceinfo:

1661

patterns.append((linesyntax + line, lineno, line))

1666

patterns.append((linesyntax + line, lineno, line))

1662

else:

1667

else:

1663

patterns.append(linesyntax + line)

1668

patterns.append(linesyntax + line)

1664

fp.close()

1669

fp.close()

1665

return patterns

1670

return patterns

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # match.py - filename matching
             #
             #  Copyright 2008, 2009 Olivia Mackall <olivia@selenic.com> and others
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             import bisect
             import copy
             import itertools
             import os
             import re
             from .i18n import _
             from .pycompat import open
             from . import (
                 encoding,
                 error,
                 pathutil,
                 policy,
                 pycompat,
                 util,
             )
             from .utils import stringutil
             rustmod = policy.importrust('dirstate')
             allpatternkinds = (
                 b're',
                 b'glob',
                 b'path',
                 b'relglob',
                 b'relpath',
                 b'relre',
                 b'rootglob',
                 b'listfile',
                 b'listfile0',
                 b'set',
                 b'include',
                 b'subinclude',
                 b'rootfilesin',
             )
             cwdrelativepatternkinds = (b'relpath', b'glob')
             propertycache = util.propertycache
             def _rematcher(regex):
                 """Compile ``regex`` via ``util.re`` and return a matching callable.

                 The compiled object's ``test_match`` attribute is preferred when it
                 exists (slightly faster, provided by facebook's re2 bindings);
                 otherwise its ``match`` method is returned.
                 """
                 compiled = util.re.compile(regex)
                 if hasattr(compiled, 'test_match'):
                     return compiled.test_match
                 return compiled.match
             def _expandsets(cwd, kindpats, ctx=None, listsubrepos=False, badfn=None):
                 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
                 matchers = []
                 other = []
                 for kind, pat, source in kindpats:
                     if kind == b'set':
                         if ctx is None:
                             raise error.ProgrammingError(
                                 b"fileset expression with no context"
                             )
                         matchers.append(ctx.matchfileset(cwd, pat, badfn=badfn))
                         if listsubrepos:
                             for subpath in ctx.substate:
                                 sm = ctx.sub(subpath).matchfileset(cwd, pat, badfn=badfn)
                                 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
                                 matchers.append(pm)
                         continue
                     other.append((kind, pat, source))
                 return matchers, other
             def _expandsubinclude(kindpats, root):
                 """Returns the list of subinclude matcher args and the kindpats without the
                 subincludes in it."""
                 relmatchers = []
                 other = []
                 for kind, pat, source in kindpats:
                     if kind == b'subinclude':
                         sourceroot = pathutil.dirname(util.normpath(source))
                         pat = util.pconvert(pat)
                         path = pathutil.join(sourceroot, pat)
                         newroot = pathutil.dirname(path)
                         matcherargs = (newroot, b'', [], [b'include:%s' % path])
                         prefix = pathutil.canonpath(root, root, newroot)
                         if prefix:
                             prefix += b'/'
                         relmatchers.append((prefix, matcherargs))
                     else:
                         other.append((kind, pat, source))
                 return relmatchers, other
             def _kindpatsalwaysmatch(kindpats):
                 """Checks whether the kindspats match everything, as e.g.
                 'relpath:.' does.
                 """
                 for kind, pat, source in kindpats:
                     if pat != b'' or kind not in [b'relpath', b'glob']:
                         return False
                 return True
             def _buildkindpatsmatcher(
                 matchercls,
                 root,
                 cwd,
                 kindpats,
                 ctx=None,
                 listsubrepos=False,
                 badfn=None,
             ):
                 """Build a single matcher out of ``kindpats``.

                 Fileset (``set``) patterns are expanded to their own matchers by
                 ``_expandsets``; the remaining patterns, if any, are handed to
                 ``matchercls(root, kindpats, badfn=badfn)``. Depending on how many
                 matchers result, this returns a ``nevermatcher`` (none), the sole
                 matcher (one), or a ``unionmatcher`` over all of them.
                 """
                 filesetmatchers, plainkindpats = _expandsets(
                     cwd,
                     kindpats,
                     ctx=ctx,
                     listsubrepos=listsubrepos,
                     badfn=badfn,
                 )

                 combined = []
                 if plainkindpats:
                     combined.append(matchercls(root, plainkindpats, badfn=badfn))
                 # Extending with an empty list is a no-op, so no guard is needed.
                 combined.extend(filesetmatchers)

                 count = len(combined)
                 if count == 0:
                     return nevermatcher(badfn=badfn)
                 if count == 1:
                     return combined[0]
                 return unionmatcher(combined)
             def match(
                 root,
                 cwd,
                 patterns=None,
                 include=None,
                 exclude=None,
                 default=b'glob',
                 auditor=None,
                 ctx=None,
                 listsubrepos=False,
                 warn=None,
                 badfn=None,
                 icasefs=False,
             ):
                 r"""build an object to match a set of file patterns

                 arguments:
                 root - the canonical root of the tree you're matching against
                 cwd - the current working directory, if relevant
                 patterns - patterns to find
                 include - patterns to include (unless they are excluded)
                 exclude - patterns to exclude (even if they are included)
                 default - if a pattern in patterns has no explicit type, assume this one
                 auditor - optional path auditor
                 ctx - optional changecontext
                 listsubrepos - if True, recurse into subrepositories
                 warn - optional function used for printing warnings
                 badfn - optional bad() callback for this matcher instead of the default
                 icasefs - make a matcher for wdir on case insensitive filesystems, which
                     normalizes the given patterns to the case in the filesystem

                 a pattern is one of:
                 'glob:<glob>' - a glob relative to cwd
                 're:<regexp>' - a regular expression
                 'path:<path>' - a path relative to repository root, which is matched
                                 recursively
                 'rootfilesin:<path>' - a path relative to repository root, which is
                                 matched non-recursively (will not match subdirectories)
                 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
                 'relpath:<path>' - a path relative to cwd
                 'relre:<regexp>' - a regexp that needn't match the start of a name
                 'set:<fileset>' - a fileset expression
                 'include:<path>' - a file of patterns to read and include
                 'subinclude:<path>' - a file of patterns to match against files under
                                       the same directory
                 '<something>' - a pattern of the specified default type

                 >>> def _match(root, *args, **kwargs):
                 ...     return match(util.localpath(root), *args, **kwargs)

                 Usually a patternmatcher is returned:
                 >>> _match(b'/foo', b'.', [br're:.*\.c$', b'path:foo/a', b'*.py'])
                 <patternmatcher patterns='[^/]*\\.py$|foo/a(?:/|$)|.*\\.c$'>

                 Combining 'patterns' with 'include' (resp. 'exclude') gives an
                 intersectionmatcher (resp. a differencematcher):
                 >>> type(_match(b'/foo', b'.', [br're:.*\.c$'], include=[b'path:lib']))
                 <class 'mercurial.match.intersectionmatcher'>
                 >>> type(_match(b'/foo', b'.', [br're:.*\.c$'], exclude=[b'path:build']))
                 <class 'mercurial.match.differencematcher'>

                 Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
                 >>> _match(b'/foo', b'.', [])
                 <alwaysmatcher>

                 The 'default' argument determines which kind of pattern is assumed if a
                 pattern has no prefix:
                 >>> _match(b'/foo', b'.', [br'.*\.c$'], default=b're')
                 <patternmatcher patterns='.*\\.c$'>
                 >>> _match(b'/foo', b'.', [b'main.py'], default=b'relpath')
                 <patternmatcher patterns='main\\.py(?:/|$)'>
                 >>> _match(b'/foo', b'.', [b'main.py'], default=b're')
                 <patternmatcher patterns='main.py'>

                 The primary use of matchers is to check whether a value (usually a file
                 name) matches against one of the patterns given at initialization. There
                 are two ways of doing this check.

                 >>> m = _match(b'/foo', b'', [br're:.*\.c$', b'relpath:a'])

                 1. Calling the matcher with a file name returns True if any pattern
                 matches that file name:
                 >>> m(b'a')
                 True
                 >>> m(b'main.c')
                 True
                 >>> m(b'test.py')
                 False

                 2. Using the exact() method only returns True if the file name matches one
                 of the exact patterns (i.e. not re: or glob: patterns):
                 >>> m.exact(b'a')
                 True
                 >>> m.exact(b'main.c')
                 False
                 """
                 assert os.path.isabs(root)
                 cwd = os.path.join(root, util.localpath(cwd))

                 if icasefs:
                     dirstate = ctx.repo().dirstate
                     dsnormalize = dirstate.normalize

                     def normalize(patterns, default, root, cwd, auditor, warn):
                         # Same as _donormalize, but additionally folds each pattern
                         # to the case known to the dirstate.
                         folded = []
                         for kind, pat, source in _donormalize(
                             patterns, default, root, cwd, auditor, warn
                         ):
                             if kind not in (b're', b'relre'):  # regex can't be normalized
                                 original = pat
                                 pat = dsnormalize(original)
                                 # Preserve the original to handle a case only rename.
                                 if original != pat and original in dirstate:
                                     folded.append((kind, original, source))
                             folded.append((kind, pat, source))
                         return folded

                 else:
                     normalize = _donormalize

                 def _sidematcher(pats):
                     # Shared construction for -I/-X style patterns: always globs by
                     # default, always an includematcher, and never a custom badfn.
                     sidekindpats = normalize(pats, b'glob', root, cwd, auditor, warn)
                     return _buildkindpatsmatcher(
                         includematcher,
                         root,
                         cwd,
                         sidekindpats,
                         ctx=ctx,
                         listsubrepos=listsubrepos,
                         badfn=None,
                     )

                 if not patterns:
                     # It's a little strange that no patterns means to match everything.
                     # Consider changing this to match nothing (probably using nevermatcher).
                     m = alwaysmatcher(badfn)
                 else:
                     kindpats = normalize(patterns, default, root, cwd, auditor, warn)
                     if _kindpatsalwaysmatch(kindpats):
                         m = alwaysmatcher(badfn)
                     else:
                         m = _buildkindpatsmatcher(
                             patternmatcher,
                             root,
                             cwd,
                             kindpats,
                             ctx=ctx,
                             listsubrepos=listsubrepos,
                             badfn=badfn,
                         )

                 if include:
                     m = intersectmatchers(m, _sidematcher(include))
                 if exclude:
                     m = differencematcher(m, _sidematcher(exclude))
                 return m
             def exact(files, badfn=None):
                 """Return an ``exactmatcher`` built from ``files``, forwarding ``badfn``."""
                 return exactmatcher(files, badfn=badfn)
             def always(badfn=None):
                 """Return an ``alwaysmatcher`` (matches everything), forwarding ``badfn``."""
                 return alwaysmatcher(badfn)
             def never(badfn=None):
                 """Return a ``nevermatcher`` (matches nothing), forwarding ``badfn``."""
                 return nevermatcher(badfn)
             def badmatch(match, badfn):
                 """Return a shallow copy of ``match`` whose ``bad`` attribute is ``badfn``.

                 The matcher passed in is left unmodified; only the copy gets the new
                 callback.
                 """
                 clone = copy.copy(match)
                 clone.bad = badfn
                 return clone
             def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
                 """Convert 'kind:pat' from the patterns list to tuples with kind and
                 normalized and rooted patterns and with listfiles expanded."""
                 kindpats = []
                 for kind, pat in [_patsplit(p, default) for p in patterns]:
                     if kind in cwdrelativepatternkinds:
                         pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
                     elif kind in (b'relglob', b'path', b'rootfilesin', b'rootglob'):
                         pat = util.normpath(pat)
                     elif kind in (b'listfile', b'listfile0'):
                         try:
                             files = util.readfile(pat)
                             if kind == b'listfile0':
                                 files = files.split(b'\0')
                             else:
                                 files = files.splitlines()
                             files = [f for f in files if f]
                         except EnvironmentError:
                             raise error.Abort(_(b"unable to read file list (%s)") % pat)
                         for k, p, source in _donormalize(
                             files, default, root, cwd, auditor, warn
                         ):
                             kindpats.append((k, p, pat))
                         continue
                     elif kind == b'include':
                         try:
                             fullpath = os.path.join(root, util.localpath(pat))
                             includepats = readpatternfile(fullpath, warn)
                             for k, p, source in _donormalize(
                                 includepats, default, root, cwd, auditor, warn
                             ):
                                 kindpats.append((k, p, source or pat))
                         except error.Abort as inst:
                             raise error.Abort(
                                 b'%s: %s'
                                 % (
                                     pat,
                                     inst.message,
                                 )
                             )
                         except IOError as inst:
                             if warn:
                                 warn(
                                     _(b"skipping unreadable pattern file '%s': %s\n")
                                     % (pat, stringutil.forcebytestr(inst.strerror))
                                 )
                         continue
                     # else: re or relre - which cannot be normalized
                     kindpats.append((kind, pat, b''))
                 return kindpats
             class basematcher:
                 def __init__(self, badfn=None):
                     if badfn is not None:
                         self.bad = badfn
                 def __call__(self, fn):
                     return self.matchfn(fn)
                 # Callbacks related to how the matcher is used by dirstate.walk.
                 # Subscribers to these events must monkeypatch the matcher object.
                 def bad(self, f, msg):
                     """Callback from dirstate.walk for each explicit file that can't be
                     found/accessed, with an error message."""
                 # If an traversedir is set, it will be called when a directory discovered
                 # by recursive traversal is visited.
                 traversedir = None
                 @propertycache
                 def _files(self):
                     return []
                 def files(self):
                     """Explicitly listed files or patterns or roots:
                     if no patterns or .always(): empty list,
                     if exact: list exact files,
                     if not .anypats(): list all files and dirs,
                     else: optimal roots"""
                     return self._files
                 @propertycache
                 def _fileset(self):
                     return set(self._files)
                 def exact(self, f):
                     '''Returns True if f is in .files().'''
                     return f in self._fileset
                 def matchfn(self, f):
                     return False
                 def visitdir(self, dir):
                     """Decides whether a directory should be visited based on whether it
                     has potential matches in it or one of its subdirectories. This is
                     based on the match's primary, included, and excluded patterns.
                     Returns the string 'all' if the given directory and all subdirectories
                     should be visited. Otherwise returns True or False indicating whether
                     the given directory should be visited.
                     """
                     return True
                 def visitchildrenset(self, dir):
                     """Decides whether a directory should be visited based on whether it
                     has potential matches in it or one of its subdirectories, and
                     potentially lists which subdirectories of that directory should be
                     visited. This is based on the match's primary, included, and excluded
                     patterns.
                     This function is very similar to 'visitdir', and the following mapping
                     can be applied:
                          visitdir | visitchildrenlist
                         ----------+-------------------
                          False    | set()
                          'all'    | 'all'
                          True     | 'this' OR non-empty set of subdirs -or files- to visit
                     Example:
                       Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
                       the following values (assuming the implementation of visitchildrenset
                       is capable of recognizing this; some implementations are not).
                       '' -> {'foo', 'qux'}
                       'baz' -> set()
                       'foo' -> {'bar'}
                       # Ideally this would be 'all', but since the prefix nature of matchers
                       # is applied to the entire matcher, we have to downgrade this to
                       # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
                       # in.
                       'foo/bar' -> 'this'
                       'qux' -> 'this'
                     Important:
                       Most matchers do not know if they're representing files or
                       directories. They see ['path:dir/f'] and don't know whether 'f' is a
                       file or a directory, so visitchildrenset('dir') for most matchers will
                       return {'f'}, but if the matcher knows it's a file (like exactmatcher
                       does), it may return 'this'. Do not rely on the return being a set
                       indicating that there are no files in this dir to investigate (or
                       equivalently that if there are files to investigate in 'dir' that it
                       will always return 'this').
                     """
                     return b'this'
                 def always(self):
                     """Matcher will match everything and .files() will be empty --
                     optimization might be possible."""
                     return False
                 def isexact(self):
                     """Report whether the matched paths are exactly those in .files().

                     When this is true, callers may be able to iterate over .files()
                     directly as an optimization. This implementation always answers
                     False.
                     """
                     return False
                 def prefix(self):
                     """Report whether the paths in .files() are matched recursively.

                     When this is true, callers may be able to treat .files() as a
                     list of path prefixes as an optimization. This implementation
                     always answers False.
                     """
                     return False
                 def anypats(self):
                     """Report whether none of the fast-path predicates applies.

                     True exactly when .always(), .isexact() and .prefix() are all
                     false, in which case optimizations will be difficult.
                     """
                     # The three predicates are queried in the same order and with the
                     # same short-circuiting as a chain of "not a and not b and not c".
                     return not (self.always() or self.isexact() or self.prefix())
             class alwaysmatcher(basematcher):
                 '''Matches everything.'''
                 def __init__(self, badfn=None):
                     super().__init__(badfn)
                 def matchfn(self, f):
                     # Every path is accepted, regardless of its value.
                     return True
                 def always(self):
                     return True
                 def visitdir(self, dir):
                     # Everything below any directory matches.
                     return b'all'
                 def visitchildrenset(self, dir):
                     # Same answer as visitdir: the whole subtree matches.
                     return b'all'
                 def __repr__(self):
                     return r'<alwaysmatcher>'
             class nevermatcher(basematcher):
                 '''Matches nothing.'''
                 def __init__(self, badfn=None):
                     super().__init__(badfn)
                 # Calling a matcher that matches nothing "exact" or "prefix" looks
                 # odd, but it lets callers take either fast path. files() returns
                 # [], so there are no exact matches and no prefixes to iterate
                 # over, which keeps those fast paths both cheap and correct.
                 def prefix(self):
                     return True
                 def isexact(self):
                     return True
                 def visitchildrenset(self, dir):
                     # No child of any directory is ever worth visiting.
                     return set()
                 def visitdir(self, dir):
                     return False
                 def __repr__(self):
                     return r'<nevermatcher>'
             class predicatematcher(basematcher):
                 """A matcher adapter for a simple boolean function"""
                 def __init__(self, predfn, predrepr=None, badfn=None):
                     super().__init__(badfn)
                     # predrepr is only used to describe the predicate in __repr__.
                     self._predrepr = predrepr
                     # The predicate itself serves as the match function.
                     self.matchfn = predfn
                 @encoding.strmethod
                 def __repr__(self):
                     described = stringutil.buildrepr(self._predrepr)
                     if not described:
                         # Fall back to the repr of the predicate function.
                         described = pycompat.byterepr(self.matchfn)
                     # NOTE(review): the tag is spelled 'predicatenmatcher'; it is
                     # runtime output and is deliberately left untouched here.
                     return b'<predicatenmatcher pred=%s>' % described
             def path_or_parents_in_set(path, prefix_set):
                 """Returns True if `path` (or any parent of `path`) is in `prefix_set`."""
                 size = len(prefix_set)
                 if not size:
                     return False
                 if path in prefix_set:
                     return True
                 if size > 5:
                     # With more than 5 prefixes it is *probably* cheaper to walk up
                     # the directory hierarchy of `path`: hierarchies tend to be
                     # shallow and a hash lookup per ancestor is cheap.
                     for parentdir in pathutil.finddirs(path):
                         if parentdir in prefix_set:
                             return True
                     return False
                 # FIXME: Ideally we'd never get to this point if this is the case -
                 # we'd recognize ourselves as an 'always' matcher and skip this.
                 if b'' in prefix_set:
                     return True
                 slash = ord(b'/')
                 for candidate in prefix_set:
                     # `path` is known not to be a member of prefix_set, so whenever
                     # it starts with `candidate` it is strictly longer and the index
                     # below cannot raise IndexError.
                     if path.startswith(candidate) and path[len(candidate)] == slash:
                         return True
                 return False
             class patternmatcher(basematcher):
                 r"""Matches a set of (kind, pat, source) against a 'root' directory.
                 >>> kindpats = [
                 ...     (b're', br'.*\.c$', b''),
                 ...     (b'path', b'foo/a', b''),
                 ...     (b'relpath', b'b', b''),
                 ...     (b'glob', b'*.h', b''),
                 ... ]
                 >>> m = patternmatcher(b'foo', kindpats)
                 >>> m(b'main.c')  # matches re:.*\.c$
                 True
                 >>> m(b'b.txt')
                 False
                 >>> m(b'foo/a')  # matches path:foo/a
                 True
                 >>> m(b'a')  # does not match path:b, since 'root' is 'foo'
                 False
                 >>> m(b'b')  # matches relpath:b, since 'root' is 'foo'
                 True
                 >>> m(b'lib.h')  # matches glob:*.h
                 True
                 >>> m.files()
                 [b'', b'foo/a', b'', b'b']
                 >>> m.exact(b'foo/a')
                 True
                 >>> m.exact(b'b')
                 True
                 >>> m.exact(b'lib.h')  # exact matches are for (rel)path kinds
                 False
                 """
                 def __init__(self, root, kindpats, badfn=None):
                     super(patternmatcher, self).__init__(badfn)
                     # NOTE: this sorts the caller's kindpats list in place.
                     kindpats.sort()
                     self._files = _explicitfiles(kindpats)
                     self._prefix = _prefix(kindpats)
-                    self._pats, self.matchfn = _buildmatch(kindpats, b'$', root)
+                    self._pats, self._matchfn = _buildmatch(kindpats, b'$', root)
+                def matchfn(self, fn):
+                    if fn in self._fileset:
+                        return True
+                    return self._matchfn(fn)
                 # Set built from pathutil.dirs() over self._fileset, computed once
                 # and then cached by propertycache.
                 @propertycache
                 def _dirs(self):
                     return set(pathutil.dirs(self._fileset))
                 # Returns b'all' when this is a prefix matcher and `dir` is itself
                 # in self._fileset; otherwise a bool telling whether `dir` is in
                 # self._dirs, or `dir` (or one of its parents) is in self._fileset.
                 def visitdir(self, dir):
                     if self._prefix and dir in self._fileset:
                         return b'all'
                     return dir in self._dirs or path_or_parents_in_set(dir, self._fileset)
                 # Translates the visitdir() answer: True becomes b'this', a falsy
                 # answer becomes an empty set, and b'all' is passed through.
                 def visitchildrenset(self, dir):
                     ret = self.visitdir(dir)
                     if ret is True:
                         return b'this'
                     elif not ret:
                         return set()
                     assert ret == b'all'
                     return b'all'
                 def prefix(self):
                     return self._prefix
                 @encoding.strmethod
                 def __repr__(self):
                     return b'<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)
             # This is basically a reimplementation of pathutil.dirs that stores the
             # children instead of just a count of them, plus a small optional optimization
             # to avoid some directories we don't need.
             class _dirchildren:
                 def __init__(self, paths, onlyinclude=None):
                     self._dirs = {}
                     self._onlyinclude = onlyinclude or []
                     addpath = self.addpath
                     for f in paths:
                         addpath(f)
                 def addpath(self, path):
                     if path == b'':
                         return
                     dirs = self._dirs
                     findsplitdirs = _dirchildren._findsplitdirs
                     for d, b in findsplitdirs(path):
                         if d not in self._onlyinclude:
                             continue
                         dirs.setdefault(d, set()).add(b)
                 @staticmethod
                 def _findsplitdirs(path):
                     # yields (dirname, basename) tuples, walking back to the root.  This is
                     # very similar to pathutil.finddirs, except:
                     #  - produces a (dirname, basename) tuple, not just 'dirname'
                     # Unlike manifest._splittopdir, this does not suffix `dirname` with a
                     # slash.
                     oldpos = len(path)
                     pos = path.rfind(b'/')
                     while pos != -1:
                         yield path[:pos], path[pos + 1 : oldpos]
                         oldpos = pos
                         pos = path.rfind(b'/', 0, pos)
                     yield b'', path[:oldpos]
                 def get(self, path):
                     return self._dirs.get(path, set())
             class includematcher(basematcher):
                 """Matcher built from include-style (kind, pat, source) patterns."""
                 def __init__(self, root, kindpats, badfn=None):
                     super().__init__(badfn)
                     if rustmod is not None:
                         # We need to pass the patterns to Rust because they can contain
                         # patterns from the user interface
                         self._kindpats = kindpats
                     self._pats, self.matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
                     self._prefix = _prefix(kindpats)
                     roots, dirs, parents = _rootsdirsandparents(kindpats)
                     # Directories that are included recursively.
                     self._roots = set(roots)
                     # Directories that are included non-recursively.
                     self._dirs = set(dirs)
                     # Directories that are included non-recursively only because they
                     # lead to something in _dirs or _roots.
                     self._parents = parents
                 def visitdir(self, dir):
                     if self._prefix and dir in self._roots:
                         return b'all'
                     if dir in self._dirs or dir in self._parents:
                         return True
                     return path_or_parents_in_set(dir, self._roots)
                 @propertycache
                 def _allparentschildren(self):
                     # Feeding dirs, roots and parents and then keeping only parents
                     # looks odd, but it is needed for a case such as:
                     #   dirs = ['foo/bar']
                     #   parents = ['foo']
                     # Had only self._parents been fed in, asking for the children of
                     # 'foo' could not answer ['bar'].
                     everything = itertools.chain(self._dirs, self._roots, self._parents)
                     return _dirchildren(everything, onlyinclude=self._parents)
                 def visitchildrenset(self, dir):
                     roots = self._roots
                     if self._prefix and dir in roots:
                         return b'all'
                     # Note: the 'dir in self._parents' case from visitdir is *not*
                     # part of this test; it is handled separately below.
                     if (
                         b'' in roots
                         or dir in self._dirs
                         or path_or_parents_in_set(dir, roots)
                     ):
                         return b'this'
                     if dir not in self._parents:
                         return set()
                     return self._allparentschildren.get(dir) or set()
                 @encoding.strmethod
                 def __repr__(self):
                     return b'<includematcher includes=%r>' % pycompat.bytestr(self._pats)
             class exactmatcher(basematcher):
                 r"""Matches the input files exactly. They are interpreted as paths, not
                 patterns (so no kind-prefixes).

                 >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
                 >>> m(b'a.txt')
                 True
                 >>> m(b'b.txt')
                 False

                 Input files that would be matched are exactly those returned by .files()

                 >>> m.files()
                 ['a.txt', 're:.*\\.c$']

                 So pattern 're:.*\.c$' is not considered as a regex, but as a file name

                 >>> m(b'main.c')
                 False
                 >>> m(br're:.*\.c$')
                 True
                 """
                 def __init__(self, files, badfn=None):
                     super().__init__(badfn)
                     # A list is stored as given; any other iterable is copied to one.
                     self._files = files if isinstance(files, list) else list(files)
                 matchfn = basematcher.exact
                 @propertycache
                 def _dirs(self):
                     return set(pathutil.dirs(self._fileset))
                 def visitdir(self, dir):
                     return dir in self._dirs
                 @propertycache
                 def _visitchildrenset_candidates(self):
                     """A memoized set of candidates for visitchildrenset."""
                     # All files, plus every directory except the root b''.
                     return self._fileset | (self._dirs - {b''})
                 @propertycache
                 def _sorted_visitchildrenset_candidates(self):
                     """A memoized sorted list of candidates for visitchildrenset."""
                     return sorted(self._visitchildrenset_candidates)
                 def visitchildrenset(self, dir):
                     if not self._fileset or dir not in self._dirs:
                         return set()
                     if dir == b'':
                         candidates = self._visitchildrenset_candidates
                     else:
                         ordered = self._sorted_visitchildrenset_candidates
                         below = dir + b'/'
                         # Index of the first entry that could start with `below`
                         # (i.e. is >= below). At least one such entry is expected;
                         # the assert further down catches the opposite.
                         start = bisect.bisect_left(ordered, below)
                         # Smallest value greater than anything starting with `below`:
                         # `dir` followed by the character right after '/'. (Assuming
                         # '0' follows '/' would probably do, but this avoids relying
                         # on it.)
                         beyond = dir + encoding.strtolocal(chr(ord(b'/') + 1))
                         # Index of the first entry >= beyond.
                         stop = bisect.bisect_left(ordered, beyond, lo=start)
                         skip = len(below)
                         candidates = {entry[skip:] for entry in ordered[start:stop]}
                     # self._dirs holds every directory recursively: for
                     # foo/bar/baz.txt it contains '', 'foo' and 'foo/bar'. A candidate
                     # that still contains a '/' therefore belongs to a
                     # subdir-of-a-subdir and can be dropped, because the immediate
                     # subdir is present as a slash-free candidate.
                     ret = {name for name in candidates if b'/' not in name}
                     # An empty result is not expected: it would mean _dirs holds
                     # something that has no file in _fileset.
                     assert ret
                     return ret
                 def isexact(self):
                     return True
                 @encoding.strmethod
                 def __repr__(self):
                     return b'<exactmatcher files=%r>' % self._files
             class differencematcher(basematcher):
                 """Composes two matchers by matching if the first matches and the second
                 does not.

                 The second matcher's non-matching-attributes (bad, traversedir) are
                 ignored; both are copied from the first matcher.
                 """
                 def __init__(self, m1, m2):
                     super(differencematcher, self).__init__()
                     self._m1 = m1
                     self._m2 = m2
                     self.bad = m1.bad
                     self.traversedir = m1.traversedir
                 def matchfn(self, f):
                     return self._m1(f) and not self._m2(f)
                 @propertycache
                 def _files(self):
                     if self.isexact():
                         return [f for f in self._m1.files() if self(f)]
                     # If m1 is not an exact matcher, we can't easily figure out the set of
                     # files, because its files() are not always files. For example, if
                     # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
                     # want to remove "dir" from the set even though it would match m2,
                     # because the "dir" in m1 may not be a file.
                     return self._m1.files()
                 def visitdir(self, dir):
                     """Return b'all', True or False for `dir`.

                     The second matcher is asked exactly once per call, so nesting
                     difference matchers does not multiply the number of visitdir
                     calls made on the wrapped matchers.
                     """
                     m2_visit = self._m2.visitdir(dir)
                     if m2_visit == b'all':
                         # Everything below dir is excluded by m2.
                         return False
                     m1_visit = self._m1.visitdir(dir)
                     if not m2_visit:
                         # m2 does not match dir, we can return 'all' here if possible
                         return m1_visit
                     # m2 matches part of dir, so 'all' must be downgraded to True.
                     return bool(m1_visit)
                 def visitchildrenset(self, dir):
                     m2_set = self._m2.visitchildrenset(dir)
                     if m2_set == b'all':
                         return set()
                     m1_set = self._m1.visitchildrenset(dir)
                     # Possible values for m1: 'all', 'this', set(...), set()
                     # Possible values for m2:        'this', set(...), set()
                     # If m2 has nothing under here that we care about, return m1, even if
                     # it's 'all'. This is a change in behavior from visitdir, which would
                     # return True, not 'all', for some reason.
                     if not m2_set:
                         return m1_set
                     if m1_set in [b'all', b'this']:
                         # Never return 'all' here if m2_set is any kind of non-empty (either
                         # 'this' or set(foo)), since m2 might return set() for a
                         # subdirectory.
                         return b'this'
                     # Possible values for m1:         set(...), set()
                     # Possible values for m2: 'this', set(...)
                     # We ignore m2's set results. They're possibly incorrect:
                     #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
                     #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
                     #    return set(), which is *not* correct, we still need to visit 'dir'!
                     return m1_set
                 def isexact(self):
                     return self._m1.isexact()
                 @encoding.strmethod
                 def __repr__(self):
                     return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)
             def intersectmatchers(m1, m2):
                 """Composes two matchers by matching if both of them match.

                 The second matcher's non-matching-attributes (bad, traversedir) are ignored.
                 """
                 if m1 is None or m2 is None:
                     # With a matcher missing, whichever one is present (if any) is
                     # the whole answer.
                     return m1 or m2
                 if m1.always():
                     # m1 does not narrow anything: a copy of m2 does the matching but
                     # keeps reporting through m1's callbacks.
                     # TODO: Consider encapsulating these things in a class so there's
                     # only one thing to copy from m1.
                     combined = copy.copy(m2)
                     combined.bad = m1.bad
                     combined.traversedir = m1.traversedir
                     return combined
                 if m2.always():
                     return copy.copy(m1)
                 return intersectionmatcher(m1, m2)
             class intersectionmatcher(basematcher):
                 """Matches a path only when both wrapped matchers match it.

                 bad and traversedir are copied from the first matcher.
                 """
                 def __init__(self, m1, m2):
                     super().__init__()
                     self._m1 = m1
                     self._m2 = m2
                     self.bad = m1.bad
                     self.traversedir = m1.traversedir
                 @propertycache
                 def _files(self):
                     if not self.isexact():
                         # If neither m1 nor m2 is an exact matcher, we can't easily
                         # intersect the set of files, because their files() are not
                         # always files. For example, if intersecting a matcher
                         # "-I glob:foo.txt" with matcher of "path:dir2", we don't
                         # want to remove "dir2" from the set.
                         return self._m1.files() + self._m2.files()
                     # Take the files of an exact side (m1 if it is exact, else m2)
                     # and keep those the other side accepts.
                     exact, other = self._m1, self._m2
                     if not exact.isexact():
                         exact, other = other, exact
                     return [f for f in exact.files() if other(f)]
                 def matchfn(self, f):
                     return self._m1(f) and self._m2(f)
                 def visitdir(self, dir):
                     first = self._m1.visitdir(dir)
                     if first == b'all':
                         return self._m2.visitdir(dir)
                     if not first:
                         return False
                     # bool() because first=True + second='all' should not be 'all'
                     return bool(self._m2.visitdir(dir))
                 def visitchildrenset(self, dir):
                     left = self._m1.visitchildrenset(dir)
                     if not left:
                         return set()
                     right = self._m2.visitchildrenset(dir)
                     if not right:
                         return set()
                     # 'all' on one side means the other side alone decides.
                     if left == b'all':
                         return right
                     if right == b'all':
                         return left
                     if left == b'this' or right == b'this':
                         return b'this'
                     assert isinstance(left, set) and isinstance(right, set)
                     return left.intersection(right)
                 def always(self):
                     return self._m1.always() and self._m2.always()
                 def isexact(self):
                     return self._m1.isexact() or self._m2.isexact()
                 @encoding.strmethod
                 def __repr__(self):
                     return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)
             class subdirmatcher(basematcher):
                 """Adapt a matcher to work on a subdirectory only.

                 The paths are remapped to remove/insert the path as needed:

                 >>> from . import pycompat
                 >>> m1 = match(util.localpath(b'/root'), b'', [b'a.txt', b'sub/b.txt'], auditor=lambda name: None)
                 >>> m2 = subdirmatcher(b'sub', m1)
                 >>> m2(b'a.txt')
                 False
                 >>> m2(b'b.txt')
                 True
                 >>> m2.matchfn(b'a.txt')
                 False
                 >>> m2.matchfn(b'b.txt')
                 True
                 >>> m2.files()
                 ['b.txt']
                 >>> m2.exact(b'b.txt')
                 True
                 >>> def bad(f, msg):
                 ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
                 >>> m1.bad = bad
                 >>> m2.bad(b'x.txt', b'No such file')
                 sub/x.txt: No such file
                 """
                 def __init__(self, path, matcher):
                     super().__init__()
                     self._path = path
                     self._matcher = matcher
                     self._always = matcher.always()
                     # Keep only the wrapped matcher's files that live below `path`,
                     # with the leading "path/" removed.
                     below = path + b"/"
                     skip = len(path) + 1
                     self._files = [
                         f[skip:] for f in matcher._files if f.startswith(below)
                     ]
                     # If the parent repo had a path to this subrepo and the matcher is
                     # a prefix matcher, this submatcher always matches.
                     if matcher.prefix():
                         self._always = any(f == path for f in matcher._files)
                 def bad(self, f, msg):
                     # Report against the wrapped matcher, using its path namespace.
                     self._matcher.bad(self._path + b"/" + f, msg)
                 def matchfn(self, f):
                     # Some information is lost in the superclass's constructor, so the
                     # matching function for the subdirectory cannot be rebuilt
                     # accurately from the inputs. Instead matchfn() and visitdir() are
                     # overridden to call the original matcher with the subdirectory
                     # path prepended.
                     return self._matcher.matchfn(self._path + b"/" + f)
                 def visitdir(self, dir):
                     # b'' stands for the subdirectory itself.
                     full = self._path if dir == b'' else self._path + b"/" + dir
                     return self._matcher.visitdir(full)
                 def visitchildrenset(self, dir):
                     full = self._path if dir == b'' else self._path + b"/" + dir
                     return self._matcher.visitchildrenset(full)
                 def always(self):
                     return self._always
                 def prefix(self):
                     return self._matcher.prefix() and not self._always
                 @encoding.strmethod
                 def __repr__(self):
                     return b'<subdirmatcher path=%r, matcher=%r>' % (
                         self._path,
                         self._matcher,
                     )
             class prefixdirmatcher(basematcher):
                 """Adapt a matcher to work on a parent directory.

                 The matcher's non-matching-attributes (bad, traversedir) are ignored.

                 The prefix path should usually be the relative path from the root of
                 this matcher to the root of the wrapped matcher.

                 >>> m1 = match(util.localpath(b'/root/d/e'), b'f', [b'../a.txt', b'b.txt'], auditor=lambda name: None)
                 >>> m2 = prefixdirmatcher(b'd/e', m1)
                 >>> m2(b'a.txt')
                 False
                 >>> m2(b'd/e/a.txt')
                 True
                 >>> m2(b'd/e/b.txt')
                 False
                 >>> m2.files()
                 ['d/e/a.txt', 'd/e/f/b.txt']
                 >>> m2.exact(b'd/e/a.txt')
                 True
                 >>> m2.visitdir(b'd')
                 True
                 >>> m2.visitdir(b'd/e')
                 True
                 >>> m2.visitdir(b'd/e/f')
                 True
                 >>> m2.visitdir(b'd/e/g')
                 False
                 >>> m2.visitdir(b'd/ef')
                 False
                 """
                 def __init__(self, path, matcher, badfn=None):
                     super().__init__(badfn)
                     if not path:
                         raise error.ProgrammingError(b'prefix path must not be empty')
                     self._matcher = matcher
                     self._path = path
                     self._pathprefix = path + b'/'
                 @propertycache
                 def _files(self):
                     prefix = self._pathprefix
                     return [prefix + f for f in self._matcher._files]
                 def matchfn(self, f):
                     prefix = self._pathprefix
                     if f.startswith(prefix):
                         # Ask the wrapped matcher about the path relative to the prefix.
                         return self._matcher.matchfn(f[len(prefix) :])
                     return False
                 @propertycache
                 def _pathdirs(self):
                     return set(pathutil.finddirs(self._path))
                 def visitdir(self, dir):
                     if dir == self._path:
                         # The prefix directory is the wrapped matcher's root.
                         return self._matcher.visitdir(b'')
                     prefix = self._pathprefix
                     if dir.startswith(prefix):
                         return self._matcher.visitdir(dir[len(prefix) :])
                     return dir in self._pathdirs
                 def visitchildrenset(self, dir):
                     if dir == self._path:
                         return self._matcher.visitchildrenset(b'')
                     prefix = self._pathprefix
                     if dir.startswith(prefix):
                         return self._matcher.visitchildrenset(dir[len(prefix) :])
                     if dir not in self._pathdirs:
                         return set()
                     return b'this'
                 def isexact(self):
                     return self._matcher.isexact()
                 def prefix(self):
                     return self._matcher.prefix()
                 @encoding.strmethod
                 def __repr__(self):
                     return b'<prefixdirmatcher path=%r, matcher=%r>' % (
                         pycompat.bytestr(self._path),
                         self._matcher,
                     )
             class unionmatcher(basematcher):
                 """A matcher that is the union of several matchers.

                 The non-matching-attributes (bad, traversedir) are taken from the first
                 matcher.
                 """
                 def __init__(self, matchers):
                     first = matchers[0]
                     super().__init__()
                     # NOTE(review): only traversedir is copied from the first matcher
                     # here; bad is not assigned in this constructor.
                     self.traversedir = first.traversedir
                     self._matchers = matchers
                 def matchfn(self, f):
                     # A path matches as soon as any one of the matchers accepts it.
                     return any(match(f) for match in self._matchers)
                 def visitdir(self, dir):
                     result = False
                     for m in self._matchers:
                         answer = m.visitdir(dir)
                         if answer == b'all':
                             return answer
                         result |= answer
                     return result
                 def visitchildrenset(self, dir):
                     children = set()
                     this = False
                     for m in self._matchers:
                         answer = m.visitchildrenset(dir)
                         if not answer:
                             continue
                         if answer == b'all':
                             return answer
                         if this or answer == b'this':
                             this = True
                             # Keep looping: a later matcher might still say 'all'.
                             continue
                         assert isinstance(answer, set)
                         children = children.union(answer)
                     return b'this' if this else children
                 @encoding.strmethod
                 def __repr__(self):
                     return b'<unionmatcher matchers=%r>' % self._matchers
             def patkind(pattern, default=None):
                 r"""If pattern is 'kind:pat' with a known kind, return kind.

                 Otherwise `default` is returned.

                 >>> patkind(br're:.*\.c$')
                 're'
                 >>> patkind(b'glob:*.c')
                 'glob'
                 >>> patkind(b'relpath:test.py')
                 'relpath'
                 >>> patkind(b'main.py')
                 >>> patkind(b'main.py', default=b're')
                 're'
                 """
                 kind, _pat = _patsplit(pattern, default)
                 return kind
             def _patsplit(pattern, default):
                 """Split a string into the optional pattern kind prefix and the actual
                 pattern."""
                 if b':' in pattern:
                     kind, pat = pattern.split(b':', 1)
                     if kind in allpatternkinds:
                         return kind, pat
                 return default, pattern
             def _globre(pat):
                 r"""Convert an extended glob string to a regexp string.

                 The pattern is scanned one byte at a time: '?' and '*' become
                 single-component wildcards, '**' crosses directory separators,
                 '[...]' is a character class (a leading '!' negates it), '{a,b}' is
                 an alternation and a backslash makes the next byte literal.  Every
                 other byte is escaped for use in a regexp.

                 >>> from . import pycompat
                 >>> def bprint(s):
                 ...     print(pycompat.sysstr(s))
                 >>> bprint(_globre(br'?'))
                 .
                 >>> bprint(_globre(br'*'))
                 [^/]*
                 >>> bprint(_globre(br'**'))
                 .*
                 >>> bprint(_globre(br'**/a'))
                 (?:.*/)?a
                 >>> bprint(_globre(br'a/**/b'))
                 a/(?:.*/)?b
                 >>> bprint(_globre(br'[a*?!^][^b][!c]'))
                 [a*?!^][\^b][^c]
                 >>> bprint(_globre(br'{a,b}'))
                 (?:a|b)
                 >>> bprint(_globre(br'.\*\?'))
                 \.\*\?
                 """
                 # i is the read position in pat, n its length
                 i, n = 0, len(pat)
                 res = b''
                 # number of '{' groups currently open
                 group = 0
                 # escape(c, c) yields the escaped form of c, or c itself if the map
                 # has no entry for it
                 escape = util.stringutil.regexbytesescapemap.get
                 def peek():
                     # next unread byte, or False once the end of pat is reached
                     return i < n and pat[i : i + 1]
                 while i < n:
                     c = pat[i : i + 1]
                     i += 1
                     if c not in b'*?[{},\\':
                         # ordinary byte: emit it escaped
                         res += escape(c, c)
                     elif c == b'*':
                         if peek() == b'*':
                             i += 1
                             if peek() == b'/':
                                 # '**/' matches any (possibly empty) directory prefix
                                 i += 1
                                 res += b'(?:.*/)?'
                             else:
                                 # '**' matches anything, including slashes
                                 res += b'.*'
                         else:
                             # a single '*' stays within one path component
                             res += b'[^/]*'
                     elif c == b'?':
                         res += b'.'
                     elif c == b'[':
                         # look for the closing ']'; a '!' or ']' directly after the
                         # '[' is skipped so that it cannot end the class
                         j = i
                         if j < n and pat[j : j + 1] in b'!]':
                             j += 1
                         while j < n and pat[j : j + 1] != b']':
                             j += 1
                         if j >= n:
                             # no closing bracket: the '[' is a literal character
                             res += b'\\['
                         else:
                             # backslashes inside the class are doubled
                             stuff = pat[i:j].replace(b'\\', b'\\\\')
                             i = j + 1
                             if stuff[0:1] == b'!':
                                 # glob negation '!' becomes regexp negation '^'
                                 stuff = b'^' + stuff[1:]
                             elif stuff[0:1] == b'^':
                                 # a leading '^' is escaped so it stays literal
                                 stuff = b'\\' + stuff
                             res = b'%s[%s]' % (res, stuff)
                     elif c == b'{':
                         # open a non-capturing alternation group
                         group += 1
                         res += b'(?:'
                     elif c == b'}' and group:
                         res += b')'
                         group -= 1
                     elif c == b',' and group:
                         # ',' separates alternatives only inside a '{...}' group
                         res += b'|'
                     elif c == b'\\':
                         p = peek()
                         if p:
                             # backslash makes the following byte literal
                             i += 1
                             res += escape(p, p)
                         else:
                             # trailing backslash: emit the backslash itself
                             res += escape(c, c)
                     else:
                         # '}' or ',' outside of any group: plain character
                         res += escape(c, c)
                 return res
             # Matches a pattern that begins with an inline flag group such as "(?i)":
             # group 1 is the flag letters, group 2 the remainder of the pattern.
             FLAG_RE = util.re.compile(br'^\(\?([aiLmsux]+)\)(.*)')
             def _regex(kind, pat, globsuffix):
                 """Convert a (normalized) pattern of any kind into a
                 regular expression.
                 globsuffix is appended to the regexp of globs."""
                 if not pat and kind in (b'glob', b'relpath'):
                     return b''
                 if kind == b're':
                     return pat
                 if kind in (b'path', b'relpath'):
                     if pat == b'.':
                         return b''
                     return util.stringutil.reescape(pat) + b'(?:/|$)'
                 if kind == b'rootfilesin':
                     if pat == b'.':
                         escaped = b''
                     else:
                         # Pattern is a directory name.
                         escaped = util.stringutil.reescape(pat) + b'/'
                     # Anything after the pattern must be a non-directory.
                     return escaped + b'[^/]+$'
                 if kind == b'relglob':
                     globre = _globre(pat)
                     if globre.startswith(b'[^/]*'):
                         # When pat has the form *XYZ (common), make the returned regex more
                         # legible by returning the regex for **XYZ instead of **/*XYZ.
                         return b'.*' + globre[len(b'[^/]*') :] + globsuffix
                     return b'(?:|.*/)' + globre + globsuffix
                 if kind == b'relre':
                     flag = None
                     m = FLAG_RE.match(pat)
                     if m:
                         flag, pat = m.groups()
                     if not pat.startswith(b'^'):
                         pat = b'.*' + pat
                     if flag is not None:
                         pat = br'(?%s:%s)' % (flag, pat)
                     return pat
                 if kind in (b'glob', b'rootglob'):
                     return _globre(pat) + globsuffix
                 raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))
             def _buildmatch(kindpats, globsuffix, root):
                 """Build a matcher for kindpats; return (regexp string, match function).

                 globsuffix is appended to the regexp of globs.
                 """
                 subincludes, kindpats = _expandsubinclude(kindpats, root)
                 candidates = []
                 if subincludes:
                     # prefix -> matcher, filled in the first time a prefix is hit
                     submatchers = {}
                     def matchsubinclude(f):
                         for prefix, matcherargs in subincludes:
                             if not f.startswith(prefix):
                                 continue
                             submatcher = submatchers.get(prefix)
                             if submatcher is None:
                                 submatcher = submatchers[prefix] = match(*matcherargs)
                             if submatcher(f[len(prefix) :]):
                                 return True
                         return False
                     candidates.append(matchsubinclude)
                 regex = b''
                 if kindpats:
                     if all(k == b'rootfilesin' for k, p, s in kindpats):
                         # only rootfilesin patterns: compare the parent directory
                         # against a set instead of running a regexp
                         dirs = {p for k, p, s in kindpats}
                         def matchdir(f):
                             slash = f.rfind(b'/')
                             parent = f[:slash] if slash >= 0 else b'.'
                             return parent in dirs
                         regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
                         candidates.append(matchdir)
                     else:
                         regex, regexfunc = _buildregexmatch(kindpats, globsuffix)
                         candidates.append(regexfunc)
                 if len(candidates) == 1:
                     return regex, candidates[0]
                 return regex, lambda f: any(fn(f) for fn in candidates)
             # Size limit used by _buildregexmatch(): a single pattern whose regexp is
             # longer than this aborts, and longer pattern lists are compiled as several
             # smaller groups instead of one big regexp.
             MAX_RE_SIZE = 20000
             def _joinregexes(regexps):
                 """gather multiple regular expressions into a single one"""
                 return b'|'.join(regexps)
             def _buildregexmatch(kindpats, globsuffix):
                 """Turn a list of (kind, pat, source) into a regexp and a match function.

                 Returns a tuple of the full regexp string and the matcher function.
                 Aborts when a single pattern is too long or when a pattern does not
                 compile.

                 Test too large input
                 >>> _buildregexmatch([
                 ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
                 ... ], b'$')
                 Traceback (most recent call last):
                 ...
                 Abort: matcher pattern is too long (20009 bytes)
                 """
                 try:
                     pieces = [
                         _regex(kind, pat, globsuffix) for (kind, pat, source) in kindpats
                     ]
                     combined = _joinregexes(pieces)
                     chunks = []
                     chunkstart = 0
                     chunksize = 0
                     for pos, piece in enumerate(pieces):
                         piecelen = len(piece)
                         if piecelen > MAX_RE_SIZE:
                             raise error.Abort(
                                 _(b"matcher pattern is too long (%d bytes)") % piecelen
                             )
                         if chunksize + piecelen > MAX_RE_SIZE:
                             # the current chunk is full: close it and start a new
                             # one at this piece
                             chunks.append(_joinregexes(pieces[chunkstart:pos]))
                             chunkstart = pos
                             chunksize = 0
                         # the extra byte accounts for the joining '|'
                         chunksize += piecelen + 1
                     if chunkstart:
                         chunks.append(_joinregexes(pieces[chunkstart:]))
                         compiled = [_rematcher(chunk) for chunk in chunks]
                         func = lambda s: any(m(s) for m in compiled)
                     else:
                         # everything fits in a single regexp
                         single = _rematcher(combined)
                         func = lambda s: bool(single(s))
                     return combined, func
                 except re.error:
                     # compile the patterns one by one to report the offending one
                     for kind, pat, source in kindpats:
                         try:
                             _rematcher(_regex(kind, pat, globsuffix))
                         except re.error:
                             if source:
                                 raise error.Abort(
                                     _(b"%s: invalid pattern (%s): %s")
                                     % (source, kind, pat)
                                 )
                             raise error.Abort(
                                 _(b"invalid pattern (%s): %s") % (kind, pat)
                             )
                     raise error.Abort(_(b"invalid pattern"))
             def _patternrootsanddirs(kindpats):
                 """Returns roots and directories corresponding to each pattern.
                 This calculates the roots and directories exactly matching the patterns and
                 returns a tuple of (roots, dirs) for each. It does not return other
                 directories which may also need to be considered, like the parent
                 directories.
                 """
                 r = []
                 d = []
                 for kind, pat, source in kindpats:
                     if kind in (b'glob', b'rootglob'):  # find the non-glob prefix
                         root = []
                         for p in pat.split(b'/'):
                             if b'[' in p or b'{' in p or b'*' in p or b'?' in p:
                                 break
                             root.append(p)
                         r.append(b'/'.join(root))
                     elif kind in (b'relpath', b'path'):
                         if pat == b'.':
                             pat = b''
                         r.append(pat)
                     elif kind in (b'rootfilesin',):
                         if pat == b'.':
                             pat = b''
                         d.append(pat)
                     else:  # relglob, re, relre
                         r.append(b'')
                 return r, d
             def _roots(kindpats):
                 '''Return the directories that the given patterns match recursively.'''
                 return _patternrootsanddirs(kindpats)[0]
             def _rootsdirsandparents(kindpats):
                 """Compute roots, exact directories and their parents from patterns.

                 `roots` are matched recursively and `dirs` non-recursively, while
                 `parents` are the directories that must implicitly be walked to
                 reach anything in roots or dirs.

                 Returns a tuple of (roots, dirs, parents).

                 >>> r = _rootsdirsandparents(
                 ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
                 ...      (b'glob', b'g*', b'')])
                 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
                 (['g/h', 'g/h', ''], []) ['', 'g']
                 >>> r = _rootsdirsandparents(
                 ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
                 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
                 ([], ['g/h', '']) ['', 'g']
                 >>> r = _rootsdirsandparents(
                 ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
                 ...      (b'path', b'', b'')])
                 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
                 (['r', 'p/p', ''], []) ['', 'p']
                 >>> r = _rootsdirsandparents(
                 ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
                 ...      (b'relre', b'rr', b'')])
                 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
                 (['', '', ''], []) ['']
                 """
                 roots, exactdirs = _patternrootsanddirs(kindpats)
                 # The parents are added as non-recursive/exact directories because
                 # they have to be scanned on the way to the roots or to the other
                 # exact directories.
                 parents = set()
                 for paths in (exactdirs, roots):
                     parents.update(pathutil.dirs(paths))
                 # FIXME: all uses of this function convert these to sets, do so before
                 # returning.
                 # FIXME: all uses of this function do not need anything in 'roots' and
                 # 'dirs' to also be in 'parents', consider removing them before returning.
                 return roots, exactdirs, parents
             def _explicitfiles(kindpats):
                 """Return the filenames that the patterns may name explicitly.

                 >>> _explicitfiles([(b'path', b'foo/bar', b'')])
                 ['foo/bar']
                 >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
                 []
                 """
                 # rootfilesin can only name directories, never files, so those
                 # patterns are dropped before the roots are computed.
                 candidates = [kp for kp in kindpats if kp[0] != b'rootfilesin']
                 return _roots(candidates)
             def _prefix(kindpats):
                 '''Whether all the patterns match a prefix (i.e. recursively)'''
                 for kind, pat, source in kindpats:
                     if kind not in (b'path', b'relpath'):
                         return False
                 return True
             # Comment-stripping regexp, compiled by readpatternfile() the first time it
             # meets a line containing '#'.
             _commentre = None
             def readpatternfile(filepath, warn, sourceinfo=False):
                 """Parse a pattern file and return the list of patterns it contains.

                 These patterns should be given to compile() to be validated and
                 converted into a match function.

                 File format:

                 - trailing white space is dropped.
                 - the escape character is backslash.
                 - comments start with #.
                 - empty lines are skipped.

                 Lines can be of the following formats:

                 syntax: regexp # defaults following lines to non-rooted regexps
                 syntax: glob   # defaults following lines to non-rooted globs
                 re:pattern     # non-rooted regular expression
                 glob:pattern   # non-rooted glob
                 rootglob:pat   # rooted glob (same root as ^ in regexps)
                 pattern        # pattern of the current default type

                 filepath is the file to read. warn, when true, is called with a
                 message for each 'syntax:' line that names an unknown syntax; such
                 a line is skipped either way.

                 Returns a list of patterns, each prefixed with its kind. If
                 sourceinfo is set, returns a list of tuples instead:
                 (pattern, lineno, line), where line is the pattern text without its
                 syntax prefix. This is useful to debug ignore patterns.
                 """
                 global _commentre
                 syntaxes = {
                     b're': b'relre:',
                     b'regexp': b'relre:',
                     b'glob': b'relglob:',
                     b'rootglob': b'rootglob:',
                     b'include': b'include',
                     b'subinclude': b'subinclude',
                 }
                 syntax = b'relre:'
                 patterns = []
                 # The context manager closes the file even when parsing or the warn
                 # callback raises, which a trailing fp.close() would not do.
                 with open(filepath, b'rb') as fp:
                     for lineno, line in enumerate(fp, start=1):
                         if b"#" in line:
                             if not _commentre:
                                 _commentre = util.re.compile(
                                     br'((?:^|[^\\])(?:\\\\)*)#.*'
                                 )
                             # remove comments prefixed by an even number of escapes
                             m = _commentre.search(line)
                             if m:
                                 line = line[: m.end(1)]
                             # fixup properly escaped comments that survived the above
                             line = line.replace(b"\\#", b"#")
                         line = line.rstrip()
                         if not line:
                             continue
                         if line.startswith(b'syntax:'):
                             # switch the default syntax for the following lines
                             s = line[7:].strip()
                             try:
                                 syntax = syntaxes[s]
                             except KeyError:
                                 if warn:
                                     warn(
                                         _(b"%s: ignoring invalid syntax '%s'\n")
                                         % (filepath, s)
                                     )
                             continue
                         # a per-line prefix overrides the current default syntax
                         linesyntax = syntax
                         for s, rels in syntaxes.items():
                             if line.startswith(rels):
                                 linesyntax = rels
                                 line = line[len(rels) :]
                                 break
                             elif line.startswith(s + b':'):
                                 linesyntax = rels
                                 line = line[len(s) + 1 :]
                                 break
                         if sourceinfo:
                             patterns.append((linesyntax + line, lineno, line))
                         else:
                             patterns.append(linesyntax + line)
                 return patterns