upstream/mercurial-mirror Commit - r44114:5e1b0470

1

# match.py - filename matching

1

# match.py - filename matching

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import, print_function

8

from __future__ import absolute_import, print_function

9

10

import copy

10

import copy

11

import itertools

11

import itertools

12

import os

12

import os

13

import re

13

import re

14

15

from .i18n import _

15

from .i18n import _

16

from .pycompat import open

16

from .pycompat import open

17

from . import (

17

from . import (

18

encoding,

18

encoding,

19

error,

19

error,

20

pathutil,

20

pathutil,

21

policy,

21

policy,

22

pycompat,

22

pycompat,

23

util,

23

util,

24

)

24

)

25

from .utils import stringutil

25

from .utils import stringutil

26

27

# Whatever policy.importrust() hands back for the 'filepatterns' module.
rustmod = policy.importrust('filepatterns')

# The pattern kinds, i.e. the 'kind' part of a 'kind:pat' pattern.
allpatternkinds = (
    b're',
    b'glob',
    b'path',
    b'relglob',
    b'relpath',
    b'relre',
    b'rootglob',
    b'listfile',
    b'listfile0',
    b'set',
    b'include',
    b'subinclude',
    b'rootfilesin',
)

# Kinds whose pattern is canonicalized against the cwd by _donormalize().
cwdrelativepatternkinds = (b'relpath', b'glob')

# Short alias used as a decorator by the matcher classes below.
propertycache = util.propertycache

47

48

49

def _rematcher(regex):
    """Compile ``regex`` with util.re and return a matching callable.

    The compiled object's ``test_match`` attribute is preferred when it
    exists (slightly faster, provided by facebook's re2 bindings);
    otherwise its ``match`` attribute is returned.
    """
    compiled = util.re.compile(regex)
    try:
        matchfn = compiled.test_match
    except AttributeError:
        # no re2-style fast path on this compiled object
        matchfn = compiled.match
    return matchfn

58

59

60

def _expandsets(kindpats, ctx=None, listsubrepos=False, badfn=None):
    """Split the 'set' patterns out of ``kindpats`` and turn them into matchers.

    Returns a ``(matchers, other)`` pair: ``matchers`` holds one matcher per
    'set' pattern obtained from ``ctx.matchfileset()`` (plus, when
    ``listsubrepos`` is true, one prefixdirmatcher per entry of
    ``ctx.substate``), and ``other`` holds the remaining
    ``(kind, pat, source)`` tuples in their original order.

    Raises error.ProgrammingError if a 'set' pattern is found while ``ctx``
    is None.
    """
    filesetmatchers = []
    remaining = []

    for kind, pat, source in kindpats:
        if kind != b'set':
            remaining.append((kind, pat, source))
            continue

        if ctx is None:
            raise error.ProgrammingError(
                b"fileset expression with no context"
            )
        filesetmatchers.append(ctx.matchfileset(pat, badfn=badfn))

        if not listsubrepos:
            continue
        for subpath in ctx.substate:
            submatcher = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
            filesetmatchers.append(
                prefixdirmatcher(subpath, submatcher, badfn=badfn)
            )

    return filesetmatchers, remaining

82

83

84

def _expandsubinclude(kindpats, root):
    """Separate the 'subinclude' entries of ``kindpats`` from the rest.

    Returns ``(relmatchers, other)``. ``relmatchers`` is a list of
    ``(prefix, matcherargs)`` pairs, one per 'subinclude' pattern, where
    ``matcherargs`` is the argument tuple
    ``(newroot, b'', [], [b'include:<path>'])`` and ``prefix`` is the
    directory of the included file as computed by pathutil.canonpath()
    against ``root`` (with a trailing slash unless it is empty). ``other``
    holds every non-subinclude ``(kind, pat, source)`` tuple.
    """
    relmatchers = []
    remaining = []

    for kind, pat, source in kindpats:
        if kind != b'subinclude':
            remaining.append((kind, pat, source))
            continue

        # the included file is looked up next to the file that named it
        sourceroot = pathutil.dirname(util.normpath(source))
        path = pathutil.join(sourceroot, util.pconvert(pat))
        newroot = pathutil.dirname(path)

        prefix = pathutil.canonpath(root, root, newroot)
        if prefix:
            prefix += b'/'

        matcherargs = (newroot, b'', [], [b'include:%s' % path])
        relmatchers.append((prefix, matcherargs))

    return relmatchers, remaining

107

108

109

def _kindpatsalwaysmatch(kindpats):

109

def _kindpatsalwaysmatch(kindpats):

110

""""Checks whether the kindspats match everything, as e.g.

110

""""Checks whether the kindspats match everything, as e.g.

111

'relpath:.' does.

111

'relpath:.' does.

112

"""

112

"""

113

for kind, pat, source in kindpats:

113

for kind, pat, source in kindpats:

114

if pat != b'' or kind not in [b'relpath', b'glob']:

114

if pat != b'' or kind not in [b'relpath', b'glob']:

115

return False

115

return False

116

return True

116

return True

117

118

119

def _buildkindpatsmatcher(
    matchercls, root, kindpats, ctx=None, listsubrepos=False, badfn=None
):
    """Build a single matcher for ``kindpats``.

    'set' patterns are expanded through _expandsets(); whatever is left is
    handed to ``matchercls(root, kindpats, badfn=badfn)``. Depending on how
    many matchers that produces, the result is a nevermatcher (none), the
    lone matcher itself (one), or a unionmatcher over all of them.
    """
    filesetmatchers, plainkindpats = _expandsets(
        kindpats, ctx=ctx, listsubrepos=listsubrepos, badfn=badfn
    )

    # the matchercls matcher (if any) goes first, fileset matchers after it
    if plainkindpats:
        candidates = [matchercls(root, plainkindpats, badfn=badfn)]
    else:
        candidates = []
    candidates.extend(filesetmatchers)

    if not candidates:
        return nevermatcher(badfn=badfn)
    if len(candidates) == 1:
        return candidates[0]
    return unionmatcher(candidates)

136

137

138

def match(
    root,
    cwd,
    patterns=None,
    include=None,
    exclude=None,
    default=b'glob',
    auditor=None,
    ctx=None,
    listsubrepos=False,
    warn=None,
    badfn=None,
    icasefs=False,
):
    r"""build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - optional path auditor
    ctx - optional changecontext
    listsubrepos - if True, recurse into subrepositories
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    Usually a patternmatcher is returned:
    >>> match(b'foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
    <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>

    Combining 'patterns' with 'include' (resp. 'exclude') gives an
    intersectionmatcher (resp. a differencematcher):
    >>> type(match(b'foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
    <class 'mercurial.match.intersectionmatcher'>
    >>> type(match(b'foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
    <class 'mercurial.match.differencematcher'>

    Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
    >>> match(b'foo', b'.', [])
    <alwaysmatcher>

    The 'default' argument determines which kind of pattern is assumed if a
    pattern has no prefix:
    >>> match(b'foo', b'.', [b'.*\.c$'], default=b're')
    <patternmatcher patterns='.*\\.c$'>
    >>> match(b'foo', b'.', [b'main.py'], default=b'relpath')
    <patternmatcher patterns='main\\.py(?:/|$)'>
    >>> match(b'foo', b'.', [b'main.py'], default=b're')
    <patternmatcher patterns='main.py'>

    The primary use of matchers is to check whether a value (usually a file
    name) matches against one of the patterns given at initialization. There
    are two ways of doing this check.

    >>> m = match(b'foo', b'', [b're:.*\.c$', b'relpath:a'])

    1. Calling the matcher with a file name returns True if any pattern
    matches that file name:
    >>> m(b'a')
    True
    >>> m(b'main.c')
    True
    >>> m(b'test.py')
    False

    2. Using the exact() method only returns True if the file name matches one
    of the exact patterns (i.e. not re: or glob: patterns):
    >>> m.exact(b'a')
    True
    >>> m.exact(b'main.c')
    False
    """
    if icasefs:
        dirstate = ctx.repo().dirstate
        foldcase = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            folded = []
            for kind, pat, source in _donormalize(
                patterns, default, root, cwd, auditor, warn
            ):
                if kind in (b're', b'relre'):
                    # regex can't be normalized
                    folded.append((kind, pat, source))
                    continue

                original = pat
                pat = foldcase(original)

                # Preserve the original to handle a case only rename.
                if original != pat and original in dirstate:
                    folded.append((kind, original, source))

                folded.append((kind, pat, source))
            return folded

    else:
        normalize = _donormalize

    def includelikematcher(pats):
        # include and exclude lists both default to 'glob' patterns and are
        # built without the caller's bad() callback
        kp = normalize(pats, b'glob', root, cwd, auditor, warn)
        return _buildkindpatsmatcher(
            includematcher,
            root,
            kp,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )

    if not patterns:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using nevermatcher).
        m = alwaysmatcher(badfn)
    else:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(badfn)
        else:
            m = _buildkindpatsmatcher(
                patternmatcher,
                root,
                kindpats,
                ctx=ctx,
                listsubrepos=listsubrepos,
                badfn=badfn,
            )

    if include:
        m = intersectmatchers(m, includelikematcher(include))
    if exclude:
        m = differencematcher(m, includelikematcher(exclude))
    return m

292

293

294

def exact(files, badfn=None):
    """Return an exactmatcher built from ``files`` and ``badfn``."""
    matcher = exactmatcher(files, badfn=badfn)
    return matcher

296

297

298

def always(badfn=None):
    """Return an alwaysmatcher using ``badfn`` as its bad() callback."""
    matcher = alwaysmatcher(badfn)
    return matcher

300

301

302

def never(badfn=None):
    """Return a nevermatcher using ``badfn`` as its bad() callback."""
    matcher = nevermatcher(badfn)
    return matcher

304

305

306

def badmatch(match, badfn):
    """Return a shallow copy of ``match`` whose ``bad`` attribute is ``badfn``.

    The matcher passed in is left untouched; only the copy gets the new
    callback.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone

313

314

315

def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Each entry of ``patterns`` is split with _patsplit(), passing
    ``default`` along. The result is a list of ``(kind, pat, source)``
    tuples built as follows:

    - kinds in cwdrelativepatternkinds have their pattern run through
      pathutil.canonpath() with ``root``, ``cwd`` and ``auditor``;
    - 'relglob', 'path', 'rootfilesin' and 'rootglob' patterns go through
      util.normpath();
    - 'listfile' / 'listfile0' entries are replaced by the normalized,
      non-empty entries of the named file (split on lines, resp. on NUL),
      with ``source`` set to the list file name;
    - 'include' entries are replaced by the normalized patterns returned by
      readpatternfile(), with ``source`` falling back to the include path;
    - every other kind (e.g. 're' or 'relre') keeps its pattern as is.

    Entries that are not expanded get an empty ``source``.

    Raises error.Abort when a list file cannot be read, or when handling an
    included pattern file aborts (the include path is prepended to the
    message). An included file that raises IOError is skipped, and reported
    through ``warn`` if one was given.
    '''
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
        elif kind in (b'relglob', b'path', b'rootfilesin', b'rootglob'):
            pat = util.normpath(pat)
        elif kind in (b'listfile', b'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == b'listfile0':
                    files = files.split(b'\0')
                else:
                    files = files.splitlines()
                files = [f for f in files if f]
            except EnvironmentError:
                raise error.Abort(_(b"unable to read file list (%s)") % pat)
            # the list file itself is recorded as the source of its entries
            for k, p, source in _donormalize(
                files, default, root, cwd, auditor, warn
            ):
                kindpats.append((k, p, pat))
            continue
        elif kind == b'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(
                    includepats, default, root, cwd, auditor, warn
                ):
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                # Exceptions cannot be subscripted on Python 3, so the
                # message is read from .args rather than via inst[0].
                raise error.Abort(b'%s: %s' % (pat, inst.args[0]))
            except IOError as inst:
                if warn:
                    warn(
                        _(b"skipping unreadable pattern file '%s': %s\n")
                        % (pat, stringutil.forcebytestr(inst.strerror))
                    )
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, b''))
    return kindpats

362

363

364

class basematcher(object):
    """Common base of the matcher classes.

    On its own it matches nothing: matchfn() returns False, files() is
    empty, and always(), isexact() and prefix() all return False.
    Subclasses override the pieces they need.
    """

    def __init__(self, badfn=None):
        # only shadow the class-level bad() when a callback was supplied
        if badfn is None:
            return
        self.bad = badfn

    def __call__(self, fn):
        """Return the result of matchfn() for ``fn``."""
        return self.matchfn(fn)

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        # set view of _files, used for the membership test in exact()
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        """Return False: the base matcher matches no file."""
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        '''
        return True

    def visitchildrenset(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

             visitdir | visitchildrenset
            ----------+-------------------
             False    | set()
             'all'    | 'all'
             True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        '''
        return b'this'

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not (self.always() or self.isexact() or self.prefix())

482

478

483

479

484

class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, badfn=None):
        super(alwaysmatcher, self).__init__(badfn)

    def always(self):
        """Return True: this matcher matches everything."""
        return True

    def matchfn(self, f):
        """Return True for any ``f``."""
        return True

    def visitdir(self, dir):
        """Return b'all' for any ``dir``."""
        return b'all'

    def visitchildrenset(self, dir):
        """Return b'all' for any ``dir``."""
        return b'all'

    def __repr__(self):
        return r'<alwaysmatcher>'

504

500

505

501

506

class nevermatcher(basematcher):
    '''Matches nothing.'''

    def __init__(self, badfn=None):
        super(nevermatcher, self).__init__(badfn)

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        """Return True, so callers may take the exact-matcher fast path."""
        return True

    def prefix(self):
        """Return True, so callers may take the prefix-matcher fast path."""
        return True

    def visitdir(self, dir):
        """Return False for any ``dir``."""
        return False

    def visitchildrenset(self, dir):
        """Return an empty set for any ``dir``."""
        return set()

    def __repr__(self):
        return r'<nevermatcher>'

531

527

532

528

533

class predicatematcher(basematcher):

529

class predicatematcher(basematcher):

534

"""A matcher adapter for a simple boolean function"""

530

"""A matcher adapter for a simple boolean function"""

535

531

536

def __init__(self, predfn, predrepr=None, badfn=None):

532

def __init__(self, predfn, predrepr=None, badfn=None):

537

super(predicatematcher, self).__init__(badfn)

533

super(predicatematcher, self).__init__(badfn)

538

self.matchfn = predfn

534

self.matchfn = predfn

539

self._predrepr = predrepr

535

self._predrepr = predrepr

540

536

541

@encoding.strmethod

537

@encoding.strmethod

542

def __repr__(self):

538

def __repr__(self):

543

s = stringutil.buildrepr(self._predrepr) or pycompat.byterepr(

539

s = stringutil.buildrepr(self._predrepr) or pycompat.byterepr(

544

self.matchfn

540

self.matchfn

545

)

541

)

546

return b'<predicatenmatcher pred=%s>' % s

542

return b'<predicatenmatcher pred=%s>' % s

547

543

548

544

549

class patternmatcher(basematcher):

545

class patternmatcher(basematcher):

550

"""Matches a set of (kind, pat, source) against a 'root' directory.

546

"""Matches a set of (kind, pat, source) against a 'root' directory.

551

547

552

>>> kindpats = [

548

>>> kindpats = [

553

... (b're', br'.*\.c$', b''),

549

... (b're', br'.*\.c$', b''),

554

... (b'path', b'foo/a', b''),

550

... (b'path', b'foo/a', b''),

555

... (b'relpath', b'b', b''),

551

... (b'relpath', b'b', b''),

556

... (b'glob', b'*.h', b''),

552

... (b'glob', b'*.h', b''),

557

... ]

553

... ]

558

>>> m = patternmatcher(b'foo', kindpats)

554

>>> m = patternmatcher(b'foo', kindpats)

559

>>> m(b'main.c') # matches re:.*\.c$

555

>>> m(b'main.c') # matches re:.*\.c$

560

True

556

True

561

>>> m(b'b.txt')

557

>>> m(b'b.txt')

562

False

558

False

563

>>> m(b'foo/a') # matches path:foo/a

559

>>> m(b'foo/a') # matches path:foo/a

564

True

560

True

565

>>> m(b'a') # does not match path:b, since 'root' is 'foo'

561

>>> m(b'a') # does not match path:b, since 'root' is 'foo'

566

False

562

False

567

>>> m(b'b') # matches relpath:b, since 'root' is 'foo'

563

>>> m(b'b') # matches relpath:b, since 'root' is 'foo'

568

True

564

True

569

>>> m(b'lib.h') # matches glob:*.h

565

>>> m(b'lib.h') # matches glob:*.h

570

True

566

True

571

567

572

>>> m.files()

568

>>> m.files()

573

['', 'foo/a', 'b', '']

569

['', 'foo/a', 'b', '']

574

>>> m.exact(b'foo/a')

570

>>> m.exact(b'foo/a')

575

True

571

True

576

>>> m.exact(b'b')

572

>>> m.exact(b'b')

577

True

573

True

578

>>> m.exact(b'lib.h') # exact matches are for (rel)path kinds

574

>>> m.exact(b'lib.h') # exact matches are for (rel)path kinds

579

False

575

False

580

"""

576

"""

581

577

582

def __init__(self, root, kindpats, badfn=None):

578

def __init__(self, root, kindpats, badfn=None):

583

super(patternmatcher, self).__init__(badfn)

579

super(patternmatcher, self).__init__(badfn)

584

580

585

self._files = _explicitfiles(kindpats)

581

self._files = _explicitfiles(kindpats)

586

self._prefix = _prefix(kindpats)

582

self._prefix = _prefix(kindpats)

587

self._pats, self.matchfn = _buildmatch(kindpats, b'$', root)

583

self._pats, self.matchfn = _buildmatch(kindpats, b'$', root)

588

584

589

@propertycache

585

@propertycache

590

def _dirs(self):

586

def _dirs(self):

591

return set(pathutil.dirs(self._fileset))

587

return set(pathutil.dirs(self._fileset))

592

588

593

def visitdir(self, dir):

589

def visitdir(self, dir):

594

if self._prefix and dir in self._fileset:

590

if self._prefix and dir in self._fileset:

595

return b'all'

591

return b'all'

596

return (

592

return (

597

dir in self._fileset

593

dir in self._fileset

598

or dir in self._dirs

594

or dir in self._dirs

599

or any(

595

or any(

600

parentdir in self._fileset

596

parentdir in self._fileset

601

for parentdir in pathutil.finddirs(dir)

597

for parentdir in pathutil.finddirs(dir)

602

)

598

)

603

)

599

)

604

600

605

def visitchildrenset(self, dir):

601

def visitchildrenset(self, dir):

606

ret = self.visitdir(dir)

602

ret = self.visitdir(dir)

607

if ret is True:

603

if ret is True:

608

return b'this'

604

return b'this'

609

elif not ret:

605

elif not ret:

610

return set()

606

return set()

611

assert ret == b'all'

607

assert ret == b'all'

612

return b'all'

608

return b'all'

613

609

614

def prefix(self):

610

def prefix(self):

615

return self._prefix

611

return self._prefix

616

612

617

@encoding.strmethod

613

@encoding.strmethod

618

def __repr__(self):

614

def __repr__(self):

619

return b'<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)

615

return b'<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)

620

616

621

617

622

# This is basically a reimplementation of pathutil.dirs that stores the

618

# This is basically a reimplementation of pathutil.dirs that stores the

623

# children instead of just a count of them, plus a small optional optimization

619

# children instead of just a count of them, plus a small optional optimization

624

# to avoid some directories we don't need.

620

# to avoid some directories we don't need.

625

class _dirchildren(object):

621

class _dirchildren(object):

626

def __init__(self, paths, onlyinclude=None):

622

def __init__(self, paths, onlyinclude=None):

627

self._dirs = {}

623

self._dirs = {}

628

self._onlyinclude = onlyinclude or []

624

self._onlyinclude = onlyinclude or []

629

addpath = self.addpath

625

addpath = self.addpath

630

for f in paths:

626

for f in paths:

631

addpath(f)

627

addpath(f)

632

628

633

def addpath(self, path):

629

def addpath(self, path):

634

if path == b'':

630

if path == b'':

635

return

631

return

636

dirs = self._dirs

632

dirs = self._dirs

637

findsplitdirs = _dirchildren._findsplitdirs

633

findsplitdirs = _dirchildren._findsplitdirs

638

for d, b in findsplitdirs(path):

634

for d, b in findsplitdirs(path):

639

if d not in self._onlyinclude:

635

if d not in self._onlyinclude:

640

continue

636

continue

641

dirs.setdefault(d, set()).add(b)

637

dirs.setdefault(d, set()).add(b)

642

638

643

@staticmethod

639

@staticmethod

644

def _findsplitdirs(path):

640

def _findsplitdirs(path):

645

# yields (dirname, basename) tuples, walking back to the root. This is

641

# yields (dirname, basename) tuples, walking back to the root. This is

646

# very similar to pathutil.finddirs, except:

642

# very similar to pathutil.finddirs, except:

647

# - produces a (dirname, basename) tuple, not just 'dirname'

643

# - produces a (dirname, basename) tuple, not just 'dirname'

648

# Unlike manifest._splittopdir, this does not suffix `dirname` with a

644

# Unlike manifest._splittopdir, this does not suffix `dirname` with a

649

# slash.

645

# slash.

650

oldpos = len(path)

646

oldpos = len(path)

651

pos = path.rfind(b'/')

647

pos = path.rfind(b'/')

652

while pos != -1:

648

while pos != -1:

653

yield path[:pos], path[pos + 1 : oldpos]

649

yield path[:pos], path[pos + 1 : oldpos]

654

oldpos = pos

650

oldpos = pos

655

pos = path.rfind(b'/', 0, pos)

651

pos = path.rfind(b'/', 0, pos)

656

yield b'', path[:oldpos]

652

yield b'', path[:oldpos]

657

653

658

def get(self, path):

654

def get(self, path):

659

return self._dirs.get(path, set())

655

return self._dirs.get(path, set())

660

656

661

657

662

class includematcher(basematcher):

658

class includematcher(basematcher):

663

def __init__(self, root, kindpats, badfn=None):

659

def __init__(self, root, kindpats, badfn=None):

664

super(includematcher, self).__init__(badfn)

660

super(includematcher, self).__init__(badfn)

665

661

666

self._pats, self.matchfn = _buildmatch(kindpats, b'(?:/|$)', root)

662

self._pats, self.matchfn = _buildmatch(kindpats, b'(?:/|$)', root)

667

self._prefix = _prefix(kindpats)

663

self._prefix = _prefix(kindpats)

668

roots, dirs, parents = _rootsdirsandparents(kindpats)

664

roots, dirs, parents = _rootsdirsandparents(kindpats)

669

# roots are directories which are recursively included.

665

# roots are directories which are recursively included.

670

self._roots = set(roots)

666

self._roots = set(roots)

671

# dirs are directories which are non-recursively included.

667

# dirs are directories which are non-recursively included.

672

self._dirs = set(dirs)

668

self._dirs = set(dirs)

673

# parents are directories which are non-recursively included because

669

# parents are directories which are non-recursively included because

674

# they are needed to get to items in _dirs or _roots.

670

# they are needed to get to items in _dirs or _roots.

675

self._parents = parents

671

self._parents = parents

676

672

677

def visitdir(self, dir):

673

def visitdir(self, dir):

678

if self._prefix and dir in self._roots:

674

if self._prefix and dir in self._roots:

679

return b'all'

675

return b'all'

680

return (

676

return (

681

dir in self._roots

677

dir in self._roots

682

or dir in self._dirs

678

or dir in self._dirs

683

or dir in self._parents

679

or dir in self._parents

684

or any(

680

or any(

685

parentdir in self._roots for parentdir in pathutil.finddirs(dir)

681

parentdir in self._roots for parentdir in pathutil.finddirs(dir)

686

)

682

)

687

)

683

)

688

684

689

@propertycache

685

@propertycache

690

def _allparentschildren(self):

686

def _allparentschildren(self):

691

# It may seem odd that we add dirs, roots, and parents, and then

687

# It may seem odd that we add dirs, roots, and parents, and then

692

# restrict to only parents. This is to catch the case of:

688

# restrict to only parents. This is to catch the case of:

693

# dirs = ['foo/bar']

689

# dirs = ['foo/bar']

694

# parents = ['foo']

690

# parents = ['foo']

695

# if we asked for the children of 'foo', but had only added

691

# if we asked for the children of 'foo', but had only added

696

# self._parents, we wouldn't be able to respond ['bar'].

692

# self._parents, we wouldn't be able to respond ['bar'].

697

return _dirchildren(

693

return _dirchildren(

698

itertools.chain(self._dirs, self._roots, self._parents),

694

itertools.chain(self._dirs, self._roots, self._parents),

699

onlyinclude=self._parents,

695

onlyinclude=self._parents,

700

)

696

)

701

697

702

def visitchildrenset(self, dir):

698

def visitchildrenset(self, dir):

703

if self._prefix and dir in self._roots:

699

if self._prefix and dir in self._roots:

704

return b'all'

700

return b'all'

705

# Note: this does *not* include the 'dir in self._parents' case from

701

# Note: this does *not* include the 'dir in self._parents' case from

706

# visitdir, that's handled below.

702

# visitdir, that's handled below.

707

if (

703

if (

708

b'' in self._roots

704

b'' in self._roots

709

or dir in self._roots

705

or dir in self._roots

710

or dir in self._dirs

706

or dir in self._dirs

711

or any(

707

or any(

712

parentdir in self._roots for parentdir in pathutil.finddirs(dir)

708

parentdir in self._roots for parentdir in pathutil.finddirs(dir)

713

)

709

)

714

):

710

):

715

return b'this'

711

return b'this'

716

712

717

if dir in self._parents:

713

if dir in self._parents:

718

return self._allparentschildren.get(dir) or set()

714

return self._allparentschildren.get(dir) or set()

719

return set()

715

return set()

720

716

721

@encoding.strmethod

717

@encoding.strmethod

722

def __repr__(self):

718

def __repr__(self):

723

return b'<includematcher includes=%r>' % pycompat.bytestr(self._pats)

719

return b'<includematcher includes=%r>' % pycompat.bytestr(self._pats)

724

720

725

721

726

class exactmatcher(basematcher):

722

class exactmatcher(basematcher):

727

r'''Matches the input files exactly. They are interpreted as paths, not

723

r'''Matches the input files exactly. They are interpreted as paths, not

728

patterns (so no kind-prefixes).

724

patterns (so no kind-prefixes).

729

725

730

>>> m = exactmatcher([b'a.txt', br're:.*\.c$'])

726

>>> m = exactmatcher([b'a.txt', br're:.*\.c$'])

731

>>> m(b'a.txt')

727

>>> m(b'a.txt')

732

True

728

True

733

>>> m(b'b.txt')

729

>>> m(b'b.txt')

734

False

730

False

735

731

736

Input files that would be matched are exactly those returned by .files()

732

Input files that would be matched are exactly those returned by .files()

737

>>> m.files()

733

>>> m.files()

738

['a.txt', 're:.*\\.c$']

734

['a.txt', 're:.*\\.c$']

739

735

740

So pattern 're:.*\.c$' is not considered as a regex, but as a file name

736

So pattern 're:.*\.c$' is not considered as a regex, but as a file name

741

>>> m(b'main.c')

737

>>> m(b'main.c')

742

False

738

False

743

>>> m(br're:.*\.c$')

739

>>> m(br're:.*\.c$')

744

True

740

True

745

'''

741

'''

746

742

747

def __init__(self, files, badfn=None):

743

def __init__(self, files, badfn=None):

748

super(exactmatcher, self).__init__(badfn)

744

super(exactmatcher, self).__init__(badfn)

749

745

750

if isinstance(files, list):

746

if isinstance(files, list):

751

self._files = files

747

self._files = files

752

else:

748

else:

753

self._files = list(files)

749

self._files = list(files)

754

750

755

matchfn = basematcher.exact

751

matchfn = basematcher.exact

756

752

757

@propertycache

753

@propertycache

758

def _dirs(self):

754

def _dirs(self):

759

return set(pathutil.dirs(self._fileset))

755

return set(pathutil.dirs(self._fileset))

760

756

761

def visitdir(self, dir):

757

def visitdir(self, dir):

762

return dir in self._dirs

758

return dir in self._dirs

763

759

764

def visitchildrenset(self, dir):

760

def visitchildrenset(self, dir):

765

if not self._fileset or dir not in self._dirs:

761

if not self._fileset or dir not in self._dirs:

766

return set()

762

return set()

767

763

768

candidates = self._fileset | self._dirs - {b''}

764

candidates = self._fileset | self._dirs - {b''}

769

if dir != b'':

765

if dir != b'':

770

d = dir + b'/'

766

d = dir + b'/'

771

candidates = set(c[len(d) :] for c in candidates if c.startswith(d))

767

candidates = set(c[len(d) :] for c in candidates if c.startswith(d))

772

# self._dirs includes all of the directories, recursively, so if

768

# self._dirs includes all of the directories, recursively, so if

773

# we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',

769

# we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',

774

# 'foo/bar' in it. Thus we can safely ignore a candidate that has a

770

# 'foo/bar' in it. Thus we can safely ignore a candidate that has a

775

# '/' in it, indicating a it's for a subdir-of-a-subdir; the

771

# '/' in it, indicating a it's for a subdir-of-a-subdir; the

776

# immediate subdir will be in there without a slash.

772

# immediate subdir will be in there without a slash.

777

ret = {c for c in candidates if b'/' not in c}

773

ret = {c for c in candidates if b'/' not in c}

778

# We really do not expect ret to be empty, since that would imply that

774

# We really do not expect ret to be empty, since that would imply that

779

# there's something in _dirs that didn't have a file in _fileset.

775

# there's something in _dirs that didn't have a file in _fileset.

780

assert ret

776

assert ret

781

return ret

777

return ret

782

778

783

def isexact(self):

779

def isexact(self):

784

return True

780

return True

785

781

786

@encoding.strmethod

782

@encoding.strmethod

787

def __repr__(self):

783

def __repr__(self):

788

return b'<exactmatcher files=%r>' % self._files

784

return b'<exactmatcher files=%r>' % self._files

789

785

790

786

791

class differencematcher(basematcher):

787

class differencematcher(basematcher):

792

'''Composes two matchers by matching if the first matches and the second

788

'''Composes two matchers by matching if the first matches and the second

793

does not.

789

does not.

794

790

795

The second matcher's non-matching-attributes (bad, ~~explicitdir,~~

791

The second matcher's non-matching-attributes (bad, traversedir) are ignored.

796

traversedir) are ignored.

797

'''

792

'''

798

793

799

def __init__(self, m1, m2):

794

def __init__(self, m1, m2):

800

super(differencematcher, self).__init__()

795

super(differencematcher, self).__init__()

801

self._m1 = m1

796

self._m1 = m1

802

self._m2 = m2

797

self._m2 = m2

803

self.bad = m1.bad

798

self.bad = m1.bad

804

self.explicitdir = m1.explicitdir

805

self.traversedir = m1.traversedir

799

self.traversedir = m1.traversedir

806

800

807

def matchfn(self, f):

801

def matchfn(self, f):

808

return self._m1(f) and not self._m2(f)

802

return self._m1(f) and not self._m2(f)

809

803

810

@propertycache

804

@propertycache

811

def _files(self):

805

def _files(self):

812

if self.isexact():

806

if self.isexact():

813

return [f for f in self._m1.files() if self(f)]

807

return [f for f in self._m1.files() if self(f)]

814

# If m1 is not an exact matcher, we can't easily figure out the set of

808

# If m1 is not an exact matcher, we can't easily figure out the set of

815

# files, because its files() are not always files. For example, if

809

# files, because its files() are not always files. For example, if

816

# m1 is "path:dir" and m2 is "rootfileins:.", we don't

810

# m1 is "path:dir" and m2 is "rootfileins:.", we don't

817

# want to remove "dir" from the set even though it would match m2,

811

# want to remove "dir" from the set even though it would match m2,

818

# because the "dir" in m1 may not be a file.

812

# because the "dir" in m1 may not be a file.

819

return self._m1.files()

813

return self._m1.files()

820

814

821

def visitdir(self, dir):

815

def visitdir(self, dir):

822

if self._m2.visitdir(dir) == b'all':

816

if self._m2.visitdir(dir) == b'all':

823

return False

817

return False

824

elif not self._m2.visitdir(dir):

818

elif not self._m2.visitdir(dir):

825

# m2 does not match dir, we can return 'all' here if possible

819

# m2 does not match dir, we can return 'all' here if possible

826

return self._m1.visitdir(dir)

820

return self._m1.visitdir(dir)

827

return bool(self._m1.visitdir(dir))

821

return bool(self._m1.visitdir(dir))

828

822

829

def visitchildrenset(self, dir):

823

def visitchildrenset(self, dir):

830

m2_set = self._m2.visitchildrenset(dir)

824

m2_set = self._m2.visitchildrenset(dir)

831

if m2_set == b'all':

825

if m2_set == b'all':

832

return set()

826

return set()

833

m1_set = self._m1.visitchildrenset(dir)

827

m1_set = self._m1.visitchildrenset(dir)

834

# Possible values for m1: 'all', 'this', set(...), set()

828

# Possible values for m1: 'all', 'this', set(...), set()

835

# Possible values for m2: 'this', set(...), set()

829

# Possible values for m2: 'this', set(...), set()

836

# If m2 has nothing under here that we care about, return m1, even if

830

# If m2 has nothing under here that we care about, return m1, even if

837

# it's 'all'. This is a change in behavior from visitdir, which would

831

# it's 'all'. This is a change in behavior from visitdir, which would

838

# return True, not 'all', for some reason.

832

# return True, not 'all', for some reason.

839

if not m2_set:

833

if not m2_set:

840

return m1_set

834

return m1_set

841

if m1_set in [b'all', b'this']:

835

if m1_set in [b'all', b'this']:

842

# Never return 'all' here if m2_set is any kind of non-empty (either

836

# Never return 'all' here if m2_set is any kind of non-empty (either

843

# 'this' or set(foo)), since m2 might return set() for a

837

# 'this' or set(foo)), since m2 might return set() for a

844

# subdirectory.

838

# subdirectory.

845

return b'this'

839

return b'this'

846

# Possible values for m1: set(...), set()

840

# Possible values for m1: set(...), set()

847

# Possible values for m2: 'this', set(...)

841

# Possible values for m2: 'this', set(...)

848

# We ignore m2's set results. They're possibly incorrect:

842

# We ignore m2's set results. They're possibly incorrect:

849

# m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):

843

# m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):

850

# m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd

844

# m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd

851

# return set(), which is *not* correct, we still need to visit 'dir'!

845

# return set(), which is *not* correct, we still need to visit 'dir'!

852

return m1_set

846

return m1_set

853

847

854

def isexact(self):

848

def isexact(self):

855

return self._m1.isexact()

849

return self._m1.isexact()

856

850

857

@encoding.strmethod

851

@encoding.strmethod

858

def __repr__(self):

852

def __repr__(self):

859

return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)

853

return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)

860

854

861

855

862

def intersectmatchers(m1, m2):

856

def intersectmatchers(m1, m2):

863

'''Composes two matchers by matching if both of them match.

857

'''Composes two matchers by matching if both of them match.

864

858

865

The second matcher's non-matching-attributes (bad, ~~explicitdir,~~

859

The second matcher's non-matching-attributes (bad, traversedir) are ignored.

866

traversedir) are ignored.

867

'''

860

'''

868

if m1 is None or m2 is None:

861

if m1 is None or m2 is None:

869

return m1 or m2

862

return m1 or m2

870

if m1.always():

863

if m1.always():

871

m = copy.copy(m2)

864

m = copy.copy(m2)

872

# TODO: Consider encapsulating these things in a class so there's only

865

# TODO: Consider encapsulating these things in a class so there's only

873

# one thing to copy from m1.

866

# one thing to copy from m1.

874

m.bad = m1.bad

867

m.bad = m1.bad

875

m.explicitdir = m1.explicitdir

876

m.traversedir = m1.traversedir

868

m.traversedir = m1.traversedir

877

return m

869

return m

878

if m2.always():

870

if m2.always():

879

m = copy.copy(m1)

871

m = copy.copy(m1)

880

return m

872

return m

881

return intersectionmatcher(m1, m2)

873

return intersectionmatcher(m1, m2)

882

874

883

875

884

class intersectionmatcher(basematcher):

876

class intersectionmatcher(basematcher):

885

def __init__(self, m1, m2):

877

def __init__(self, m1, m2):

886

super(intersectionmatcher, self).__init__()

878

super(intersectionmatcher, self).__init__()

887

self._m1 = m1

879

self._m1 = m1

888

self._m2 = m2

880

self._m2 = m2

889

self.bad = m1.bad

881

self.bad = m1.bad

890

self.explicitdir = m1.explicitdir

891

self.traversedir = m1.traversedir

882

self.traversedir = m1.traversedir

892

883

893

@propertycache

884

@propertycache

894

def _files(self):

885

def _files(self):

895

if self.isexact():

886

if self.isexact():

896

m1, m2 = self._m1, self._m2

887

m1, m2 = self._m1, self._m2

897

if not m1.isexact():

888

if not m1.isexact():

898

m1, m2 = m2, m1

889

m1, m2 = m2, m1

899

return [f for f in m1.files() if m2(f)]

890

return [f for f in m1.files() if m2(f)]

900

# It neither m1 nor m2 is an exact matcher, we can't easily intersect

891

# It neither m1 nor m2 is an exact matcher, we can't easily intersect

901

# the set of files, because their files() are not always files. For

892

# the set of files, because their files() are not always files. For

902

# example, if intersecting a matcher "-I glob:foo.txt" with matcher of

893

# example, if intersecting a matcher "-I glob:foo.txt" with matcher of

903

# "path:dir2", we don't want to remove "dir2" from the set.

894

# "path:dir2", we don't want to remove "dir2" from the set.

904

return self._m1.files() + self._m2.files()

895

return self._m1.files() + self._m2.files()

905

896

906

def matchfn(self, f):

897

def matchfn(self, f):

907

return self._m1(f) and self._m2(f)

898

return self._m1(f) and self._m2(f)

908

899

909

def visitdir(self, dir):

900

def visitdir(self, dir):

910

visit1 = self._m1.visitdir(dir)

901

visit1 = self._m1.visitdir(dir)

911

if visit1 == b'all':

902

if visit1 == b'all':

912

return self._m2.visitdir(dir)

903

return self._m2.visitdir(dir)

913

# bool() because visit1=True + visit2='all' should not be 'all'

904

# bool() because visit1=True + visit2='all' should not be 'all'

914

return bool(visit1 and self._m2.visitdir(dir))

905

return bool(visit1 and self._m2.visitdir(dir))

915

906

916

def visitchildrenset(self, dir):

907

def visitchildrenset(self, dir):

917

m1_set = self._m1.visitchildrenset(dir)

908

m1_set = self._m1.visitchildrenset(dir)

918

if not m1_set:

909

if not m1_set:

919

return set()

910

return set()

920

m2_set = self._m2.visitchildrenset(dir)

911

m2_set = self._m2.visitchildrenset(dir)

921

if not m2_set:

912

if not m2_set:

922

return set()

913

return set()

923

914

924

if m1_set == b'all':

915

if m1_set == b'all':

925

return m2_set

916

return m2_set

926

elif m2_set == b'all':

917

elif m2_set == b'all':

927

return m1_set

918

return m1_set

928

919

929

if m1_set == b'this' or m2_set == b'this':

920

if m1_set == b'this' or m2_set == b'this':

930

return b'this'

921

return b'this'

931

922

932

assert isinstance(m1_set, set) and isinstance(m2_set, set)

923

assert isinstance(m1_set, set) and isinstance(m2_set, set)

933

return m1_set.intersection(m2_set)

924

return m1_set.intersection(m2_set)

934

925

935

def always(self):

926

def always(self):

936

return self._m1.always() and self._m2.always()

927

return self._m1.always() and self._m2.always()

937

928

938

def isexact(self):

929

def isexact(self):

939

return self._m1.isexact() or self._m2.isexact()

930

return self._m1.isexact() or self._m2.isexact()

940

931

941

@encoding.strmethod

932

@encoding.strmethod

942

def __repr__(self):

933

def __repr__(self):

943

return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)

934

return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)

944

935

945

936

946

class subdirmatcher(basematcher):

937

class subdirmatcher(basematcher):

947

"""Adapt a matcher to work on a subdirectory only.

938

"""Adapt a matcher to work on a subdirectory only.

948

939

949

The paths are remapped to remove/insert the path as needed:

940

The paths are remapped to remove/insert the path as needed:

950

941

951

>>> from . import pycompat

942

>>> from . import pycompat

952

>>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])

943

>>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])

953

>>> m2 = subdirmatcher(b'sub', m1)

944

>>> m2 = subdirmatcher(b'sub', m1)

954

>>> m2(b'a.txt')

945

>>> m2(b'a.txt')

955

False

946

False

956

>>> m2(b'b.txt')

947

>>> m2(b'b.txt')

957

True

948

True

958

>>> m2.matchfn(b'a.txt')

949

>>> m2.matchfn(b'a.txt')

959

False

950

False

960

>>> m2.matchfn(b'b.txt')

951

>>> m2.matchfn(b'b.txt')

961

True

952

True

962

>>> m2.files()

953

>>> m2.files()

963

['b.txt']

954

['b.txt']

964

>>> m2.exact(b'b.txt')

955

>>> m2.exact(b'b.txt')

965

True

956

True

966

>>> def bad(f, msg):

957

>>> def bad(f, msg):

967

... print(pycompat.sysstr(b"%s: %s" % (f, msg)))

958

... print(pycompat.sysstr(b"%s: %s" % (f, msg)))

968

>>> m1.bad = bad

959

>>> m1.bad = bad

969

>>> m2.bad(b'x.txt', b'No such file')

960

>>> m2.bad(b'x.txt', b'No such file')

970

sub/x.txt: No such file

961

sub/x.txt: No such file

971

"""

962

"""

972

963

973

def __init__(self, path, matcher):

964

def __init__(self, path, matcher):

974

super(subdirmatcher, self).__init__()

965

super(subdirmatcher, self).__init__()

975

self._path = path

966

self._path = path

976

self._matcher = matcher

967

self._matcher = matcher

977

self._always = matcher.always()

968

self._always = matcher.always()

978

969

979

self._files = [

970

self._files = [

980

f[len(path) + 1 :]

971

f[len(path) + 1 :]

981

for f in matcher._files

972

for f in matcher._files

982

if f.startswith(path + b"/")

973

if f.startswith(path + b"/")

983

]

974

]

984

975

985

# If the parent repo had a path to this subrepo and the matcher is

976

# If the parent repo had a path to this subrepo and the matcher is

986

# a prefix matcher, this submatcher always matches.

977

# a prefix matcher, this submatcher always matches.

987

if matcher.prefix():

978

if matcher.prefix():

988

self._always = any(f == path for f in matcher._files)

979

self._always = any(f == path for f in matcher._files)

989

980

990

def bad(self, f, msg):

981

def bad(self, f, msg):

991

self._matcher.bad(self._path + b"/" + f, msg)

982

self._matcher.bad(self._path + b"/" + f, msg)

992

983

993

def matchfn(self, f):

984

def matchfn(self, f):

994

# Some information is lost in the superclass's constructor, so we

985

# Some information is lost in the superclass's constructor, so we

995

# can not accurately create the matching function for the subdirectory

986

# can not accurately create the matching function for the subdirectory

996

# from the inputs. Instead, we override matchfn() and visitdir() to

987

# from the inputs. Instead, we override matchfn() and visitdir() to

997

# call the original matcher with the subdirectory path prepended.

988

# call the original matcher with the subdirectory path prepended.

998

return self._matcher.matchfn(self._path + b"/" + f)

989

return self._matcher.matchfn(self._path + b"/" + f)

999

990

1000

def visitdir(self, dir):

991

def visitdir(self, dir):

1001

if dir == b'':

992

if dir == b'':

1002

dir = self._path

993

dir = self._path

1003

else:

994

else:

1004

dir = self._path + b"/" + dir

995

dir = self._path + b"/" + dir

1005

return self._matcher.visitdir(dir)

996

return self._matcher.visitdir(dir)

1006

997

1007

def visitchildrenset(self, dir):

998

def visitchildrenset(self, dir):

1008

if dir == b'':

999

if dir == b'':

1009

dir = self._path

1000

dir = self._path

1010

else:

1001

else:

1011

dir = self._path + b"/" + dir

1002

dir = self._path + b"/" + dir

1012

return self._matcher.visitchildrenset(dir)

1003

return self._matcher.visitchildrenset(dir)

1013

1004

1014

def always(self):

1005

def always(self):

1015

return self._always

1006

return self._always

1016

1007

1017

def prefix(self):

1008

def prefix(self):

1018

return self._matcher.prefix() and not self._always

1009

return self._matcher.prefix() and not self._always

1019

1010

1020

@encoding.strmethod

1011

@encoding.strmethod

1021

def __repr__(self):

1012

def __repr__(self):

1022

return b'<subdirmatcher path=%r, matcher=%r>' % (

1013

return b'<subdirmatcher path=%r, matcher=%r>' % (

1023

self._path,

1014

self._path,

1024

self._matcher,

1015

self._matcher,

1025

)

1016

)

1026

1017

1027

1018

1028

class prefixdirmatcher(basematcher):

1019

class prefixdirmatcher(basematcher):

1029

"""Adapt a matcher to work on a parent directory.

1020

"""Adapt a matcher to work on a parent directory.

1030

1021

1031

The matcher's non-matching-attributes (bad, ~~explicitdir,~~ traversedir) are

1022

The matcher's non-matching-attributes (bad, traversedir) are ignored.

1032

ignored.

1033

1023

1034

The prefix path should usually be the relative path from the root of

1024

The prefix path should usually be the relative path from the root of

1035

this matcher to the root of the wrapped matcher.

1025

this matcher to the root of the wrapped matcher.

1036

1026

1037

>>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])

1027

>>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])

1038

>>> m2 = prefixdirmatcher(b'd/e', m1)

1028

>>> m2 = prefixdirmatcher(b'd/e', m1)

1039

>>> m2(b'a.txt')

1029

>>> m2(b'a.txt')

1040

False

1030

False

1041

>>> m2(b'd/e/a.txt')

1031

>>> m2(b'd/e/a.txt')

1042

True

1032

True

1043

>>> m2(b'd/e/b.txt')

1033

>>> m2(b'd/e/b.txt')

1044

False

1034

False

1045

>>> m2.files()

1035

>>> m2.files()

1046

['d/e/a.txt', 'd/e/f/b.txt']

1036

['d/e/a.txt', 'd/e/f/b.txt']

1047

>>> m2.exact(b'd/e/a.txt')

1037

>>> m2.exact(b'd/e/a.txt')

1048

True

1038

True

1049

>>> m2.visitdir(b'd')

1039

>>> m2.visitdir(b'd')

1050

True

1040

True

1051

>>> m2.visitdir(b'd/e')

1041

>>> m2.visitdir(b'd/e')

1052

True

1042

True

1053

>>> m2.visitdir(b'd/e/f')

1043

>>> m2.visitdir(b'd/e/f')

1054

True

1044

True

1055

>>> m2.visitdir(b'd/e/g')

1045

>>> m2.visitdir(b'd/e/g')

1056

False

1046

False

1057

>>> m2.visitdir(b'd/ef')

1047

>>> m2.visitdir(b'd/ef')

1058

False

1048

False

1059

"""

1049

"""

1060

1050

1061

def __init__(self, path, matcher, badfn=None):

1051

def __init__(self, path, matcher, badfn=None):

1062

super(prefixdirmatcher, self).__init__(badfn)

1052

super(prefixdirmatcher, self).__init__(badfn)

1063

if not path:

1053

if not path:

1064

raise error.ProgrammingError(b'prefix path must not be empty')

1054

raise error.ProgrammingError(b'prefix path must not be empty')

1065

self._path = path

1055

self._path = path

1066

self._pathprefix = path + b'/'

1056

self._pathprefix = path + b'/'

1067

self._matcher = matcher

1057

self._matcher = matcher

1068

1058

1069

@propertycache

1059

@propertycache

1070

def _files(self):

1060

def _files(self):

1071

return [self._pathprefix + f for f in self._matcher._files]

1061

return [self._pathprefix + f for f in self._matcher._files]

1072

1062

1073

def matchfn(self, f):

1063

def matchfn(self, f):

1074

if not f.startswith(self._pathprefix):

1064

if not f.startswith(self._pathprefix):

1075

return False

1065

return False

1076

return self._matcher.matchfn(f[len(self._pathprefix) :])

1066

return self._matcher.matchfn(f[len(self._pathprefix) :])

1077

1067

1078

@propertycache

1068

@propertycache

1079

def _pathdirs(self):

1069

def _pathdirs(self):

1080

return set(pathutil.finddirs(self._path))

1070

return set(pathutil.finddirs(self._path))

1081

1071

1082

def visitdir(self, dir):

1072

def visitdir(self, dir):

1083

if dir == self._path:

1073

if dir == self._path:

1084

return self._matcher.visitdir(b'')

1074

return self._matcher.visitdir(b'')

1085

if dir.startswith(self._pathprefix):

1075

if dir.startswith(self._pathprefix):

1086

return self._matcher.visitdir(dir[len(self._pathprefix) :])

1076

return self._matcher.visitdir(dir[len(self._pathprefix) :])

1087

return dir in self._pathdirs

1077

return dir in self._pathdirs

1088

1078

1089

def visitchildrenset(self, dir):

1079

def visitchildrenset(self, dir):

1090

if dir == self._path:

1080

if dir == self._path:

1091

return self._matcher.visitchildrenset(b'')

1081

return self._matcher.visitchildrenset(b'')

1092

if dir.startswith(self._pathprefix):

1082

if dir.startswith(self._pathprefix):

1093

return self._matcher.visitchildrenset(dir[len(self._pathprefix) :])

1083

return self._matcher.visitchildrenset(dir[len(self._pathprefix) :])

1094

if dir in self._pathdirs:

1084

if dir in self._pathdirs:

1095

return b'this'

1085

return b'this'

1096

return set()

1086

return set()

1097

1087

1098

def isexact(self):

1088

def isexact(self):

1099

return self._matcher.isexact()

1089

return self._matcher.isexact()

1100

1090

1101

def prefix(self):

1091

def prefix(self):

1102

return self._matcher.prefix()

1092

return self._matcher.prefix()

1103

1093

1104

@encoding.strmethod

1094

@encoding.strmethod

1105

def __repr__(self):

1095

def __repr__(self):

1106

return b'<prefixdirmatcher path=%r, matcher=%r>' % (

1096

return b'<prefixdirmatcher path=%r, matcher=%r>' % (

1107

pycompat.bytestr(self._path),

1097

pycompat.bytestr(self._path),

1108

self._matcher,

1098

self._matcher,

1109

)

1099

)

1110

1100

1111

1101

1112

class unionmatcher(basematcher):

1102

class unionmatcher(basematcher):

1113

"""A matcher that is the union of several matchers.

1103

"""A matcher that is the union of several matchers.

1114

1104

1115

The non-matching-attributes (bad, ~~explicitdir,~~ traversedir) are taken from

1105

The non-matching-attributes (bad, traversedir) are taken from the first

1116

~~the first~~ matcher.

1106

matcher.

1117

"""

1107

"""

1118

1108

1119

def __init__(self, matchers):

1109

def __init__(self, matchers):

1120

m1 = matchers[0]

1110

m1 = matchers[0]

1121

super(unionmatcher, self).__init__()

1111

super(unionmatcher, self).__init__()

1122

self.explicitdir = m1.explicitdir

1123

self.traversedir = m1.traversedir

1112

self.traversedir = m1.traversedir

1124

self._matchers = matchers

1113

self._matchers = matchers

1125

1114

1126

def matchfn(self, f):

1115

def matchfn(self, f):

1127

for match in self._matchers:

1116

for match in self._matchers:

1128

if match(f):

1117

if match(f):

1129

return True

1118

return True

1130

return False

1119

return False

1131

1120

1132

def visitdir(self, dir):

1121

def visitdir(self, dir):

1133

r = False

1122

r = False

1134

for m in self._matchers:

1123

for m in self._matchers:

1135

v = m.visitdir(dir)

1124

v = m.visitdir(dir)

1136

if v == b'all':

1125

if v == b'all':

1137

return v

1126

return v

1138

r |= v

1127

r |= v

1139

return r

1128

return r

1140

1129

1141

def visitchildrenset(self, dir):

1130

def visitchildrenset(self, dir):

1142

r = set()

1131

r = set()

1143

this = False

1132

this = False

1144

for m in self._matchers:

1133

for m in self._matchers:

1145

v = m.visitchildrenset(dir)

1134

v = m.visitchildrenset(dir)

1146

if not v:

1135

if not v:

1147

continue

1136

continue

1148

if v == b'all':

1137

if v == b'all':

1149

return v

1138

return v

1150

if this or v == b'this':

1139

if this or v == b'this':

1151

this = True

1140

this = True

1152

# don't break, we might have an 'all' in here.

1141

# don't break, we might have an 'all' in here.

1153

continue

1142

continue

1154

assert isinstance(v, set)

1143

assert isinstance(v, set)

1155

r = r.union(v)

1144

r = r.union(v)

1156

if this:

1145

if this:

1157

return b'this'

1146

return b'this'

1158

return r

1147

return r

1159

1148

1160

@encoding.strmethod

1149

@encoding.strmethod

1161

def __repr__(self):

1150

def __repr__(self):

1162

return b'<unionmatcher matchers=%r>' % self._matchers

1151

return b'<unionmatcher matchers=%r>' % self._matchers

1163

1152

1164

1153

1165

def patkind(pattern, default=None):

1154

def patkind(pattern, default=None):

1166

'''If pattern is 'kind:pat' with a known kind, return kind.

1155

'''If pattern is 'kind:pat' with a known kind, return kind.

1167

1156

1168

>>> patkind(br're:.*\.c$')

1157

>>> patkind(br're:.*\.c$')

1169

're'

1158

're'

1170

>>> patkind(b'glob:*.c')

1159

>>> patkind(b'glob:*.c')

1171

'glob'

1160

'glob'

1172

>>> patkind(b'relpath:test.py')

1161

>>> patkind(b'relpath:test.py')

1173

'relpath'

1162

'relpath'

1174

>>> patkind(b'main.py')

1163

>>> patkind(b'main.py')

1175

>>> patkind(b'main.py', default=b're')

1164

>>> patkind(b'main.py', default=b're')

1176

're'

1165

're'

1177

'''

1166

'''

1178

return _patsplit(pattern, default)[0]

1167

return _patsplit(pattern, default)[0]

1179

1168

1180

1169

1181

def _patsplit(pattern, default):

1170

def _patsplit(pattern, default):

1182

"""Split a string into the optional pattern kind prefix and the actual

1171

"""Split a string into the optional pattern kind prefix and the actual

1183

pattern."""

1172

pattern."""

1184

if b':' in pattern:

1173

if b':' in pattern:

1185

kind, pat = pattern.split(b':', 1)

1174

kind, pat = pattern.split(b':', 1)

1186

if kind in allpatternkinds:

1175

if kind in allpatternkinds:

1187

return kind, pat

1176

return kind, pat

1188

return default, pattern

1177

return default, pattern

1189

1178

1190

1179

1191

def _globre(pat):

1180

def _globre(pat):

1192

r'''Convert an extended glob string to a regexp string.

1181

r'''Convert an extended glob string to a regexp string.

1193

1182

1194

>>> from . import pycompat

1183

>>> from . import pycompat

1195

>>> def bprint(s):

1184

>>> def bprint(s):

1196

... print(pycompat.sysstr(s))

1185

... print(pycompat.sysstr(s))

1197

>>> bprint(_globre(br'?'))

1186

>>> bprint(_globre(br'?'))

1198

.

1187

.

1199

>>> bprint(_globre(br'*'))

1188

>>> bprint(_globre(br'*'))

1200

[^/]*

1189

[^/]*

1201

>>> bprint(_globre(br'**'))

1190

>>> bprint(_globre(br'**'))

1202

.*

1191

.*

1203

>>> bprint(_globre(br'**/a'))

1192

>>> bprint(_globre(br'**/a'))

1204

(?:.*/)?a

1193

(?:.*/)?a

1205

>>> bprint(_globre(br'a/**/b'))

1194

>>> bprint(_globre(br'a/**/b'))

1206

a/(?:.*/)?b

1195

a/(?:.*/)?b

1207

>>> bprint(_globre(br'[a*?!^][^b][!c]'))

1196

>>> bprint(_globre(br'[a*?!^][^b][!c]'))

1208

[a*?!^][\^b][^c]

1197

[a*?!^][\^b][^c]

1209

>>> bprint(_globre(br'{a,b}'))

1198

>>> bprint(_globre(br'{a,b}'))

1210

(?:a|b)

1199

(?:a|b)

1211

>>> bprint(_globre(br'.\*\?'))

1200

>>> bprint(_globre(br'.\*\?'))

1212

\.\*\?

1201

\.\*\?

1213

'''

1202

'''

1214

i, n = 0, len(pat)

1203

i, n = 0, len(pat)

1215

res = b''

1204

res = b''

1216

group = 0

1205

group = 0

1217

escape = util.stringutil.regexbytesescapemap.get

1206

escape = util.stringutil.regexbytesescapemap.get

1218

1207

1219

def peek():

1208

def peek():

1220

return i < n and pat[i : i + 1]

1209

return i < n and pat[i : i + 1]

1221

1210

1222

while i < n:

1211

while i < n:

1223

c = pat[i : i + 1]

1212

c = pat[i : i + 1]

1224

i += 1

1213

i += 1

1225

if c not in b'*?[{},\\':

1214

if c not in b'*?[{},\\':

1226

res += escape(c, c)

1215

res += escape(c, c)

1227

elif c == b'*':

1216

elif c == b'*':

1228

if peek() == b'*':

1217

if peek() == b'*':

1229

i += 1

1218

i += 1

1230

if peek() == b'/':

1219

if peek() == b'/':

1231

i += 1

1220

i += 1

1232

res += b'(?:.*/)?'

1221

res += b'(?:.*/)?'

1233

else:

1222

else:

1234

res += b'.*'

1223

res += b'.*'

1235

else:

1224

else:

1236

res += b'[^/]*'

1225

res += b'[^/]*'

1237

elif c == b'?':

1226

elif c == b'?':

1238

res += b'.'

1227

res += b'.'

1239

elif c == b'[':

1228

elif c == b'[':

1240

j = i

1229

j = i

1241

if j < n and pat[j : j + 1] in b'!]':

1230

if j < n and pat[j : j + 1] in b'!]':

1242

j += 1

1231

j += 1

1243

while j < n and pat[j : j + 1] != b']':

1232

while j < n and pat[j : j + 1] != b']':

1244

j += 1

1233

j += 1

1245

if j >= n:

1234

if j >= n:

1246

res += b'\\['

1235

res += b'\\['

1247

else:

1236

else:

1248

stuff = pat[i:j].replace(b'\\', b'\\\\')

1237

stuff = pat[i:j].replace(b'\\', b'\\\\')

1249

i = j + 1

1238

i = j + 1

1250

if stuff[0:1] == b'!':

1239

if stuff[0:1] == b'!':

1251

stuff = b'^' + stuff[1:]

1240

stuff = b'^' + stuff[1:]

1252

elif stuff[0:1] == b'^':

1241

elif stuff[0:1] == b'^':

1253

stuff = b'\\' + stuff

1242

stuff = b'\\' + stuff

1254

res = b'%s[%s]' % (res, stuff)

1243

res = b'%s[%s]' % (res, stuff)

1255

elif c == b'{':

1244

elif c == b'{':

1256

group += 1

1245

group += 1

1257

res += b'(?:'

1246

res += b'(?:'

1258

elif c == b'}' and group:

1247

elif c == b'}' and group:

1259

res += b')'

1248

res += b')'

1260

group -= 1

1249

group -= 1

1261

elif c == b',' and group:

1250

elif c == b',' and group:

1262

res += b'|'

1251

res += b'|'

1263

elif c == b'\\':

1252

elif c == b'\\':

1264

p = peek()

1253

p = peek()

1265

if p:

1254

if p:

1266

i += 1

1255

i += 1

1267

res += escape(p, p)

1256

res += escape(p, p)

1268

else:

1257

else:

1269

res += escape(c, c)

1258

res += escape(c, c)

1270

else:

1259

else:

1271

res += escape(c, c)

1260

res += escape(c, c)

1272

return res

1261

return res

1273

1262

1274

1263

1275

def _regex(kind, pat, globsuffix):

1264

def _regex(kind, pat, globsuffix):

1276

'''Convert a (normalized) pattern of any kind into a

1265

'''Convert a (normalized) pattern of any kind into a

1277

regular expression.

1266

regular expression.

1278

globsuffix is appended to the regexp of globs.'''

1267

globsuffix is appended to the regexp of globs.'''

1279

1268

1280

if rustmod is not None:

1269

if rustmod is not None:

1281

try:

1270

try:

1282

return rustmod.build_single_regex(kind, pat, globsuffix)

1271

return rustmod.build_single_regex(kind, pat, globsuffix)

1283

except rustmod.PatternError:

1272

except rustmod.PatternError:

1284

raise error.ProgrammingError(

1273

raise error.ProgrammingError(

1285

b'not a regex pattern: %s:%s' % (kind, pat)

1274

b'not a regex pattern: %s:%s' % (kind, pat)

1286

)

1275

)

1287

1276

1288

if not pat and kind in (b'glob', b'relpath'):

1277

if not pat and kind in (b'glob', b'relpath'):

1289

return b''

1278

return b''

1290

if kind == b're':

1279

if kind == b're':

1291

return pat

1280

return pat

1292

if kind in (b'path', b'relpath'):

1281

if kind in (b'path', b'relpath'):

1293

if pat == b'.':

1282

if pat == b'.':

1294

return b''

1283

return b''

1295

return util.stringutil.reescape(pat) + b'(?:/|$)'

1284

return util.stringutil.reescape(pat) + b'(?:/|$)'

1296

if kind == b'rootfilesin':

1285

if kind == b'rootfilesin':

1297

if pat == b'.':

1286

if pat == b'.':

1298

escaped = b''

1287

escaped = b''

1299

else:

1288

else:

1300

# Pattern is a directory name.

1289

# Pattern is a directory name.

1301

escaped = util.stringutil.reescape(pat) + b'/'

1290

escaped = util.stringutil.reescape(pat) + b'/'

1302

# Anything after the pattern must be a non-directory.

1291

# Anything after the pattern must be a non-directory.

1303

return escaped + b'[^/]+$'

1292

return escaped + b'[^/]+$'

1304

if kind == b'relglob':

1293

if kind == b'relglob':

1305

globre = _globre(pat)

1294

globre = _globre(pat)

1306

if globre.startswith(b'[^/]*'):

1295

if globre.startswith(b'[^/]*'):

1307

# When pat has the form *XYZ (common), make the returned regex more

1296

# When pat has the form *XYZ (common), make the returned regex more

1308

# legible by returning the regex for **XYZ instead of **/*XYZ.

1297

# legible by returning the regex for **XYZ instead of **/*XYZ.

1309

return b'.*' + globre[len(b'[^/]*') :] + globsuffix

1298

return b'.*' + globre[len(b'[^/]*') :] + globsuffix

1310

return b'(?:|.*/)' + globre + globsuffix

1299

return b'(?:|.*/)' + globre + globsuffix

1311

if kind == b'relre':

1300

if kind == b'relre':

1312

if pat.startswith(b'^'):

1301

if pat.startswith(b'^'):

1313

return pat

1302

return pat

1314

return b'.*' + pat

1303

return b'.*' + pat

1315

if kind in (b'glob', b'rootglob'):

1304

if kind in (b'glob', b'rootglob'):

1316

return _globre(pat) + globsuffix

1305

return _globre(pat) + globsuffix

1317

raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))

1306

raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))

1318

1307

1319

1308

1320

def _buildmatch(kindpats, globsuffix, root):

1309

def _buildmatch(kindpats, globsuffix, root):

1321

'''Return regexp string and a matcher function for kindpats.

1310

'''Return regexp string and a matcher function for kindpats.

1322

globsuffix is appended to the regexp of globs.'''

1311

globsuffix is appended to the regexp of globs.'''

1323

matchfuncs = []

1312

matchfuncs = []

1324

1313

1325

subincludes, kindpats = _expandsubinclude(kindpats, root)

1314

subincludes, kindpats = _expandsubinclude(kindpats, root)

1326

if subincludes:

1315

if subincludes:

1327

submatchers = {}

1316

submatchers = {}

1328

1317

1329

def matchsubinclude(f):

1318

def matchsubinclude(f):

1330

for prefix, matcherargs in subincludes:

1319

for prefix, matcherargs in subincludes:

1331

if f.startswith(prefix):

1320

if f.startswith(prefix):

1332

mf = submatchers.get(prefix)

1321

mf = submatchers.get(prefix)

1333

if mf is None:

1322

if mf is None:

1334

mf = match(*matcherargs)

1323

mf = match(*matcherargs)

1335

submatchers[prefix] = mf

1324

submatchers[prefix] = mf

1336

1325

1337

if mf(f[len(prefix) :]):

1326

if mf(f[len(prefix) :]):

1338

return True

1327

return True

1339

return False

1328

return False

1340

1329

1341

matchfuncs.append(matchsubinclude)

1330

matchfuncs.append(matchsubinclude)

1342

1331

1343

regex = b''

1332

regex = b''

1344

if kindpats:

1333

if kindpats:

1345

if all(k == b'rootfilesin' for k, p, s in kindpats):

1334

if all(k == b'rootfilesin' for k, p, s in kindpats):

1346

dirs = {p for k, p, s in kindpats}

1335

dirs = {p for k, p, s in kindpats}

1347

1336

1348

def mf(f):

1337

def mf(f):

1349

i = f.rfind(b'/')

1338

i = f.rfind(b'/')

1350

if i >= 0:

1339

if i >= 0:

1351

dir = f[:i]

1340

dir = f[:i]

1352

else:

1341

else:

1353

dir = b'.'

1342

dir = b'.'

1354

return dir in dirs

1343

return dir in dirs

1355

1344

1356

regex = b'rootfilesin: %s' % stringutil.pprint(list(sorted(dirs)))

1345

regex = b'rootfilesin: %s' % stringutil.pprint(list(sorted(dirs)))

1357

matchfuncs.append(mf)

1346

matchfuncs.append(mf)

1358

else:

1347

else:

1359

regex, mf = _buildregexmatch(kindpats, globsuffix)

1348

regex, mf = _buildregexmatch(kindpats, globsuffix)

1360

matchfuncs.append(mf)

1349

matchfuncs.append(mf)

1361

1350

1362

if len(matchfuncs) == 1:

1351

if len(matchfuncs) == 1:

1363

return regex, matchfuncs[0]

1352

return regex, matchfuncs[0]

1364

else:

1353

else:

1365

return regex, lambda f: any(mf(f) for mf in matchfuncs)

1354

return regex, lambda f: any(mf(f) for mf in matchfuncs)

1366

1355

1367

1356

1368

MAX_RE_SIZE = 20000

1357

MAX_RE_SIZE = 20000

1369

1358

1370

1359

1371

def _joinregexes(regexps):

1360

def _joinregexes(regexps):

1372

"""gather multiple regular expressions into a single one"""

1361

"""gather multiple regular expressions into a single one"""

1373

return b'|'.join(regexps)

1362

return b'|'.join(regexps)

1374

1363

1375

1364

1376

def _buildregexmatch(kindpats, globsuffix):

1365

def _buildregexmatch(kindpats, globsuffix):

1377

"""Build a match function from a list of kinds and kindpats,

1366

"""Build a match function from a list of kinds and kindpats,

1378

return regexp string and a matcher function.

1367

return regexp string and a matcher function.

1379

1368

1380

Test too large input

1369

Test too large input

1381

>>> _buildregexmatch([

1370

>>> _buildregexmatch([

1382

... (b'relglob', b'?' * MAX_RE_SIZE, b'')

1371

... (b'relglob', b'?' * MAX_RE_SIZE, b'')

1383

... ], b'$')

1372

... ], b'$')

1384

Traceback (most recent call last):

1373

Traceback (most recent call last):

1385

...

1374

...

1386

Abort: matcher pattern is too long (20009 bytes)

1375

Abort: matcher pattern is too long (20009 bytes)

1387

"""

1376

"""

1388

try:

1377

try:

1389

allgroups = []

1378

allgroups = []

1390

regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]

1379

regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]

1391

fullregexp = _joinregexes(regexps)

1380

fullregexp = _joinregexes(regexps)

1392

1381

1393

startidx = 0

1382

startidx = 0

1394

groupsize = 0

1383

groupsize = 0

1395

for idx, r in enumerate(regexps):

1384

for idx, r in enumerate(regexps):

1396

piecesize = len(r)

1385

piecesize = len(r)

1397

if piecesize > MAX_RE_SIZE:

1386

if piecesize > MAX_RE_SIZE:

1398

msg = _(b"matcher pattern is too long (%d bytes)") % piecesize

1387

msg = _(b"matcher pattern is too long (%d bytes)") % piecesize

1399

raise error.Abort(msg)

1388

raise error.Abort(msg)

1400

elif (groupsize + piecesize) > MAX_RE_SIZE:

1389

elif (groupsize + piecesize) > MAX_RE_SIZE:

1401

group = regexps[startidx:idx]

1390

group = regexps[startidx:idx]

1402

allgroups.append(_joinregexes(group))

1391

allgroups.append(_joinregexes(group))

1403

startidx = idx

1392

startidx = idx

1404

groupsize = 0

1393

groupsize = 0

1405

groupsize += piecesize + 1

1394

groupsize += piecesize + 1

1406

1395

1407

if startidx == 0:

1396

if startidx == 0:

1408

matcher = _rematcher(fullregexp)

1397

matcher = _rematcher(fullregexp)

1409

func = lambda s: bool(matcher(s))

1398

func = lambda s: bool(matcher(s))

1410

else:

1399

else:

1411

group = regexps[startidx:]

1400

group = regexps[startidx:]

1412

allgroups.append(_joinregexes(group))

1401

allgroups.append(_joinregexes(group))

1413

allmatchers = [_rematcher(g) for g in allgroups]

1402

allmatchers = [_rematcher(g) for g in allgroups]

1414

func = lambda s: any(m(s) for m in allmatchers)

1403

func = lambda s: any(m(s) for m in allmatchers)

1415

return fullregexp, func

1404

return fullregexp, func

1416

except re.error:

1405

except re.error:

1417

for k, p, s in kindpats:

1406

for k, p, s in kindpats:

1418

try:

1407

try:

1419

_rematcher(_regex(k, p, globsuffix))

1408

_rematcher(_regex(k, p, globsuffix))

1420

except re.error:

1409

except re.error:

1421

if s:

1410

if s:

1422

raise error.Abort(

1411

raise error.Abort(

1423

_(b"%s: invalid pattern (%s): %s") % (s, k, p)

1412

_(b"%s: invalid pattern (%s): %s") % (s, k, p)

1424

)

1413

)

1425

else:

1414

else:

1426

raise error.Abort(_(b"invalid pattern (%s): %s") % (k, p))

1415

raise error.Abort(_(b"invalid pattern (%s): %s") % (k, p))

1427

raise error.Abort(_(b"invalid pattern"))

1416

raise error.Abort(_(b"invalid pattern"))

1428

1417

1429

1418

1430

def _patternrootsanddirs(kindpats):

1419

def _patternrootsanddirs(kindpats):

1431

'''Returns roots and directories corresponding to each pattern.

1420

'''Returns roots and directories corresponding to each pattern.

1432

1421

1433

This calculates the roots and directories exactly matching the patterns and

1422

This calculates the roots and directories exactly matching the patterns and

1434

returns a tuple of (roots, dirs) for each. It does not return other

1423

returns a tuple of (roots, dirs) for each. It does not return other

1435

directories which may also need to be considered, like the parent

1424

directories which may also need to be considered, like the parent

1436

directories.

1425

directories.

1437

'''

1426

'''

1438

r = []

1427

r = []

1439

d = []

1428

d = []

1440

for kind, pat, source in kindpats:

1429

for kind, pat, source in kindpats:

1441

if kind in (b'glob', b'rootglob'): # find the non-glob prefix

1430

if kind in (b'glob', b'rootglob'): # find the non-glob prefix

1442

root = []

1431

root = []

1443

for p in pat.split(b'/'):

1432

for p in pat.split(b'/'):

1444

if b'[' in p or b'{' in p or b'*' in p or b'?' in p:

1433

if b'[' in p or b'{' in p or b'*' in p or b'?' in p:

1445

break

1434

break

1446

root.append(p)

1435

root.append(p)

1447

r.append(b'/'.join(root))

1436

r.append(b'/'.join(root))

1448

elif kind in (b'relpath', b'path'):

1437

elif kind in (b'relpath', b'path'):

1449

if pat == b'.':

1438

if pat == b'.':

1450

pat = b''

1439

pat = b''

1451

r.append(pat)

1440

r.append(pat)

1452

elif kind in (b'rootfilesin',):

1441

elif kind in (b'rootfilesin',):

1453

if pat == b'.':

1442

if pat == b'.':

1454

pat = b''

1443

pat = b''

1455

d.append(pat)

1444

d.append(pat)

1456

else: # relglob, re, relre

1445

else: # relglob, re, relre

1457

r.append(b'')

1446

r.append(b'')

1458

return r, d

1447

return r, d

1459

1448

1460

1449

1461

def _roots(kindpats):

1450

def _roots(kindpats):

1462

'''Returns root directories to match recursively from the given patterns.'''

1451

'''Returns root directories to match recursively from the given patterns.'''

1463

roots, dirs = _patternrootsanddirs(kindpats)

1452

roots, dirs = _patternrootsanddirs(kindpats)

1464

return roots

1453

return roots

1465

1454

1466

1455

1467

def _rootsdirsandparents(kindpats):

1456

def _rootsdirsandparents(kindpats):

1468

'''Returns roots and exact directories from patterns.

1457

'''Returns roots and exact directories from patterns.

1469

1458

1470

`roots` are directories to match recursively, `dirs` should

1459

`roots` are directories to match recursively, `dirs` should

1471

be matched non-recursively, and `parents` are the implicitly required

1460

be matched non-recursively, and `parents` are the implicitly required

1472

directories to walk to items in either roots or dirs.

1461

directories to walk to items in either roots or dirs.

1473

1462

1474

Returns a tuple of (roots, dirs, parents).

1463

Returns a tuple of (roots, dirs, parents).

1475

1464

1476

>>> r = _rootsdirsandparents(

1465

>>> r = _rootsdirsandparents(

1477

... [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),

1466

... [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),

1478

... (b'glob', b'g*', b'')])

1467

... (b'glob', b'g*', b'')])

1479

>>> print(r[0:2], sorted(r[2])) # the set has an unstable output

1468

>>> print(r[0:2], sorted(r[2])) # the set has an unstable output

1480

(['g/h', 'g/h', ''], []) ['', 'g']

1469

(['g/h', 'g/h', ''], []) ['', 'g']

1481

>>> r = _rootsdirsandparents(

1470

>>> r = _rootsdirsandparents(

1482

... [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])

1471

... [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])

1483

>>> print(r[0:2], sorted(r[2])) # the set has an unstable output

1472

>>> print(r[0:2], sorted(r[2])) # the set has an unstable output

1484

([], ['g/h', '']) ['', 'g']

1473

([], ['g/h', '']) ['', 'g']

1485

>>> r = _rootsdirsandparents(

1474

>>> r = _rootsdirsandparents(

1486

... [(b'relpath', b'r', b''), (b'path', b'p/p', b''),

1475

... [(b'relpath', b'r', b''), (b'path', b'p/p', b''),

1487

... (b'path', b'', b'')])

1476

... (b'path', b'', b'')])

1488

>>> print(r[0:2], sorted(r[2])) # the set has an unstable output

1477

>>> print(r[0:2], sorted(r[2])) # the set has an unstable output

1489

(['r', 'p/p', ''], []) ['', 'p']

1478

(['r', 'p/p', ''], []) ['', 'p']

1490

>>> r = _rootsdirsandparents(

1479

>>> r = _rootsdirsandparents(

1491

... [(b'relglob', b'rg*', b''), (b're', b're/', b''),

1480

... [(b'relglob', b'rg*', b''), (b're', b're/', b''),

1492

... (b'relre', b'rr', b'')])

1481

... (b'relre', b'rr', b'')])

1493

>>> print(r[0:2], sorted(r[2])) # the set has an unstable output

1482

>>> print(r[0:2], sorted(r[2])) # the set has an unstable output

1494

(['', '', ''], []) ['']

1483

(['', '', ''], []) ['']

1495

'''

1484

'''

1496

r, d = _patternrootsanddirs(kindpats)

1485

r, d = _patternrootsanddirs(kindpats)

1497

1486

1498

p = set()

1487

p = set()

1499

# Add the parents as non-recursive/exact directories, since they must be

1488

# Add the parents as non-recursive/exact directories, since they must be

1500

# scanned to get to either the roots or the other exact directories.

1489

# scanned to get to either the roots or the other exact directories.

1501

p.update(pathutil.dirs(d))

1490

p.update(pathutil.dirs(d))

1502

p.update(pathutil.dirs(r))

1491

p.update(pathutil.dirs(r))

1503

1492

1504

# FIXME: all uses of this function convert these to sets, do so before

1493

# FIXME: all uses of this function convert these to sets, do so before

1505

# returning.

1494

# returning.

1506

# FIXME: all uses of this function do not need anything in 'roots' and

1495

# FIXME: all uses of this function do not need anything in 'roots' and

1507

# 'dirs' to also be in 'parents', consider removing them before returning.

1496

# 'dirs' to also be in 'parents', consider removing them before returning.

1508

return r, d, p

1497

return r, d, p

1509

1498

1510

1499

1511

def _explicitfiles(kindpats):

1500

def _explicitfiles(kindpats):

1512

'''Returns the potential explicit filenames from the patterns.

1501

'''Returns the potential explicit filenames from the patterns.

1513

1502

1514

>>> _explicitfiles([(b'path', b'foo/bar', b'')])

1503

>>> _explicitfiles([(b'path', b'foo/bar', b'')])

1515

['foo/bar']

1504

['foo/bar']

1516

>>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])

1505

>>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])

1517

[]

1506

[]

1518

'''

1507

'''

1519

# Keep only the pattern kinds where one can specify filenames (vs only

1508

# Keep only the pattern kinds where one can specify filenames (vs only

1520

# directory names).

1509

# directory names).

1521

filable = [kp for kp in kindpats if kp[0] not in (b'rootfilesin',)]

1510

filable = [kp for kp in kindpats if kp[0] not in (b'rootfilesin',)]

1522

return _roots(filable)

1511

return _roots(filable)

1523

1512

1524

1513

1525

def _prefix(kindpats):

1514

def _prefix(kindpats):

1526

'''Whether all the patterns match a prefix (i.e. recursively)'''

1515

'''Whether all the patterns match a prefix (i.e. recursively)'''

1527

for kind, pat, source in kindpats:

1516

for kind, pat, source in kindpats:

1528

if kind not in (b'path', b'relpath'):

1517

if kind not in (b'path', b'relpath'):

1529

return False

1518

return False

1530

return True

1519

return True

1531

1520

1532

1521

1533

_commentre = None

1522

_commentre = None

1534

1523

1535

1524

1536

def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    rootglob:pat   # rooted glob (same root as ^ in regexps)
    pattern        # pattern of the current default type

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline).
    This is useful to debug ignore patterns.

    warn, when true, is called with a message for every 'syntax:' line
    naming an unknown syntax; such lines are otherwise ignored.
    '''
    # The comment-stripping regexp is compiled lazily and cached at module
    # level.
    global _commentre

    if rustmod is not None:
        result, warnings = rustmod.read_pattern_file(
            filepath, bool(warn), sourceinfo,
        )

        for warning_params in warnings:
            # Can't be easily emitted from Rust, because it would require
            # a mechanism for both gettext and calling the `warn` function.
            warn(_(b"%s: ignoring invalid syntax '%s'\n") % warning_params)

        return result

    # Maps the name accepted after 'syntax:' (and as an inline 'name:'
    # prefix) to the prefix put in front of the returned pattern.
    syntaxes = {
        b're': b'relre:',
        b'regexp': b'relre:',
        b'glob': b'relglob:',
        b'rootglob': b'rootglob:',
        b'include': b'include',
        b'subinclude': b'subinclude',
    }
    syntax = b'relre:'
    patterns = []

    # 'with' guarantees the file is closed even if parsing raises.
    with open(filepath, b'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if b"#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[: m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace(b"\\#", b"#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith(b'syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(
                            _(b"%s: ignoring invalid syntax '%s'\n")
                            % (filepath, s)
                        )
                continue

            # A per-line prefix overrides the file-wide default syntax.
            linesyntax = syntax
            for s, rels in pycompat.iteritems(syntaxes):
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels) :]
                    break
                elif line.startswith(s + b':'):
                    linesyntax = rels
                    line = line[len(s) + 1 :]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # match.py - filename matching
             #
             #  Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import, print_function
             import copy
             import itertools
             import os
             import re
             from .i18n import _
             from .pycompat import open
             from . import (
                 encoding,
                 error,
                 pathutil,
                 policy,
                 pycompat,
                 util,
             )
             from .utils import stringutil
             # Optional Rust implementation of pattern-file handling, loaded through
             # the policy module; readpatternfile() delegates to it when it is not None.
             rustmod = policy.importrust('filepatterns')
             # Pattern kinds, i.e. the part before ':' in a 'kind:pattern' string.
             allpatternkinds = (
                 b're',
                 b'glob',
                 b'path',
                 b'relglob',
                 b'relpath',
                 b'relre',
                 b'rootglob',
                 b'listfile',
                 b'listfile0',
                 b'set',
                 b'include',
                 b'subinclude',
                 b'rootfilesin',
             )
             # Kinds whose patterns _donormalize() canonicalizes relative to cwd.
             cwdrelativepatternkinds = (b'relpath', b'glob')
             # Local alias so the decorator can be written as @propertycache.
             propertycache = util.propertycache
             def _rematcher(regex):
                 '''Compile regex with util.re and return a function matching against it.

                 The compiled object's test_match attribute is returned when it exists
                 (slightly faster, provided by facebook's re2 bindings); otherwise its
                 match method is returned.
                 '''
                 compiled = util.re.compile(regex)
                 try:
                     fastmatch = compiled.test_match
                 except AttributeError:
                     return compiled.match
                 return fastmatch
             def _expandsets(kindpats, ctx=None, listsubrepos=False, badfn=None):
                 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
                 matchers = []
                 other = []
                 for kind, pat, source in kindpats:
                     if kind == b'set':
                         if ctx is None:
                             raise error.ProgrammingError(
                                 b"fileset expression with no context"
                             )
                         matchers.append(ctx.matchfileset(pat, badfn=badfn))
                         if listsubrepos:
                             for subpath in ctx.substate:
                                 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
                                 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
                                 matchers.append(pm)
                         continue
                     other.append((kind, pat, source))
                 return matchers, other
             def _expandsubinclude(kindpats, root):
                 '''Returns the list of subinclude matcher args and the kindpats without the
                 subincludes in it.'''
                 relmatchers = []
                 other = []
                 for kind, pat, source in kindpats:
                     if kind == b'subinclude':
                         sourceroot = pathutil.dirname(util.normpath(source))
                         pat = util.pconvert(pat)
                         path = pathutil.join(sourceroot, pat)
                         newroot = pathutil.dirname(path)
                         matcherargs = (newroot, b'', [], [b'include:%s' % path])
                         prefix = pathutil.canonpath(root, root, newroot)
                         if prefix:
                             prefix += b'/'
                         relmatchers.append((prefix, matcherargs))
                     else:
                         other.append((kind, pat, source))
                 return relmatchers, other
             def _kindpatsalwaysmatch(kindpats):
                 """"Checks whether the kindspats match everything, as e.g.
                 'relpath:.' does.
                 """
                 for kind, pat, source in kindpats:
                     if pat != b'' or kind not in [b'relpath', b'glob']:
                         return False
                 return True
             def _buildkindpatsmatcher(
                 matchercls, root, kindpats, ctx=None, listsubrepos=False, badfn=None
             ):
                 '''Build one matcher for kindpats, expanding 'set' patterns first.

                 Non-'set' patterns are handed to matchercls(root, kindpats, badfn=...);
                 fileset matchers come from _expandsets(). Returns a nevermatcher when
                 nothing was built, the single matcher when there is exactly one, and
                 a unionmatcher over all of them otherwise.
                 '''
                 filesetmatchers, plainkindpats = _expandsets(
                     kindpats, ctx=ctx, listsubrepos=listsubrepos, badfn=badfn
                 )
                 combined = []
                 if plainkindpats:
                     combined.append(matchercls(root, plainkindpats, badfn=badfn))
                 combined.extend(filesetmatchers)
                 if not combined:
                     return nevermatcher(badfn=badfn)
                 if len(combined) > 1:
                     return unionmatcher(combined)
                 return combined[0]
             def match(
                 root,
                 cwd,
                 patterns=None,
                 include=None,
                 exclude=None,
                 default=b'glob',
                 auditor=None,
                 ctx=None,
                 listsubrepos=False,
                 warn=None,
                 badfn=None,
                 icasefs=False,
             ):
                 r"""build an object to match a set of file patterns

                 arguments:
                 root - the canonical root of the tree you're matching against
                 cwd - the current working directory, if relevant
                 patterns - patterns to find
                 include - patterns to include (unless they are excluded)
                 exclude - patterns to exclude (even if they are included)
                 default - if a pattern in patterns has no explicit type, assume this one
                 auditor - optional path auditor
                 ctx - optional changecontext
                 listsubrepos - if True, recurse into subrepositories
                 warn - optional function used for printing warnings
                 badfn - optional bad() callback for this matcher instead of the default
                 icasefs - make a matcher for wdir on case insensitive filesystems, which
                     normalizes the given patterns to the case in the filesystem

                 a pattern is one of:
                 'glob:<glob>' - a glob relative to cwd
                 're:<regexp>' - a regular expression
                 'path:<path>' - a path relative to repository root, which is matched
                                 recursively
                 'rootfilesin:<path>' - a path relative to repository root, which is
                                 matched non-recursively (will not match subdirectories)
                 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
                 'relpath:<path>' - a path relative to cwd
                 'relre:<regexp>' - a regexp that needn't match the start of a name
                 'set:<fileset>' - a fileset expression
                 'include:<path>' - a file of patterns to read and include
                 'subinclude:<path>' - a file of patterns to match against files under
                                       the same directory
                 '<something>' - a pattern of the specified default type

                 Usually a patternmatcher is returned:
                 >>> match(b'foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
                 <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>

                 Combining 'patterns' with 'include' (resp. 'exclude') gives an
                 intersectionmatcher (resp. a differencematcher):
                 >>> type(match(b'foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
                 <class 'mercurial.match.intersectionmatcher'>
                 >>> type(match(b'foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
                 <class 'mercurial.match.differencematcher'>

                 Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
                 >>> match(b'foo', b'.', [])
                 <alwaysmatcher>

                 The 'default' argument determines which kind of pattern is assumed if a
                 pattern has no prefix:
                 >>> match(b'foo', b'.', [b'.*\.c$'], default=b're')
                 <patternmatcher patterns='.*\\.c$'>
                 >>> match(b'foo', b'.', [b'main.py'], default=b'relpath')
                 <patternmatcher patterns='main\\.py(?:/|$)'>
                 >>> match(b'foo', b'.', [b'main.py'], default=b're')
                 <patternmatcher patterns='main.py'>

                 The primary use of matchers is to check whether a value (usually a file
                 name) matches against one of the patterns given at initialization. There
                 are two ways of doing this check.

                 >>> m = match(b'foo', b'', [b're:.*\.c$', b'relpath:a'])

                 1. Calling the matcher with a file name returns True if any pattern
                 matches that file name:
                 >>> m(b'a')
                 True
                 >>> m(b'main.c')
                 True
                 >>> m(b'test.py')
                 False

                 2. Using the exact() method only returns True if the file name matches one
                 of the exact patterns (i.e. not re: or glob: patterns):
                 >>> m.exact(b'a')
                 True
                 >>> m.exact(b'main.c')
                 False
                 """
                 normalize = _donormalize
                 if icasefs:
                     # This branch needs ctx: the dirstate is taken from ctx.repo().
                     dirstate = ctx.repo().dirstate
                     dsnormalize = dirstate.normalize
                     # Replacement for _donormalize that additionally runs every
                     # non-regex pattern through dirstate.normalize.
                     def normalize(patterns, default, root, cwd, auditor, warn):
                         kp = _donormalize(patterns, default, root, cwd, auditor, warn)
                         kindpats = []
                         for kind, pats, source in kp:
                             if kind not in (b're', b'relre'):  # regex can't be normalized
                                 p = pats
                                 pats = dsnormalize(pats)
                                 # Preserve the original to handle a case only rename.
                                 if p != pats and p in dirstate:
                                     kindpats.append((kind, p, source))
                             kindpats.append((kind, pats, source))
                         return kindpats
                 if patterns:
                     kindpats = normalize(patterns, default, root, cwd, auditor, warn)
                     if _kindpatsalwaysmatch(kindpats):
                         m = alwaysmatcher(badfn)
                     else:
                         m = _buildkindpatsmatcher(
                             patternmatcher,
                             root,
                             kindpats,
                             ctx=ctx,
                             listsubrepos=listsubrepos,
                             badfn=badfn,
                         )
                 else:
                     # It's a little strange that no patterns means to match everything.
                     # Consider changing this to match nothing (probably using nevermatcher).
                     m = alwaysmatcher(badfn)
                 # Include and exclude patterns always default to the 'glob' kind, and
                 # their matchers are built with badfn=None rather than the caller's
                 # badfn.
                 if include:
                     kindpats = normalize(include, b'glob', root, cwd, auditor, warn)
                     im = _buildkindpatsmatcher(
                         includematcher,
                         root,
                         kindpats,
                         ctx=ctx,
                         listsubrepos=listsubrepos,
                         badfn=None,
                     )
                     m = intersectmatchers(m, im)
                 if exclude:
                     kindpats = normalize(exclude, b'glob', root, cwd, auditor, warn)
                     em = _buildkindpatsmatcher(
                         includematcher,
                         root,
                         kindpats,
                         ctx=ctx,
                         listsubrepos=listsubrepos,
                         badfn=None,
                     )
                     m = differencematcher(m, em)
                 return m
             def exact(files, badfn=None):
                 '''Return an exactmatcher built from files, forwarding badfn.'''
                 return exactmatcher(files, badfn=badfn)
             def always(badfn=None):
                 '''Return an alwaysmatcher (matches everything), forwarding badfn.'''
                 return alwaysmatcher(badfn)
             def never(badfn=None):
                 '''Return a nevermatcher (matches nothing), forwarding badfn.'''
                 return nevermatcher(badfn)
             def badmatch(match, badfn):
                 """Return a shallow copy of match whose bad callback is badfn.

                 The matcher passed in is left untouched.
                 """
                 clone = copy.copy(match)
                 clone.bad = badfn
                 return clone
             def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
                 '''Convert 'kind:pat' from the patterns list to tuples with kind and
                 normalized and rooted patterns and with listfiles expanded.

                 Returns a list of (kind, pat, source) tuples. source is b'' for
                 patterns given directly, the list file's path for patterns read from
                 a 'listfile'/'listfile0', and for patterns read through 'include' the
                 nested source or, failing that, the include path.

                 Raises error.Abort when a list file cannot be read or when reading an
                 include file aborts. An include file that fails with IOError is
                 skipped, and reported through warn if warn is set.
                 '''
                 kindpats = []
                 for kind, pat in [_patsplit(p, default) for p in patterns]:
                     if kind in cwdrelativepatternkinds:
                         pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
                     elif kind in (b'relglob', b'path', b'rootfilesin', b'rootglob'):
                         pat = util.normpath(pat)
                     elif kind in (b'listfile', b'listfile0'):
                         try:
                             files = util.readfile(pat)
                             if kind == b'listfile0':
                                 files = files.split(b'\0')
                             else:
                                 files = files.splitlines()
                             files = [f for f in files if f]
                         except EnvironmentError:
                             raise error.Abort(_(b"unable to read file list (%s)") % pat)
                         # Entries of the list are patterns themselves; record the list
                         # file as their source.
                         for k, p, _source in _donormalize(
                             files, default, root, cwd, auditor, warn
                         ):
                             kindpats.append((k, p, pat))
                         continue
                     elif kind == b'include':
                         try:
                             fullpath = os.path.join(root, util.localpath(pat))
                             includepats = readpatternfile(fullpath, warn)
                             for k, p, source in _donormalize(
                                 includepats, default, root, cwd, auditor, warn
                             ):
                                 kindpats.append((k, p, source or pat))
                         except error.Abort as inst:
                             # Exceptions are not subscriptable on Python 3, so take
                             # the message from .args instead of inst[0].
                             raise error.Abort(b'%s: %s' % (pat, inst.args[0]))
                         except IOError as inst:
                             if warn:
                                 warn(
                                     _(b"skipping unreadable pattern file '%s': %s\n")
                                     % (pat, stringutil.forcebytestr(inst.strerror))
                                 )
                         continue
                     # else: re or relre - which cannot be normalized
                     kindpats.append((kind, pat, b''))
                 return kindpats
             class basematcher(object):
                 '''Base class for matchers.

                 On its own it matches nothing (matchfn returns False), lists no files,
                 and asks for every directory to be visited. Calling an instance with a
                 file name returns matchfn(fn).
                 '''

                 def __init__(self, badfn=None):
                     # Only override the class-level bad() when a callback is given.
                     if badfn is not None:
                         self.bad = badfn

                 def __call__(self, fn):
                     return self.matchfn(fn)

                 # Callbacks related to how the matcher is used by dirstate.walk.
                 # Subscribers to these events must monkeypatch the matcher object.
                 def bad(self, f, msg):
                     '''Callback from dirstate.walk for each explicit file that can't be
                     found/accessed, with an error message.'''

                 # If a traversedir is set, it will be called when a directory discovered
                 # by recursive traversal is visited.
                 traversedir = None

                 @propertycache
                 def _files(self):
                     return []

                 def files(self):
                     '''Explicitly listed files or patterns or roots:
                     if no patterns or .always(): empty list,
                     if exact: list exact files,
                     if not .anypats(): list all files and dirs,
                     else: optimal roots'''
                     return self._files

                 @propertycache
                 def _fileset(self):
                     return set(self._files)

                 def exact(self, f):
                     '''Returns True if f is in .files().'''
                     return f in self._fileset

                 def matchfn(self, f):
                     '''Return whether f matches; the base implementation never does.'''
                     return False

                 def visitdir(self, dir):
                     '''Decides whether a directory should be visited based on whether it
                     has potential matches in it or one of its subdirectories. This is
                     based on the match's primary, included, and excluded patterns.

                     Returns the string 'all' if the given directory and all subdirectories
                     should be visited. Otherwise returns True or False indicating whether
                     the given directory should be visited.
                     '''
                     return True

                 def visitchildrenset(self, dir):
                     '''Decides whether a directory should be visited based on whether it
                     has potential matches in it or one of its subdirectories, and
                     potentially lists which subdirectories of that directory should be
                     visited. This is based on the match's primary, included, and excluded
                     patterns.

                     This function is very similar to 'visitdir', and the following mapping
                     can be applied:

                          visitdir | visitchildrenset
                         ----------+-------------------
                          False    | set()
                          'all'    | 'all'
                          True     | 'this' OR non-empty set of subdirs -or files- to visit

                     Example:
                       Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
                       the following values (assuming the implementation of visitchildrenset
                       is capable of recognizing this; some implementations are not).

                       '' -> {'foo', 'qux'}
                       'baz' -> set()
                       'foo' -> {'bar'}
                       # Ideally this would be 'all', but since the prefix nature of matchers
                       # is applied to the entire matcher, we have to downgrade this to
                       # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
                       # in.
                       'foo/bar' -> 'this'
                       'qux' -> 'this'

                     Important:
                       Most matchers do not know if they're representing files or
                       directories. They see ['path:dir/f'] and don't know whether 'f' is a
                       file or a directory, so visitchildrenset('dir') for most matchers will
                       return {'f'}, but if the matcher knows it's a file (like exactmatcher
                       does), it may return 'this'. Do not rely on the return being a set
                       indicating that there are no files in this dir to investigate (or
                       equivalently that if there are files to investigate in 'dir' that it
                       will always return 'this').
                     '''
                     return b'this'

                 def always(self):
                     '''Matcher will match everything and .files() will be empty --
                     optimization might be possible.'''
                     return False

                 def isexact(self):
                     '''Matcher will match exactly the list of files in .files() --
                     optimization might be possible.'''
                     return False

                 def prefix(self):
                     '''Matcher will match the paths in .files() recursively --
                     optimization might be possible.'''
                     return False

                 def anypats(self):
                     '''None of .always(), .isexact(), and .prefix() is true --
                     optimizations will be difficult.'''
                     return not self.always() and not self.isexact() and not self.prefix()
             class alwaysmatcher(basematcher):
                 '''Matches everything.'''

                 def __init__(self, badfn=None):
                     super(alwaysmatcher, self).__init__(badfn)

                 def always(self):
                     return True

                 def matchfn(self, f):
                     # Every file name matches.
                     return True

                 def visitdir(self, dir):
                     # Every directory, and everything below it, is to be visited.
                     return b'all'

                 def visitchildrenset(self, dir):
                     return b'all'

                 def __repr__(self):
                     return r'<alwaysmatcher>'
             class nevermatcher(basematcher):
                 '''Matches nothing.'''

                 def __init__(self, badfn=None):
                     super(nevermatcher, self).__init__(badfn)

                 # It's a little weird to say that the nevermatcher is an exact matcher
                 # or a prefix matcher, but it seems to make sense to let callers take
                 # fast paths based on either. There will be no exact matches, nor any
                 # prefixes (files() returns []), so fast paths iterating over them should
                 # be efficient (and correct).
                 def isexact(self):
                     return True

                 def prefix(self):
                     return True

                 def visitdir(self, dir):
                     # No directory needs to be visited.
                     return False

                 def visitchildrenset(self, dir):
                     # An empty set is the visitchildrenset counterpart of visitdir's
                     # False.
                     return set()

                 def __repr__(self):
                     return r'<nevermatcher>'
             class predicatematcher(basematcher):
                 """A matcher adapter for a simple boolean function"""

                 def __init__(self, predfn, predrepr=None, badfn=None):
                     super(predicatematcher, self).__init__(badfn)
                     # predfn becomes this instance's matchfn, shadowing the method
                     # defined on basematcher.
                     self.matchfn = predfn
                     # Optional description of the predicate, used only by __repr__.
                     self._predrepr = predrepr

                 @encoding.strmethod
                 def __repr__(self):
                     # Fall back to the byte repr of the predicate itself when
                     # buildrepr() yields nothing for _predrepr.
                     s = stringutil.buildrepr(self._predrepr) or pycompat.byterepr(
                         self.matchfn
                     )
                     # NOTE(review): 'predicatenmatcher' looks like a typo for
                     # 'predicatematcher'; left unchanged because existing output may
                     # depend on it -- confirm before fixing.
                     return b'<predicatenmatcher pred=%s>' % s
             class patternmatcher(basematcher):
                 """Matches a set of (kind, pat, source) against a 'root' directory.
                 >>> kindpats = [
                 ...     (b're', br'.*\.c$', b''),
                 ...     (b'path', b'foo/a', b''),
                 ...     (b'relpath', b'b', b''),
                 ...     (b'glob', b'*.h', b''),
                 ... ]
                 >>> m = patternmatcher(b'foo', kindpats)
                 >>> m(b'main.c')  # matches re:.*\.c$
                 True
                 >>> m(b'b.txt')
                 False
                 >>> m(b'foo/a')  # matches path:foo/a
                 True
                 >>> m(b'a')  # does not match path:b, since 'root' is 'foo'
                 False
                 >>> m(b'b')  # matches relpath:b, since 'root' is 'foo'
                 True
                 >>> m(b'lib.h')  # matches glob:*.h
                 True
                 >>> m.files()
                 ['', 'foo/a', 'b', '']
                 >>> m.exact(b'foo/a')
                 True
                 >>> m.exact(b'b')
                 True
                 >>> m.exact(b'lib.h')  # exact matches are for (rel)path kinds
                 False
                 """
                 def __init__(self, root, kindpats, badfn=None):
                     super(patternmatcher, self).__init__(badfn)
                     self._files = _explicitfiles(kindpats)
                     self._prefix = _prefix(kindpats)
                     self._pats, self.matchfn = _buildmatch(kindpats, b'$', root)
                 @propertycache
                 def _dirs(self):
                     return set(pathutil.dirs(self._fileset))
                 def visitdir(self, dir):
                     if self._prefix and dir in self._fileset:
                         return b'all'
                     return (
                         dir in self._fileset
                         or dir in self._dirs
                         or any(
                             parentdir in self._fileset
                             for parentdir in pathutil.finddirs(dir)
                         )
                     )
                 def visitchildrenset(self, dir):
                     """Translate the result of ``visitdir`` into visitchildrenset terms.

                     A falsy answer becomes an empty set, True becomes b'this' and
                     b'all' is passed through.
                     """
                     verdict = self.visitdir(dir)
                     if not verdict:
                         return set()
                     if verdict is True:
                         return b'this'
                     # The only remaining value visitdir is expected to produce.
                     assert verdict == b'all'
                     return b'all'
                 def prefix(self):
                     """Return the value ``_prefix(kindpats)`` computed in ``__init__``."""
                     return self._prefix
                 @encoding.strmethod
                 def __repr__(self):
                     """Render the matcher together with the patterns it was built from."""
                     patterns = pycompat.bytestr(self._pats)
                     return b'<patternmatcher patterns=%r>' % patterns
             # This is basically a reimplementation of pathutil.dirs that stores the
             # children instead of just a count of them, plus a small optional optimization
             # to avoid some directories we don't need.
             class _dirchildren(object):
                 def __init__(self, paths, onlyinclude=None):
                     self._dirs = {}
                     self._onlyinclude = onlyinclude or []
                     addpath = self.addpath
                     for f in paths:
                         addpath(f)
                 def addpath(self, path):
                     if path == b'':
                         return
                     dirs = self._dirs
                     findsplitdirs = _dirchildren._findsplitdirs
                     for d, b in findsplitdirs(path):
                         if d not in self._onlyinclude:
                             continue
                         dirs.setdefault(d, set()).add(b)
                 @staticmethod
                 def _findsplitdirs(path):
                     # yields (dirname, basename) tuples, walking back to the root.  This is
                     # very similar to pathutil.finddirs, except:
                     #  - produces a (dirname, basename) tuple, not just 'dirname'
                     # Unlike manifest._splittopdir, this does not suffix `dirname` with a
                     # slash.
                     oldpos = len(path)
                     pos = path.rfind(b'/')
                     while pos != -1:
                         yield path[:pos], path[pos + 1 : oldpos]
                         oldpos = pos
                         pos = path.rfind(b'/', 0, pos)
                     yield b'', path[:oldpos]
                 def get(self, path):
                     return self._dirs.get(path, set())
             class includematcher(basematcher):
                 """Matcher built from include patterns.

                 The directory bookkeeping (``_roots``, ``_dirs``, ``_parents``) comes
                 from ``_rootsdirsandparents`` and drives ``visitdir`` and
                 ``visitchildrenset``.
                 """
                 def __init__(self, root, kindpats, badfn=None):
                     super(includematcher, self).__init__(badfn)
                     # b'(?:/|$)' is the suffix _buildmatch appends to glob regexps.
                     regex, matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
                     self._pats = regex
                     self.matchfn = matchfn
                     self._prefix = _prefix(kindpats)
                     roots, dirs, parents = _rootsdirsandparents(kindpats)
                     # roots are directories which are recursively included.
                     self._roots = set(roots)
                     # dirs are directories which are non-recursively included.
                     self._dirs = set(dirs)
                     # parents are directories which are non-recursively included because
                     # they are needed to get to items in _dirs or _roots.
                     self._parents = parents
                 def visitdir(self, dir):
                     """Return b'all', True or False for whether ``dir`` must be visited."""
                     isroot = dir in self._roots
                     if isroot and self._prefix:
                         return b'all'
                     if isroot or dir in self._dirs or dir in self._parents:
                         return True
                     # Anything located below a recursively included root is visited.
                     return any(
                         parentdir in self._roots for parentdir in pathutil.finddirs(dir)
                     )
                 @propertycache
                 def _allparentschildren(self):
                     # It may seem odd that we add dirs, roots, and parents, and then
                     # restrict to only parents. This is to catch the case of:
                     #   dirs = ['foo/bar']
                     #   parents = ['foo']
                     # if we asked for the children of 'foo', but had only added
                     # self._parents, we wouldn't be able to respond ['bar'].
                     everything = itertools.chain(self._dirs, self._roots, self._parents)
                     return _dirchildren(everything, onlyinclude=self._parents)
                 def visitchildrenset(self, dir):
                     """Return b'all', b'this' or the set of children of ``dir`` to visit."""
                     roots = self._roots
                     if dir in roots and self._prefix:
                         return b'all'
                     # Note: this does *not* include the 'dir in self._parents' case from
                     # visitdir, that's handled below.
                     if b'' in roots or dir in roots or dir in self._dirs:
                         return b'this'
                     for parentdir in pathutil.finddirs(dir):
                         if parentdir in roots:
                             return b'this'
                     if dir not in self._parents:
                         return set()
                     return self._allparentschildren.get(dir) or set()
                 @encoding.strmethod
                 def __repr__(self):
                     includes = pycompat.bytestr(self._pats)
                     return b'<includematcher includes=%r>' % includes
             class exactmatcher(basematcher):
                 r'''Matches the input files exactly. They are interpreted as paths, not
                 patterns (so no kind-prefixes).

                 >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
                 >>> m(b'a.txt')
                 True
                 >>> m(b'b.txt')
                 False

                 Input files that would be matched are exactly those returned by .files()

                 >>> m.files()
                 ['a.txt', 're:.*\\.c$']

                 So pattern 're:.*\.c$' is not considered as a regex, but as a file name

                 >>> m(b'main.c')
                 False
                 >>> m(br're:.*\.c$')
                 True
                 '''
                 def __init__(self, files, badfn=None):
                     super(exactmatcher, self).__init__(badfn)
                     # A list is stored as-is; any other iterable is copied into one.
                     self._files = files if isinstance(files, list) else list(files)
                 matchfn = basematcher.exact
                 @propertycache
                 def _dirs(self):
                     dirnames = pathutil.dirs(self._fileset)
                     return set(dirnames)
                 def visitdir(self, dir):
                     return dir in self._dirs
                 def visitchildrenset(self, dir):
                     """Return the names directly below ``dir`` that lead to a file."""
                     if not (self._fileset and dir in self._dirs):
                         return set()
                     known = self._fileset | (self._dirs - {b''})
                     if dir != b'':
                         lead = dir + b'/'
                         cut = len(lead)
                         known = {name[cut:] for name in known if name.startswith(lead)}
                     # self._dirs includes all of the directories, recursively, so if
                     # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
                     # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
                     # '/' in it, indicating it's for a subdir-of-a-subdir; the
                     # immediate subdir will be in there without a slash.
                     children = {name for name in known if b'/' not in name}
                     # We really do not expect the result to be empty, since that would
                     # imply that there's something in _dirs that didn't have a file in
                     # _fileset.
                     assert children
                     return children
                 def isexact(self):
                     return True
                 @encoding.strmethod
                 def __repr__(self):
                     return b'<exactmatcher files=%r>' % self._files
             class differencematcher(basematcher):
                 '''Composes two matchers by matching if the first matches and the second
                 does not.

                 The second matcher's non-matching-attributes (bad, traversedir) are ignored.
                 '''
                 def __init__(self, m1, m2):
                     super(differencematcher, self).__init__()
                     self._m1 = m1
                     self._m2 = m2
                     # Non-matching attributes are always taken from the first matcher.
                     self.bad = m1.bad
                     self.traversedir = m1.traversedir
                 def matchfn(self, f):
                     return self._m1(f) and not self._m2(f)
                 @propertycache
                 def _files(self):
                     if self.isexact():
                         return [f for f in self._m1.files() if self(f)]
                     # If m1 is not an exact matcher, we can't easily figure out the set of
                     # files, because its files() are not always files. For example, if
                     # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
                     # want to remove "dir" from the set even though it would match m2,
                     # because the "dir" in m1 may not be a file.
                     return self._m1.files()
                 def visitdir(self, dir):
                     # Ask m2 only once; its answer decides how m1's answer is reported.
                     m2_visit = self._m2.visitdir(dir)
                     if m2_visit == b'all':
                         # Everything below dir is excluded by m2.
                         return False
                     elif not m2_visit:
                         # m2 does not match dir, we can return 'all' here if possible
                         return self._m1.visitdir(dir)
                     return bool(self._m1.visitdir(dir))
                 def visitchildrenset(self, dir):
                     m2_set = self._m2.visitchildrenset(dir)
                     if m2_set == b'all':
                         return set()
                     m1_set = self._m1.visitchildrenset(dir)
                     # Possible values for m1: 'all', 'this', set(...), set()
                     # Possible values for m2:        'this', set(...), set()
                     # If m2 has nothing under here that we care about, return m1, even if
                     # it's 'all'. This is a change in behavior from visitdir, which would
                     # return True, not 'all', for some reason.
                     if not m2_set:
                         return m1_set
                     if m1_set in [b'all', b'this']:
                         # Never return 'all' here if m2_set is any kind of non-empty (either
                         # 'this' or set(foo)), since m2 might return set() for a
                         # subdirectory.
                         return b'this'
                     # Possible values for m1:         set(...), set()
                     # Possible values for m2: 'this', set(...)
                     # We ignore m2's set results. They're possibly incorrect:
                     #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
                     #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
                     #    return set(), which is *not* correct, we still need to visit 'dir'!
                     return m1_set
                 def isexact(self):
                     return self._m1.isexact()
                 @encoding.strmethod
                 def __repr__(self):
                     return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)
             def intersectmatchers(m1, m2):
                 '''Composes two matchers by matching if both of them match.

                 The second matcher's non-matching-attributes (bad, traversedir) are ignored.

                 If either argument is None the other one is returned unchanged. If one
                 of the matchers always matches, a shallow copy of the other is returned
                 (carrying m1's bad and traversedir); otherwise an intersectionmatcher
                 is built.
                 '''
                 if m1 is None or m2 is None:
                     return m1 or m2
                 if m1.always():
                     # m2 alone decides what matches, but the non-matching attributes
                     # still have to come from m1. Copy so that m2 is left untouched.
                     m = copy.copy(m2)
                     # TODO: Consider encapsulating these things in a class so there's only
                     # one thing to copy from m1.
                     m.bad = m1.bad
                     m.traversedir = m1.traversedir
                     return m
                 if m2.always():
                     # m1 already carries the right attributes; hand out a copy of it.
                     return copy.copy(m1)
                 return intersectionmatcher(m1, m2)
             class intersectionmatcher(basematcher):
                 """Matcher that matches a file only if both ``m1`` and ``m2`` match it.

                 The non-matching attributes (bad, traversedir) are taken from ``m1``.
                 """
                 def __init__(self, m1, m2):
                     super(intersectionmatcher, self).__init__()
                     self._m1 = m1
                     self._m2 = m2
                     self.bad = m1.bad
                     self.traversedir = m1.traversedir
                 @propertycache
                 def _files(self):
                     if self.isexact():
                         m1, m2 = self._m1, self._m2
                         # Make m1 the exact one so that its files() can be filtered
                         # through the other matcher.
                         if not m1.isexact():
                             m1, m2 = m2, m1
                         return [f for f in m1.files() if m2(f)]
                     # If neither m1 nor m2 is an exact matcher, we can't easily intersect
                     # the set of files, because their files() are not always files. For
                     # example, if intersecting a matcher "-I glob:foo.txt" with matcher of
                     # "path:dir2", we don't want to remove "dir2" from the set.
                     return self._m1.files() + self._m2.files()
                 def matchfn(self, f):
                     return self._m1(f) and self._m2(f)
                 def visitdir(self, dir):
                     visit1 = self._m1.visitdir(dir)
                     if visit1 == b'all':
                         return self._m2.visitdir(dir)
                     # bool() because visit1=True + visit2='all' should not be 'all'
                     return bool(visit1 and self._m2.visitdir(dir))
                 def visitchildrenset(self, dir):
                     m1_set = self._m1.visitchildrenset(dir)
                     if not m1_set:
                         return set()
                     m2_set = self._m2.visitchildrenset(dir)
                     if not m2_set:
                         return set()
                     # 'all' on one side defers entirely to the other side.
                     if m1_set == b'all':
                         return m2_set
                     elif m2_set == b'all':
                         return m1_set
                     if m1_set == b'this' or m2_set == b'this':
                         return b'this'
                     assert isinstance(m1_set, set) and isinstance(m2_set, set)
                     return m1_set.intersection(m2_set)
                 def always(self):
                     return self._m1.always() and self._m2.always()
                 def isexact(self):
                     return self._m1.isexact() or self._m2.isexact()
                 @encoding.strmethod
                 def __repr__(self):
                     return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)
             class subdirmatcher(basematcher):
                 """Adapt a matcher to work on a subdirectory only.

                 The paths are remapped to remove/insert the path as needed:

                 >>> from . import pycompat
                 >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
                 >>> m2 = subdirmatcher(b'sub', m1)
                 >>> m2(b'a.txt')
                 False
                 >>> m2(b'b.txt')
                 True
                 >>> m2.matchfn(b'a.txt')
                 False
                 >>> m2.matchfn(b'b.txt')
                 True
                 >>> m2.files()
                 ['b.txt']
                 >>> m2.exact(b'b.txt')
                 True
                 >>> def bad(f, msg):
                 ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
                 >>> m1.bad = bad
                 >>> m2.bad(b'x.txt', b'No such file')
                 sub/x.txt: No such file
                 """
                 def __init__(self, path, matcher):
                     super(subdirmatcher, self).__init__()
                     self._path = path
                     self._matcher = matcher
                     self._always = matcher.always()
                     # Keep only the wrapped matcher's files located below `path`, with
                     # the leading "path/" removed.
                     lead = path + b"/"
                     cut = len(path) + 1
                     self._files = [f[cut:] for f in matcher._files if f.startswith(lead)]
                     # If the parent repo had a path to this subrepo and the matcher is
                     # a prefix matcher, this submatcher always matches.
                     if matcher.prefix():
                         self._always = any(f == path for f in matcher._files)
                 def bad(self, f, msg):
                     self._matcher.bad(self._path + b"/" + f, msg)
                 def matchfn(self, f):
                     # Some information is lost in the superclass's constructor, so we
                     # can not accurately create the matching function for the subdirectory
                     # from the inputs. Instead, we override matchfn() and visitdir() to
                     # call the original matcher with the subdirectory path prepended.
                     return self._matcher.matchfn(self._path + b"/" + f)
                 def visitdir(self, dir):
                     # b'' denotes the subdirectory itself.
                     full = self._path if dir == b'' else self._path + b"/" + dir
                     return self._matcher.visitdir(full)
                 def visitchildrenset(self, dir):
                     # b'' denotes the subdirectory itself.
                     full = self._path if dir == b'' else self._path + b"/" + dir
                     return self._matcher.visitchildrenset(full)
                 def always(self):
                     return self._always
                 def prefix(self):
                     return self._matcher.prefix() and not self._always
                 @encoding.strmethod
                 def __repr__(self):
                     parts = (self._path, self._matcher)
                     return b'<subdirmatcher path=%r, matcher=%r>' % parts
             class prefixdirmatcher(basematcher):
                 """Adapt a matcher to work on a parent directory.

                 The matcher's non-matching-attributes (bad, traversedir) are ignored.

                 The prefix path should usually be the relative path from the root of
                 this matcher to the root of the wrapped matcher.

                 >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
                 >>> m2 = prefixdirmatcher(b'd/e', m1)
                 >>> m2(b'a.txt')
                 False
                 >>> m2(b'd/e/a.txt')
                 True
                 >>> m2(b'd/e/b.txt')
                 False
                 >>> m2.files()
                 ['d/e/a.txt', 'd/e/f/b.txt']
                 >>> m2.exact(b'd/e/a.txt')
                 True
                 >>> m2.visitdir(b'd')
                 True
                 >>> m2.visitdir(b'd/e')
                 True
                 >>> m2.visitdir(b'd/e/f')
                 True
                 >>> m2.visitdir(b'd/e/g')
                 False
                 >>> m2.visitdir(b'd/ef')
                 False
                 """
                 def __init__(self, path, matcher, badfn=None):
                     super(prefixdirmatcher, self).__init__(badfn)
                     if not path:
                         raise error.ProgrammingError(b'prefix path must not be empty')
                     self._path = path
                     self._pathprefix = path + b'/'
                     self._matcher = matcher
                 @propertycache
                 def _files(self):
                     lead = self._pathprefix
                     return [lead + f for f in self._matcher._files]
                 def matchfn(self, f):
                     lead = self._pathprefix
                     if f.startswith(lead):
                         return self._matcher.matchfn(f[len(lead) :])
                     # Files outside of the prefix directory never match.
                     return False
                 @propertycache
                 def _pathdirs(self):
                     return set(pathutil.finddirs(self._path))
                 def visitdir(self, dir):
                     inner = self._matcher
                     lead = self._pathprefix
                     if dir == self._path:
                         # The prefix directory itself is the wrapped matcher's root.
                         return inner.visitdir(b'')
                     if dir.startswith(lead):
                         return inner.visitdir(dir[len(lead) :])
                     return dir in self._pathdirs
                 def visitchildrenset(self, dir):
                     inner = self._matcher
                     lead = self._pathprefix
                     if dir == self._path:
                         # The prefix directory itself is the wrapped matcher's root.
                         return inner.visitchildrenset(b'')
                     if dir.startswith(lead):
                         return inner.visitchildrenset(dir[len(lead) :])
                     if dir not in self._pathdirs:
                         return set()
                     return b'this'
                 def isexact(self):
                     return self._matcher.isexact()
                 def prefix(self):
                     return self._matcher.prefix()
                 @encoding.strmethod
                 def __repr__(self):
                     parts = (pycompat.bytestr(self._path), self._matcher)
                     return b'<prefixdirmatcher path=%r, matcher=%r>' % parts
             class unionmatcher(basematcher):
                 """A matcher that is the union of several matchers.

                 The non-matching-attributes (bad, traversedir) are taken from the first
                 matcher.
                 """
                 def __init__(self, matchers):
                     m1 = matchers[0]
                     super(unionmatcher, self).__init__()
                     self.traversedir = m1.traversedir
                     self._matchers = matchers
                 def matchfn(self, f):
                     for match in self._matchers:
                         if match(f):
                             return True
                     return False
                 def visitdir(self, dir):
                     r = False
                     for m in self._matchers:
                         v = m.visitdir(dir)
                         if v == b'all':
                             # One sub-matcher wanting everything settles the question.
                             return v
                         r |= v
                     return r
                 def visitchildrenset(self, dir):
                     r = set()
                     this = False
                     for m in self._matchers:
                         v = m.visitchildrenset(dir)
                         if not v:
                             continue
                         if v == b'all':
                             return v
                         if this or v == b'this':
                             this = True
                             # don't break, we might have an 'all' in here.
                             continue
                         assert isinstance(v, set)
                         r = r.union(v)
                     if this:
                         return b'this'
                     return r
                 @encoding.strmethod
                 def __repr__(self):
                     # Wrap in a 1-tuple so that a tuple of matchers is formatted as a
                     # single value instead of being unpacked as format arguments.
                     return b'<unionmatcher matchers=%r>' % (self._matchers,)
             def patkind(pattern, default=None):
                 r'''If pattern is 'kind:pat' with a known kind, return kind.

                 Otherwise ``default`` is returned.

                 >>> patkind(br're:.*\.c$')
                 're'
                 >>> patkind(b'glob:*.c')
                 'glob'
                 >>> patkind(b'relpath:test.py')
                 'relpath'
                 >>> patkind(b'main.py')
                 >>> patkind(b'main.py', default=b're')
                 're'
                 '''
                 kind, pat = _patsplit(pattern, default)
                 return kind
             def _patsplit(pattern, default):
                 """Split a string into the optional pattern kind prefix and the actual
                 pattern."""
                 if b':' in pattern:
                     kind, pat = pattern.split(b':', 1)
                     if kind in allpatternkinds:
                         return kind, pat
                 return default, pattern
             def _globre(pat):
                 r'''Convert an extended glob string to a regexp string.

                 ``pat`` is a bytes glob; the returned value is the bytes source of a
                 regular expression (it is not compiled here).

                 >>> from . import pycompat
                 >>> def bprint(s):
                 ...     print(pycompat.sysstr(s))
                 >>> bprint(_globre(br'?'))
                 .
                 >>> bprint(_globre(br'*'))
                 [^/]*
                 >>> bprint(_globre(br'**'))
                 .*
                 >>> bprint(_globre(br'**/a'))
                 (?:.*/)?a
                 >>> bprint(_globre(br'a/**/b'))
                 a/(?:.*/)?b
                 >>> bprint(_globre(br'[a*?!^][^b][!c]'))
                 [a*?!^][\^b][^c]
                 >>> bprint(_globre(br'{a,b}'))
                 (?:a|b)
                 >>> bprint(_globre(br'.\*\?'))
                 \.\*\?
                 '''
                 # i is the read cursor into pat, n its length.
                 i, n = 0, len(pat)
                 res = b''
                 # Number of '{' groups currently open.
                 group = 0
                 # escape(c, c) looks c up in the escape map and falls back to c itself.
                 escape = util.stringutil.regexbytesescapemap.get
                 def peek():
                     # Next unread byte (as a length-1 bytes), or False at end of input.
                     return i < n and pat[i : i + 1]
                 while i < n:
                     c = pat[i : i + 1]
                     i += 1
                     if c not in b'*?[{},\\':
                         # Not a glob metacharacter: emit it through the escape map.
                         res += escape(c, c)
                     elif c == b'*':
                         if peek() == b'*':
                             i += 1
                             if peek() == b'/':
                                 # '**/' matches any (possibly empty) run of directories.
                                 i += 1
                                 res += b'(?:.*/)?'
                             else:
                                 # '**' matches anything, including slashes.
                                 res += b'.*'
                         else:
                             # A single '*' stays within one path component.
                             res += b'[^/]*'
                     elif c == b'?':
                         res += b'.'
                     elif c == b'[':
                         # Look for the closing ']'; a '!' or ']' right after the '[' is
                         # skipped so it cannot terminate the class.
                         j = i
                         if j < n and pat[j : j + 1] in b'!]':
                             j += 1
                         while j < n and pat[j : j + 1] != b']':
                             j += 1
                         if j >= n:
                             # Unterminated class: treat the '[' as a literal.
                             res += b'\\['
                         else:
                             # Backslashes inside the class are doubled.
                             stuff = pat[i:j].replace(b'\\', b'\\\\')
                             i = j + 1
                             if stuff[0:1] == b'!':
                                 # Glob negation '!' becomes regexp negation '^'.
                                 stuff = b'^' + stuff[1:]
                             elif stuff[0:1] == b'^':
                                 # A leading '^' is escaped so it is not read as negation.
                                 stuff = b'\\' + stuff
                             res = b'%s[%s]' % (res, stuff)
                     elif c == b'{':
                         group += 1
                         res += b'(?:'
                     elif c == b'}' and group:
                         res += b')'
                         group -= 1
                     elif c == b',' and group:
                         # ',' separates alternatives only while a '{' group is open.
                         res += b'|'
                     elif c == b'\\':
                         # A backslash quotes the following byte; a trailing backslash is
                         # emitted by itself.
                         p = peek()
                         if p:
                             i += 1
                             res += escape(p, p)
                         else:
                             res += escape(c, c)
                     else:
                         # '}' or ',' outside of any group: plain characters.
                         res += escape(c, c)
                 return res
             def _regex(kind, pat, globsuffix):
                 '''Convert a (normalized) pattern of any kind into a
                 regular expression.

                 globsuffix is appended to the regexp of globs (kinds relglob, glob and
                 rootglob). When the Rust module is available the conversion is
                 delegated to it. A kind that cannot be turned into a regexp raises
                 error.ProgrammingError.'''
                 if rustmod is not None:
                     try:
                         return rustmod.build_single_regex(kind, pat, globsuffix)
                     except rustmod.PatternError:
                         raise error.ProgrammingError(
                             b'not a regex pattern: %s:%s' % (kind, pat)
                         )
                 # An empty glob/relpath pattern yields an empty regexp.
                 if kind in (b'glob', b'relpath') and not pat:
                     return b''
                 elif kind == b're':
                     return pat
                 elif kind in (b'path', b'relpath'):
                     if pat == b'.':
                         return b''
                     # The path itself, followed by a slash or the end of the string.
                     return util.stringutil.reescape(pat) + b'(?:/|$)'
                 elif kind == b'rootfilesin':
                     # Pattern is a directory name ('.' meaning no directory part).
                     if pat == b'.':
                         dirpart = b''
                     else:
                         dirpart = util.stringutil.reescape(pat) + b'/'
                     # Anything after the pattern must be a non-directory.
                     return dirpart + b'[^/]+$'
                 elif kind == b'relglob':
                     globre = _globre(pat)
                     star = b'[^/]*'
                     if globre.startswith(star):
                         # When pat has the form *XYZ (common), make the returned regex more
                         # legible by returning the regex for **XYZ instead of **/*XYZ.
                         return b'.*' + globre[len(star) :] + globsuffix
                     return b'(?:|.*/)' + globre + globsuffix
                 elif kind == b'relre':
                     # An already anchored regexp is left alone.
                     return pat if pat.startswith(b'^') else b'.*' + pat
                 elif kind in (b'glob', b'rootglob'):
                     return _globre(pat) + globsuffix
                 raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))
             def _buildmatch(kindpats, globsuffix, root):
                 '''Return regexp string and a matcher function for kindpats.

                 globsuffix is appended to the regexp of globs. Subincludes found by
                 _expandsubinclude are handled by a dedicated function whose
                 sub-matchers are created on first use.'''
                 subincludes, kindpats = _expandsubinclude(kindpats, root)
                 matchfuncs = []
                 if subincludes:
                     # Sub-matchers are created lazily and cached per prefix.
                     submatchers = {}
                     def matchsubinclude(f):
                         for prefix, matcherargs in subincludes:
                             if not f.startswith(prefix):
                                 continue
                             submatch = submatchers.get(prefix)
                             if submatch is None:
                                 submatch = submatchers[prefix] = match(*matcherargs)
                             if submatch(f[len(prefix) :]):
                                 return True
                         return False
                     matchfuncs.append(matchsubinclude)
                 regex = b''
                 if kindpats:
                     onlyrootfilesin = all(k == b'rootfilesin' for k, p, s in kindpats)
                     if onlyrootfilesin:
                         # No regexp needed: compare the directory part of the file
                         # against the set of requested directories.
                         dirs = {p for k, p, s in kindpats}
                         def mf(f):
                             head, slash, tail = f.rpartition(b'/')
                             return (head if slash else b'.') in dirs
                         regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
                     else:
                         regex, mf = _buildregexmatch(kindpats, globsuffix)
                     matchfuncs.append(mf)
                 if len(matchfuncs) == 1:
                     return regex, matchfuncs[0]
                 def matchany(f):
                     return any(fn(f) for fn in matchfuncs)
                 return regex, matchany
             # Size limit, in bytes, applied by _buildregexmatch(): a single pattern
             # whose regexp is longer than this aborts, and the per-pattern regexps are
             # joined into groups that each stay within this size.
             MAX_RE_SIZE = 20000
             def _joinregexes(regexps):
                 """gather multiple regular expressions into a single one"""
                 return b'|'.join(regexps)
             def _buildregexmatch(kindpats, globsuffix):
                 """Turn a list of (kind, pattern, source) into a regexp and a matcher.

                 Returns a (regexp string, match function) pair. The returned string is
                 always the alternation of every pattern's regexp. For matching, the
                 regexps are compiled in groups of at most MAX_RE_SIZE bytes when the
                 alternation would be larger than that.

                 Aborts if a single pattern's regexp exceeds MAX_RE_SIZE, or if a
                 pattern does not compile (the message names the offending pattern
                 and, when known, its source).

                 Test too large input
                 >>> _buildregexmatch([
                 ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
                 ... ], b'$')
                 Traceback (most recent call last):
                 ...
                 Abort: matcher pattern is too long (20009 bytes)
                 """
                 try:
                     pieces = [
                         _regex(kind, pat, globsuffix) for (kind, pat, source) in kindpats
                     ]
                     combined = _joinregexes(pieces)

                     chunks = []
                     chunkstart = 0
                     chunksize = 0
                     for pos, piece in enumerate(pieces):
                         size = len(piece)
                         if size > MAX_RE_SIZE:
                             raise error.Abort(
                                 _(b"matcher pattern is too long (%d bytes)") % size
                             )
                         if chunksize + size > MAX_RE_SIZE:
                             # Close the current chunk; this piece opens the next one.
                             chunks.append(_joinregexes(pieces[chunkstart:pos]))
                             chunkstart = pos
                             chunksize = 0
                         # The extra byte accounts for the b'|' separator.
                         chunksize += size + 1

                     if chunkstart != 0:
                         # At least one split happened: flush the trailing chunk and
                         # match against each compiled chunk in turn.
                         chunks.append(_joinregexes(pieces[chunkstart:]))
                         compiled = [_rematcher(chunk) for chunk in chunks]
                         func = lambda s: any(m(s) for m in compiled)
                     else:
                         single = _rematcher(combined)
                         func = lambda s: bool(single(s))
                     return combined, func
                 except re.error:
                     # Compile the patterns one by one to find which one is broken.
                     for kind, pat, source in kindpats:
                         try:
                             _rematcher(_regex(kind, pat, globsuffix))
                         except re.error:
                             if not source:
                                 raise error.Abort(
                                     _(b"invalid pattern (%s): %s") % (kind, pat)
                                 )
                             raise error.Abort(
                                 _(b"%s: invalid pattern (%s): %s") % (source, kind, pat)
                             )
                     raise error.Abort(_(b"invalid pattern"))
             def _patternrootsanddirs(kindpats):
                 '''Returns roots and directories corresponding to each pattern.
                 This calculates the roots and directories exactly matching the patterns and
                 returns a tuple of (roots, dirs) for each. It does not return other
                 directories which may also need to be considered, like the parent
                 directories.
                 '''
                 r = []
                 d = []
                 for kind, pat, source in kindpats:
                     if kind in (b'glob', b'rootglob'):  # find the non-glob prefix
                         root = []
                         for p in pat.split(b'/'):
                             if b'[' in p or b'{' in p or b'*' in p or b'?' in p:
                                 break
                             root.append(p)
                         r.append(b'/'.join(root))
                     elif kind in (b'relpath', b'path'):
                         if pat == b'.':
                             pat = b''
                         r.append(pat)
                     elif kind in (b'rootfilesin',):
                         if pat == b'.':
                             pat = b''
                         d.append(pat)
                     else:  # relglob, re, relre
                         r.append(b'')
                 return r, d
             def _roots(kindpats):
                 '''Return the root directories the given patterns match recursively.

                 This is the first element of _patternrootsanddirs(); the exact
                 (non-recursive) directories are discarded.'''
                 return _patternrootsanddirs(kindpats)[0]
             def _rootsdirsandparents(kindpats):
                 '''Return the roots, exact directories and parents of the patterns.

                 `roots` are directories to match recursively, `dirs` are directories
                 to match non-recursively, and `parents` are the directories that have
                 to be walked implicitly to reach anything in roots or dirs.

                 Returns a tuple of (roots, dirs, parents), where roots and dirs are
                 lists and parents is a set.

                 >>> r = _rootsdirsandparents(
                 ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
                 ...      (b'glob', b'g*', b'')])
                 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
                 (['g/h', 'g/h', ''], []) ['', 'g']
                 >>> r = _rootsdirsandparents(
                 ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
                 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
                 ([], ['g/h', '']) ['', 'g']
                 >>> r = _rootsdirsandparents(
                 ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
                 ...      (b'path', b'', b'')])
                 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
                 (['r', 'p/p', ''], []) ['', 'p']
                 >>> r = _rootsdirsandparents(
                 ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
                 ...      (b'relre', b'rr', b'')])
                 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
                 (['', '', ''], []) ['']
                 '''
                 roots, exactdirs = _patternrootsanddirs(kindpats)

                 # The parents are collected as non-recursive/exact directories: they
                 # have to be scanned to reach the roots and the exact directories.
                 parents = set(pathutil.dirs(exactdirs))
                 parents.update(pathutil.dirs(roots))

                 # FIXME: every caller turns roots and dirs into sets; do that here
                 # before returning.
                 # FIXME: no caller needs the members of 'roots' and 'dirs' to appear in
                 # 'parents' as well; consider dropping them before returning.
                 return roots, exactdirs, parents
             def _explicitfiles(kindpats):
                 '''Return the filenames the patterns may name explicitly.

                 >>> _explicitfiles([(b'path', b'foo/bar', b'')])
                 ['foo/bar']
                 >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
                 []
                 '''
                 # rootfilesin can only name a directory, never a file, so those
                 # patterns are left out before computing the roots.
                 canbefile = [kp for kp in kindpats if kp[0] != b'rootfilesin']
                 return _roots(canbefile)
             def _prefix(kindpats):
                 '''Whether all the patterns match a prefix (i.e. recursively)'''
                 for kind, pat, source in kindpats:
                     if kind not in (b'path', b'relpath'):
                         return False
                 return True
             # Cache for the compiled comment-stripping regexp. readpatternfile()
             # compiles it the first time it meets a line containing '#'.
             _commentre = None
             def readpatternfile(filepath, warn, sourceinfo=False):
                 '''Parse a pattern file, returning a list of patterns.

                 These patterns should be given to compile() to be validated and
                 converted into a match function.

                 Trailing white space is dropped.
                 The escape character is backslash.
                 Comments start with #.
                 Empty lines are skipped.

                 Lines can be of the following formats:

                 syntax: regexp # defaults following lines to non-rooted regexps
                 syntax: glob   # defaults following lines to non-rooted globs
                 re:pattern     # non-rooted regular expression
                 glob:pattern   # non-rooted glob
                 rootglob:pat   # rooted glob (same root as ^ in regexps)
                 pattern        # pattern of the current default type

                 filepath is the file to read. warn, when true, is called with a
                 message for every unknown "syntax:" name; such lines are otherwise
                 ignored.

                 If sourceinfo is set, returns a list of tuples:
                 (pattern, lineno, line), where lineno is 1-based and line is the
                 source line after comment, trailing white space and syntax prefix
                 have been removed. This is useful to debug ignore patterns.

                 When the Rust module is available, parsing is delegated to it.
                 '''
                 global _commentre

                 if rustmod is not None:
                     result, warnings = rustmod.read_pattern_file(
                         filepath, bool(warn), sourceinfo,
                     )
                     for warning_params in warnings:
                         # Can't be easily emitted from Rust, because it would require
                         # a mechanism for both gettext and calling the `warn` function.
                         warn(_(b"%s: ignoring invalid syntax '%s'\n") % warning_params)
                     return result

                 # Maps a syntax name to the kind prefix put in front of the pattern.
                 syntaxes = {
                     b're': b'relre:',
                     b'regexp': b'relre:',
                     b'glob': b'relglob:',
                     b'rootglob': b'rootglob:',
                     b'include': b'include',
                     b'subinclude': b'subinclude',
                 }
                 syntax = b'relre:'
                 patterns = []

                 # The context manager closes the file even if reading or parsing a
                 # line raises, so the handle is never leaked.
                 with open(filepath, b'rb') as fp:
                     for lineno, line in enumerate(util.iterfile(fp), start=1):
                         if b"#" in line:
                             if not _commentre:
                                 _commentre = util.re.compile(
                                     br'((?:^|[^\\])(?:\\\\)*)#.*'
                                 )
                             # remove comments prefixed by an even number of escapes
                             m = _commentre.search(line)
                             if m:
                                 line = line[: m.end(1)]
                             # fixup properly escaped comments that survived the above
                             line = line.replace(b"\\#", b"#")
                         line = line.rstrip()
                         if not line:
                             continue

                         if line.startswith(b'syntax:'):
                             # Switch the default syntax for the following lines.
                             s = line[7:].strip()
                             try:
                                 syntax = syntaxes[s]
                             except KeyError:
                                 if warn:
                                     warn(
                                         _(b"%s: ignoring invalid syntax '%s'\n")
                                         % (filepath, s)
                                     )
                             continue

                         # A per-line prefix, given either as the kind prefix itself or
                         # as "<syntax name>:", overrides the default for this line.
                         linesyntax = syntax
                         for s, rels in pycompat.iteritems(syntaxes):
                             if line.startswith(rels):
                                 linesyntax = rels
                                 line = line[len(rels) :]
                                 break
                             elif line.startswith(s + b':'):
                                 linesyntax = rels
                                 line = line[len(s) + 1 :]
                                 break

                         if sourceinfo:
                             patterns.append((linesyntax + line, lineno, line))
                         else:
                             patterns.append(linesyntax + line)
                 return patterns