upstream/mercurial-mirror Commit - r52819:70fe33bd

1

# match.py - filename matching

1

# match.py - filename matching

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import annotations

8

from __future__ import annotations

9

10

import bisect

10

import bisect

11

import copy

11

import copy

12

import itertools

12

import itertools

13

import os

13

import os

14

import re

14

import re

15

import typing

16

17

from typing import (

18

Any,

19

Callable,

20

List,

21

Tuple,

22

Union,

23

overload,

24

)

15

25

16

from .i18n import _

26

from .i18n import _

17

from .pycompat import open

27

from .pycompat import open

18

from . import (

28

from . import (

19

encoding,

29

encoding,

20

error,

30

error,

21

pathutil,

31

pathutil,

22

policy,

32

policy,

23

pycompat,

33

pycompat,

24

util,

34

util,

25

)

35

)

26

from .utils import stringutil

36

from .utils import stringutil

27

37

28

# Rust counterpart of the dirstate module, as handed back by
# policy.importrust().
rustmod = policy.importrust('dirstate')

# Every 'kind:' prefix that a pattern handled by this module may carry.
allpatternkinds = (
    b're',
    b'glob',
    b'path',
    b'filepath',
    b'relglob',
    b'relpath',
    b'relre',
    b'rootglob',
    b'listfile',
    b'listfile0',
    b'set',
    b'include',
    b'subinclude',
    b'rootfilesin',
)
# Kinds whose pattern _donormalize() canonicalizes relative to the cwd.
cwdrelativepatternkinds = (b'relpath', b'glob')

propertycache = util.propertycache

49

59

50

60

51

def _rematcher(regex):
    """Compile ``regex`` with the best available regexp engine and return a
    matcher function.

    The compiled object's ``test_match`` method is returned when it exists,
    otherwise its ``match`` method is used.
    """
    m = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        return m.test_match
    except AttributeError:
        return m.match

60

70

61

71

62

def _expandsets(cwd, kindpats, ctx=None, listsubrepos=False, badfn=None):

72

def _expandsets(cwd, kindpats, ctx=None, listsubrepos=False, badfn=None):

63

'''Returns the kindpats list with the 'set' patterns expanded to matchers'''

73

'''Returns the kindpats list with the 'set' patterns expanded to matchers'''

64

matchers = []

74

matchers = []

65

other = []

75

other = []

66

76

67

for kind, pat, source in kindpats:

77

for kind, pat, source in kindpats:

68

if kind == b'set':

78

if kind == b'set':

69

if ctx is None:

79

if ctx is None:

70

raise error.ProgrammingError(

80

raise error.ProgrammingError(

71

b"fileset expression with no context"

81

b"fileset expression with no context"

72

)

82

)

73

matchers.append(ctx.matchfileset(cwd, pat, badfn=badfn))

83

matchers.append(ctx.matchfileset(cwd, pat, badfn=badfn))

74

84

75

if listsubrepos:

85

if listsubrepos:

76

for subpath in ctx.substate:

86

for subpath in ctx.substate:

77

sm = ctx.sub(subpath).matchfileset(cwd, pat, badfn=badfn)

87

sm = ctx.sub(subpath).matchfileset(cwd, pat, badfn=badfn)

78

pm = prefixdirmatcher(subpath, sm, badfn=badfn)

88

pm = prefixdirmatcher(subpath, sm, badfn=badfn)

79

matchers.append(pm)

89

matchers.append(pm)

80

90

81

continue

91

continue

82

other.append((kind, pat, source))

92

other.append((kind, pat, source))

83

return matchers, other

93

return matchers, other

84

94

85

95

86

def _expandsubinclude(kindpats, root):

96

def _expandsubinclude(kindpats, root):

87

"""Returns the list of subinclude matcher args and the kindpats without the

97

"""Returns the list of subinclude matcher args and the kindpats without the

88

subincludes in it."""

98

subincludes in it."""

89

relmatchers = []

99

relmatchers = []

90

other = []

100

other = []

91

101

92

for kind, pat, source in kindpats:

102

for kind, pat, source in kindpats:

93

if kind == b'subinclude':

103

if kind == b'subinclude':

94

sourceroot = pathutil.dirname(util.normpath(source))

104

sourceroot = pathutil.dirname(util.normpath(source))

95

pat = util.pconvert(pat)

105

pat = util.pconvert(pat)

96

path = pathutil.join(sourceroot, pat)

106

path = pathutil.join(sourceroot, pat)

97

107

98

newroot = pathutil.dirname(path)

108

newroot = pathutil.dirname(path)

99

matcherargs = (newroot, b'', [], [b'include:%s' % path])

109

matcherargs = (newroot, b'', [], [b'include:%s' % path])

100

110

101

prefix = pathutil.canonpath(root, root, newroot)

111

prefix = pathutil.canonpath(root, root, newroot)

102

if prefix:

112

if prefix:

103

prefix += b'/'

113

prefix += b'/'

104

relmatchers.append((prefix, matcherargs))

114

relmatchers.append((prefix, matcherargs))

105

else:

115

else:

106

other.append((kind, pat, source))

116

other.append((kind, pat, source))

107

117

108

return relmatchers, other

118

return relmatchers, other

109

119

110

120

111

def _kindpatsalwaysmatch(kindpats):

121

def _kindpatsalwaysmatch(kindpats):

112

"""Checks whether the kindspats match everything, as e.g.

122

"""Checks whether the kindspats match everything, as e.g.

113

'relpath:.' does.

123

'relpath:.' does.

114

"""

124

"""

115

for kind, pat, source in kindpats:

125

for kind, pat, source in kindpats:

116

if pat != b'' or kind not in [b'relpath', b'glob']:

126

if pat != b'' or kind not in [b'relpath', b'glob']:

117

return False

127

return False

118

return True

128

return True

119

129

120

130

121

def _buildkindpatsmatcher(
    matchercls,
    root,
    cwd,
    kindpats,
    ctx=None,
    listsubrepos=False,
    badfn=None,
):
    """Build one matcher covering all of ``kindpats``.

    Fileset ('set') patterns are expanded by _expandsets(); whatever is left
    is passed to ``matchercls(root, kindpats, badfn=badfn)``.

    Returns a nevermatcher when there is nothing to match, the single
    resulting matcher when there is exactly one, and a unionmatcher of all
    of them otherwise.
    """
    matchers = []
    fms, kindpats = _expandsets(
        cwd,
        kindpats,
        ctx=ctx,
        listsubrepos=listsubrepos,
        badfn=badfn,
    )
    if kindpats:
        m = matchercls(root, kindpats, badfn=badfn)
        matchers.append(m)
    if fms:
        matchers.extend(fms)
    if not matchers:
        return nevermatcher(badfn=badfn)
    if len(matchers) == 1:
        return matchers[0]
    return unionmatcher(matchers)

148

158

149

159

150

def match(
    root,
    cwd,
    patterns=None,
    include=None,
    exclude=None,
    default=b'glob',
    auditor=None,
    ctx=None,
    listsubrepos=False,
    warn=None,
    badfn=None,
    icasefs=False,
):
    r"""build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - optional path auditor
    ctx - optional changecontext
    listsubrepos - if True, recurse into subrepositories
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'filepath:<path>' - an exact path to a single file, relative to the
                        repository root
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    >>> def _match(root, *args, **kwargs):
    ...     return match(util.localpath(root), *args, **kwargs)

    Usually a patternmatcher is returned:
    >>> m = _match(b'/foo', b'.', [br're:.*\.c$', b'path:foo/a', b'*.py'])
    >>> isinstance(m, patternmatcher)
    True

    Combining 'patterns' with 'include' (resp. 'exclude') gives an
    intersectionmatcher (resp. a differencematcher):
    >>> m = _match(b'/foo', b'.', [br're:.*\.c$'], include=[b'path:lib'])
    >>> isinstance(m, intersectionmatcher)
    True
    >>> m = _match(b'/foo', b'.', [br're:.*\.c$'], exclude=[b'path:build'])
    >>> isinstance(m, differencematcher)
    True

    Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
    >>> _match(b'/foo', b'.', [])
    <alwaysmatcher>

    The 'default' argument determines which kind of pattern is assumed if a
    pattern has no prefix:
    >>> m = _match(b'/foo', b'.', [br'.*\.c$'], default=b're')
    >>> m = _match(b'/foo', b'.', [b'main.py'], default=b'relpath')
    >>> m = _match(b'/foo', b'.', [b'main.py'], default=b're')

    The primary use of matchers is to check whether a value (usually a file
    name) matches against one of the patterns given at initialization. There
    are two ways of doing this check.

    >>> m = _match(b'/foo', b'', [br're:.*\.c$', b'relpath:a'])

    1. Calling the matcher with a file name returns True if any pattern
    matches that file name:
    >>> m(b'a')
    True
    >>> m(b'main.c')
    True
    >>> m(b'test.py')
    False

    2. Using the exact() method only returns True if the file name matches one
    of the exact patterns (i.e. not re: or glob: patterns):
    >>> m.exact(b'a')
    True
    >>> m.exact(b'main.c')
    False
    """
    assert os.path.isabs(root)
    cwd = os.path.join(root, util.localpath(cwd))
    normalize = _donormalize
    if icasefs:
        # Case-insensitive filesystem: additionally fold every non-regex
        # pattern through the dirstate's normalize().
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            kp = _donormalize(patterns, default, root, cwd, auditor, warn)
            kindpats = []
            for kind, pats, source in kp:
                if kind not in (b're', b'relre'):  # regex can't be normalized
                    p = pats
                    pats = dsnormalize(pats)

                    # Preserve the original to handle a case only rename.
                    if p != pats and p in dirstate:
                        kindpats.append((kind, p, source))

                kindpats.append((kind, pats, source))
            return kindpats

    if patterns:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(badfn)
        else:
            m = _buildkindpatsmatcher(
                patternmatcher,
                root,
                cwd,
                kindpats,
                ctx=ctx,
                listsubrepos=listsubrepos,
                badfn=badfn,
            )
    else:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using nevermatcher).
        m = alwaysmatcher(badfn)

    if include:
        kindpats = normalize(include, b'glob', root, cwd, auditor, warn)
        im = _buildkindpatsmatcher(
            includematcher,
            root,
            cwd,
            kindpats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )
        m = intersectmatchers(m, im)
    if exclude:
        kindpats = normalize(exclude, b'glob', root, cwd, auditor, warn)
        em = _buildkindpatsmatcher(
            includematcher,
            root,
            cwd,
            kindpats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )
        m = differencematcher(m, em)
    return m

314

324

315

325

316

def exact(files, badfn=None):
    """Return an exactmatcher built from ``files`` with the given ``badfn``."""
    return exactmatcher(files, badfn=badfn)

318

328

319

329

320

def always(badfn=None):
    """Return an alwaysmatcher using ``badfn`` as its bad() callback."""
    return alwaysmatcher(badfn)

322

332

323

333

324

def never(badfn=None):
    """Return a nevermatcher using ``badfn`` as its bad() callback."""
    return nevermatcher(badfn)

326

336

327

337

328

def badmatch(match, badfn):
    """Make a copy of the given matcher, replacing its bad method with the given
    one.

    The copy is shallow (copy.copy); the matcher passed in is left untouched.
    """
    m = copy.copy(match)
    m.bad = badfn
    return m

335

345

336

346

337

def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
    """Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Returns a list of ``(kind, pat, source)`` tuples. ``source`` is ``b''``
    for patterns given directly, and the listfile/include path for patterns
    that were read from such a file.

    Raises error.Abort when a listfile cannot be read or when reading an
    include file aborts; an include file that fails with an OSError is
    skipped (reported through ``warn`` when one is given).
    """
    kindpats = []
    kinds_to_normalize = (
        b'relglob',
        b'path',
        b'filepath',
        b'rootfilesin',
        b'rootglob',
    )

    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
        elif kind in kinds_to_normalize:
            pat = util.normpath(pat)
        elif kind in (b'listfile', b'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == b'listfile0':
                    files = files.split(b'\0')
                else:
                    files = files.splitlines()
                files = [f for f in files if f]
            except OSError:
                raise error.Abort(_(b"unable to read file list (%s)") % pat)
            # Entries read from the list file are attributed to that file.
            for k, p, _source in _donormalize(
                files, default, root, cwd, auditor, warn
            ):
                kindpats.append((k, p, pat))
            continue
        elif kind == b'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(
                    includepats, default, root, cwd, auditor, warn
                ):
                    # Keep the innermost source if a nested file set one.
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                raise error.Abort(
                    b'%s: %s'
                    % (
                        pat,
                        inst.message,
                    )
                )
            except OSError as inst:
                if warn:
                    warn(
                        _(b"skipping unreadable pattern file '%s': %s\n")
                        % (pat, stringutil.forcebytestr(inst.strerror))
                    )
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, b''))
    return kindpats

395

405

396

406

397

class basematcher:

407

class basematcher:

398

def __init__(self, badfn=None):

408

def __init__(self, badfn=None):

399

self._was_tampered_with = False

409

self._was_tampered_with = False

400

if badfn is not None:

410

if badfn is not None:

401

self.bad = badfn

411

self.bad = badfn

402

412

403

def was_tampered_with_nonrec(self) -> bool:

413

def was_tampered_with_nonrec(self) -> bool:

404

# [_was_tampered_with] is used to track if when extensions changed the matcher

414

# [_was_tampered_with] is used to track if when extensions changed the matcher

405

# behavior (crazy stuff!), so we disable the rust fast path.

415

# behavior (crazy stuff!), so we disable the rust fast path.

406

return self._was_tampered_with

416

return self._was_tampered_with

407

417

408

def was_tampered_with(self) -> bool:

418

def was_tampered_with(self) -> bool:

409

return self.was_tampered_with_nonrec()

419

return self.was_tampered_with_nonrec()

410

420

411

def __call__(self, fn):

421

def __call__(self, fn):

412

return self.matchfn(fn)

422

return self.matchfn(fn)

413

423

414

# Callbacks related to how the matcher is used by dirstate.walk.

424

# Callbacks related to how the matcher is used by dirstate.walk.

415

# Subscribers to these events must monkeypatch the matcher object.

425

# Subscribers to these events must monkeypatch the matcher object.

416

def bad(self, f, msg):

426

def bad(self, f, msg):

417

"""Callback from dirstate.walk for each explicit file that can't be

427

"""Callback from dirstate.walk for each explicit file that can't be

418

found/accessed, with an error message."""

428

found/accessed, with an error message."""

419

429

420

# If an traversedir is set, it will be called when a directory discovered

430

# If an traversedir is set, it will be called when a directory discovered

421

# by recursive traversal is visited.

431

# by recursive traversal is visited.

422

traversedir = None

432

traversedir = None

423

433

424

@propertycache

434

@propertycache

425

def _files(self):

435

def _files(self):

426

return []

436

return []

427

437

428

def files(self):

438

def files(self):

429

"""Explicitly listed files or patterns or roots:

439

"""Explicitly listed files or patterns or roots:

430

if no patterns or .always(): empty list,

440

if no patterns or .always(): empty list,

431

if exact: list exact files,

441

if exact: list exact files,

432

if not .anypats(): list all files and dirs,

442

if not .anypats(): list all files and dirs,

433

else: optimal roots"""

443

else: optimal roots"""

434

return self._files

444

return self._files

435

445

436

@propertycache

446

@propertycache

437

def _fileset(self):

447

def _fileset(self):

438

return set(self._files)

448

return set(self._files)

439

449

440

def exact(self, f):

450

def exact(self, f):

441

'''Returns True if f is in .files().'''

451

'''Returns True if f is in .files().'''

442

return f in self._fileset

452

return f in self._fileset

443

453

444

def matchfn(self, f):

454

def matchfn(self, f):

445

return False

455

return False

446

456

447

def visitdir(self, dir):

457

def visitdir(self, dir):

448

"""Decides whether a directory should be visited based on whether it

458

"""Decides whether a directory should be visited based on whether it

449

has potential matches in it or one of its subdirectories. This is

459

has potential matches in it or one of its subdirectories. This is

450

based on the match's primary, included, and excluded patterns.

460

based on the match's primary, included, and excluded patterns.

451

461

452

Returns the string 'all' if the given directory and all subdirectories

462

Returns the string 'all' if the given directory and all subdirectories

453

should be visited. Otherwise returns True or False indicating whether

463

should be visited. Otherwise returns True or False indicating whether

454

the given directory should be visited.

464

the given directory should be visited.

455

"""

465

"""

456

return True

466

return True

457

467

458

def visitchildrenset(self, dir):

468

def visitchildrenset(self, dir):

459

"""Decides whether a directory should be visited based on whether it

469

"""Decides whether a directory should be visited based on whether it

460

has potential matches in it or one of its subdirectories, and

470

has potential matches in it or one of its subdirectories, and

461

potentially lists which subdirectories of that directory should be

471

potentially lists which subdirectories of that directory should be

462

visited. This is based on the match's primary, included, and excluded

472

visited. This is based on the match's primary, included, and excluded

463

patterns.

473

patterns.

464

474

465

This function is very similar to 'visitdir', and the following mapping

475

This function is very similar to 'visitdir', and the following mapping

466

can be applied:

476

can be applied:

467

477

468

visitdir | visitchildrenlist

478

visitdir | visitchildrenlist

469

----------+-------------------

479

----------+-------------------

470

False | set()

480

False | set()

471

'all' | 'all'

481

'all' | 'all'

472

True | 'this' OR non-empty set of subdirs -or files- to visit

482

True | 'this' OR non-empty set of subdirs -or files- to visit

473

483

474

Example:

484

Example:

475

Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return

485

Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return

476

the following values (assuming the implementation of visitchildrenset

486

the following values (assuming the implementation of visitchildrenset

477

is capable of recognizing this; some implementations are not).

487

is capable of recognizing this; some implementations are not).

478

488

479

'' -> {'foo', 'qux'}

489

'' -> {'foo', 'qux'}

480

'baz' -> set()

490

'baz' -> set()

481

'foo' -> {'bar'}

491

'foo' -> {'bar'}

482

# Ideally this would be 'all', but since the prefix nature of matchers

492

# Ideally this would be 'all', but since the prefix nature of matchers

483

# is applied to the entire matcher, we have to downgrade this to

493

# is applied to the entire matcher, we have to downgrade this to

484

# 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed

494

# 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed

485

# in.

495

# in.

486

'foo/bar' -> 'this'

496

'foo/bar' -> 'this'

487

'qux' -> 'this'

497

'qux' -> 'this'

488

498

489

Important:

499

Important:

490

Most matchers do not know if they're representing files or

500

Most matchers do not know if they're representing files or

491

directories. They see ['path:dir/f'] and don't know whether 'f' is a

501

directories. They see ['path:dir/f'] and don't know whether 'f' is a

492

file or a directory, so visitchildrenset('dir') for most matchers will

502

file or a directory, so visitchildrenset('dir') for most matchers will

493

return {'f'}, but if the matcher knows it's a file (like exactmatcher

503

return {'f'}, but if the matcher knows it's a file (like exactmatcher

494

does), it may return 'this'. Do not rely on the return being a set

504

does), it may return 'this'. Do not rely on the return being a set

495

indicating that there are no files in this dir to investigate (or

505

indicating that there are no files in this dir to investigate (or

496

equivalently that if there are files to investigate in 'dir' that it

506

equivalently that if there are files to investigate in 'dir' that it

497

will always return 'this').

507

will always return 'this').

498

"""

508

"""

499

return b'this'

509

return b'this'

500

510

501

def always(self):

511

def always(self):

502

"""Matcher will match everything and .files() will be empty --

512

"""Matcher will match everything and .files() will be empty --

503

optimization might be possible."""

513

optimization might be possible."""

504

return False

514

return False

505

515

506

def isexact(self):

516

def isexact(self):

507

"""Matcher will match exactly the list of files in .files() --

517

"""Matcher will match exactly the list of files in .files() --

508

optimization might be possible."""

518

optimization might be possible."""

509

return False

519

return False

510

520

511

def prefix(self):

521

def prefix(self):

512

"""Matcher will match the paths in .files() recursively --

522

"""Matcher will match the paths in .files() recursively --

513

optimization might be possible."""

523

optimization might be possible."""

514

return False

524

return False

515

525

516

def anypats(self):
    """Report whether none of .always(), .isexact() and .prefix() holds.

    When this is true, optimizations will be difficult.
    """
    # The three predicates are consulted in this order and evaluation stops
    # at the first one that holds.
    return not (self.always() or self.isexact() or self.prefix())

520

530

521

531

522

class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, badfn=None):
        super().__init__(badfn)

    def matchfn(self, f):
        """Accept any file name."""
        return True

    def always(self):
        """This matcher accepts everything."""
        return True

    def visitchildrenset(self, dir):
        """Everything below any directory is of interest."""
        return b'all'

    def visitdir(self, dir):
        """Everything below any directory is of interest."""
        return b'all'

    def __repr__(self):
        return r'<alwaysmatcher>'

542

552

543

553

544

class nevermatcher(basematcher):
    '''Matches nothing.'''

    def __init__(self, badfn=None):
        super().__init__(badfn)

    # Calling a matcher that matches nothing "exact" or "prefix" looks odd,
    # but it lets callers pick the fast paths written for either kind. Since
    # files() returns [], there are no exact matches and no prefixes, so any
    # fast path that iterates over them stays cheap and correct.
    def prefix(self):
        return True

    def isexact(self):
        return True

    def visitchildrenset(self, dir):
        """No child of any directory needs to be visited."""
        return set()

    def visitdir(self, dir):
        """No directory needs to be visited."""
        return False

    def __repr__(self):
        return r'<nevermatcher>'

569

579

570

580

571

class predicatematcher(basematcher):
    """A matcher adapter for a simple boolean function"""

    def __init__(self, predfn, predrepr=None, badfn=None):
        super().__init__(badfn)
        # predrepr, when given, is what __repr__ shows for the predicate.
        self._predrepr = predrepr
        # The predicate itself serves as the match function.
        self.matchfn = predfn

    @encoding.strmethod
    def __repr__(self):
        shown = stringutil.buildrepr(self._predrepr)
        if not shown:
            # No usable description was supplied; show the predicate itself.
            shown = pycompat.byterepr(self.matchfn)
        # NOTE: the tag below is emitted output and is kept exactly as is.
        return b'<predicatenmatcher pred=%s>' % shown

585

595

586

596

587

def path_or_parents_in_set(path, prefix_set):
    """Returns True if `path` (or any parent of `path`) is in `prefix_set`."""
    count = len(prefix_set)
    if not count:
        return False
    if path in prefix_set:
        return True

    # With more than 5 entries it is *probably* cheaper to walk up the
    # directory hierarchy of `path`, assuming hierarchies are fairly shallow
    # and hash lookups are cheap.
    if count > 5:
        for ancestor in pathutil.finddirs(path):
            if ancestor in prefix_set:
                return True
        return False

    # FIXME: Ideally we'd never get here in this case - the matcher would
    # have been recognized as an 'always' matcher and this would be skipped.
    if b'' in prefix_set:
        return True

    slash = ord(b'/')
    for prefix in prefix_set:
        # An exact hit on `path` was ruled out above, so any prefix that
        # `path` starts with is strictly shorter and the index is in range.
        if path.startswith(prefix) and path[len(prefix)] == slash:
            return True
    return False

612

622

613

623

614

class patternmatcher(basematcher):
    r"""Matches a list of (kind, pat, source) patterns against a 'root'
    directory.

    >>> kindpats = [
    ...     (b're', br'.*\.c$', b''),
    ...     (b'path', b'foo/a', b''),
    ...     (b'relpath', b'b', b''),
    ...     (b'glob', b'*.h', b''),
    ... ]
    >>> m = patternmatcher(b'foo', kindpats)
    >>> m(b'main.c')  # matches re:.*\.c$
    True
    >>> m(b'b.txt')
    False
    >>> m(b'foo/a')  # matches path:foo/a
    True
    >>> m(b'a')  # does not match path:b, since 'root' is 'foo'
    False
    >>> m(b'b')  # matches relpath:b, since 'root' is 'foo'
    True
    >>> m(b'lib.h')  # matches glob:*.h
    True

    >>> m.files()
    [b'', b'foo/a', b'', b'b']
    >>> m.exact(b'foo/a')
    True
    >>> m.exact(b'b')
    True
    >>> m.exact(b'lib.h')  # exact matches are for (rel)path kinds
    False
    """

    def __init__(self, root, kindpats, badfn=None):
        super().__init__(badfn)
        # Note that this sorts the caller's list in place.
        kindpats.sort()

        if rustmod is not None:
            # The Rust side needs the patterns themselves, since they can
            # contain patterns coming from the user interface.
            self._kindpats = kindpats

        _roots, explicit_dirs, parent_dirs = _rootsdirsandparents(kindpats)
        self._files = _explicitfiles(kindpats)
        self._dirs_explicit = set(explicit_dirs)
        self._dirs = parent_dirs
        self._prefix = _prefix(kindpats)
        self._pats, self._matchfn = _buildmatch(kindpats, b'$', root)

    def matchfn(self, fn):
        # Files named explicitly match without consulting the built matcher.
        return fn in self._fileset or self._matchfn(fn)

    def visitdir(self, dir):
        if self._prefix and dir in self._fileset:
            return b'all'
        if dir in self._dirs:
            return True
        return path_or_parents_in_set(
            dir, self._fileset
        ) or path_or_parents_in_set(dir, self._dirs_explicit)

    def visitchildrenset(self, dir):
        # Translate the visitdir() answer into the visitchildrenset()
        # vocabulary: True -> 'this', falsy -> empty set, 'all' -> 'all'.
        verdict = self.visitdir(dir)
        if verdict is True:
            return b'this'
        if not verdict:
            return set()
        assert verdict == b'all'
        return b'all'

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return b'<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)

692

702

693

703

694

# This is basically a reimplementation of pathutil.dirs that stores the

704

# This is basically a reimplementation of pathutil.dirs that stores the

695

# children instead of just a count of them, plus a small optional optimization

705

# children instead of just a count of them, plus a small optional optimization

696

# to avoid some directories we don't need.

706

# to avoid some directories we don't need.

697

class _dirchildren:

707

class _dirchildren:

698

def __init__(self, paths, onlyinclude=None):

708

def __init__(self, paths, onlyinclude=None):

699

self._dirs = {}

709

self._dirs = {}

700

self._onlyinclude = onlyinclude or []

710

self._onlyinclude = onlyinclude or []

701

addpath = self.addpath

711

addpath = self.addpath

702

for f in paths:

712

for f in paths:

703

addpath(f)

713

addpath(f)

704

714

705

def addpath(self, path):

715

def addpath(self, path):

706

if path == b'':

716

if path == b'':

707

return

717

return

708

dirs = self._dirs

718

dirs = self._dirs

709

findsplitdirs = _dirchildren._findsplitdirs

719

findsplitdirs = _dirchildren._findsplitdirs

710

for d, b in findsplitdirs(path):

720

for d, b in findsplitdirs(path):

711

if d not in self._onlyinclude:

721

if d not in self._onlyinclude:

712

continue

722

continue

713

dirs.setdefault(d, set()).add(b)

723

dirs.setdefault(d, set()).add(b)

714

724

715

@staticmethod

725

@staticmethod

716

def _findsplitdirs(path):

726

def _findsplitdirs(path):

717

# yields (dirname, basename) tuples, walking back to the root. This is

727

# yields (dirname, basename) tuples, walking back to the root. This is

718

# very similar to pathutil.finddirs, except:

728

# very similar to pathutil.finddirs, except:

719

# - produces a (dirname, basename) tuple, not just 'dirname'

729

# - produces a (dirname, basename) tuple, not just 'dirname'

720

# Unlike manifest._splittopdir, this does not suffix `dirname` with a

730

# Unlike manifest._splittopdir, this does not suffix `dirname` with a

721

# slash.

731

# slash.

722

oldpos = len(path)

732

oldpos = len(path)

723

pos = path.rfind(b'/')

733

pos = path.rfind(b'/')

724

while pos != -1:

734

while pos != -1:

725

yield path[:pos], path[pos + 1 : oldpos]

735

yield path[:pos], path[pos + 1 : oldpos]

726

oldpos = pos

736

oldpos = pos

727

pos = path.rfind(b'/', 0, pos)

737

pos = path.rfind(b'/', 0, pos)

728

yield b'', path[:oldpos]

738

yield b'', path[:oldpos]

729

739

730

def get(self, path):

740

def get(self, path):

731

return self._dirs.get(path, set())

741

return self._dirs.get(path, set())

732

742

733

743

734

class includematcher(basematcher):
    """Matcher built from (kind, pat, source) include patterns."""

    def __init__(self, root, kindpats, badfn=None):
        super().__init__(badfn)
        if rustmod is not None:
            # The Rust side needs the patterns themselves, since they can
            # contain patterns coming from the user interface.
            self._kindpats = kindpats
        self._pats, self.matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
        self._prefix = _prefix(kindpats)
        recursive, flat, ancestors = _rootsdirsandparents(kindpats)
        # Directories that are included recursively.
        self._roots = set(recursive)
        # Directories that are included non-recursively.
        self._dirs = set(flat)
        # Directories that are included non-recursively only because they
        # lead to something in _dirs or _roots.
        self._parents = ancestors

    def visitdir(self, dir):
        if self._prefix and dir in self._roots:
            return b'all'
        if dir in self._dirs or dir in self._parents:
            return True
        return path_or_parents_in_set(dir, self._roots)

    @propertycache
    def _allparentschildren(self):
        # Feeding dirs, roots and parents and then keeping only parents may
        # look odd. It covers cases such as:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # Had only self._parents been fed in, asking for the children of
        # 'foo' could not be answered with ['bar'].
        everything = itertools.chain(self._dirs, self._roots, self._parents)
        return _dirchildren(everything, onlyinclude=self._parents)

    def visitchildrenset(self, dir):
        if self._prefix and dir in self._roots:
            return b'all'
        # Note: the 'dir in self._parents' case from visitdir is
        # deliberately absent from this test; it is dealt with further down.
        if (
            b'' in self._roots
            or dir in self._dirs
            or path_or_parents_in_set(dir, self._roots)
        ):
            return b'this'

        if dir not in self._parents:
            return set()
        return self._allparentschildren.get(dir) or set()

    @encoding.strmethod
    def __repr__(self):
        return b'<includematcher includes=%r>' % pycompat.bytestr(self._pats)

793

803

794

804

795

class exactmatcher(basematcher):
    r"""Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).

    >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
    >>> m(b'a.txt')
    True
    >>> m(b'b.txt')
    False

    Input files that would be matched are exactly those returned by .files()
    >>> m.files()
    ['a.txt', 're:.*\\.c$']

    So pattern 're:.*\.c$' is not considered as a regex, but as a file name
    >>> m(b'main.c')
    False
    >>> m(br're:.*\.c$')
    True
    """

    def __init__(self, files, badfn=None):
        super().__init__(badfn)

        # A list is stored as given; any other iterable is copied into one.
        self._files = files if isinstance(files, list) else list(files)

    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        return dir in self._dirs

    @propertycache
    def _visitchildrenset_candidates(self):
        """A memoized set of candidates for visitchildrenset."""
        # The root directory b'' is dropped from the directory part only.
        return self._fileset | (self._dirs - {b''})

    @propertycache
    def _sorted_visitchildrenset_candidates(self):
        """A memoized sorted list of candidates for visitchildrenset."""
        return sorted(self._visitchildrenset_candidates)

    def visitchildrenset(self, dir):
        if not self._fileset or dir not in self._dirs:
            return set()

        if dir == b'':
            names = self._visitchildrenset_candidates
        else:
            ordered = self._sorted_visitchildrenset_candidates
            below = dir + b'/'
            # Bisect to the first entry that could start with `below`
            # (i.e. is >= below). At least one is expected; the assert at
            # the end catches the case where there is none.
            lo = bisect.bisect_left(ordered, below)
            # The upper bound is `dir` followed by whatever byte sorts right
            # after the slash (rather than assuming that byte is `0`).
            beyond = dir + encoding.strtolocal(chr(ord(b'/') + 1))
            # Bisect to the first entry that is >= that upper bound.
            hi = bisect.bisect_left(ordered, beyond, lo=lo)
            strip = len(below)
            names = {entry[strip:] for entry in ordered[lo:hi]}
        # self._dirs holds every directory recursively: for foo/bar/baz.txt
        # it contains '', 'foo' and 'foo/bar'. A name that still contains a
        # '/' therefore belongs to a subdir-of-a-subdir and can be dropped,
        # because the immediate subdir is present without a slash.
        children = {name for name in names if b'/' not in name}
        # An empty result is not expected: it would mean _dirs contains
        # something that has no file in _fileset.
        assert children
        return children

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return b'<exactmatcher files=%r>' % self._files

882

892

883

893

884

class differencematcher(basematcher):
    """Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.
    """

    def __init__(self, m1, m2):
        super(differencematcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        # Only m1 contributes the non-matching attributes.
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    def was_tampered_with(self) -> bool:
        """Return True if this matcher or either of its operands reports
        having been tampered with."""
        return (
            self.was_tampered_with_nonrec()
            or self._m1.was_tampered_with()
            or self._m2.was_tampered_with()
        )

    def matchfn(self, f):
        """Match `f` when m1 matches it and m2 does not."""
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        """Tell whether `dir` needs visiting: False, True or b'all'."""
        # Ask m2 once and reuse its answer for both tests below.
        m2_visit = self._m2.visitdir(dir)
        if m2_visit == b'all':
            # m2 covers everything below dir, so nothing is left for us.
            return False
        elif not m2_visit:
            # m2 does not match dir, we can return 'all' here if possible
            return self._m1.visitdir(dir)
        # m2 may exclude something below dir, so never report b'all'.
        return bool(self._m1.visitdir(dir))

    def visitchildrenset(self, dir):
        """Tell which children of `dir` need visiting.

        The result is b'all', b'this' or a (possibly empty) set of names.
        """
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == b'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in [b'all', b'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return b'this'
        # Possible values for m1: set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        """The difference is exact whenever m1 is exact."""
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)

958

968

959

969

960

def intersectmatchers(m1, m2):
    """Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.
    """
    if m1 is None or m2 is None:
        # With one operand missing, the other one (if any) is the result.
        return m1 or m2

    if m1.always():
        # m1 does not restrict anything: a copy of m2 is enough, but it must
        # carry m1's non-matching attributes.
        # TODO: Consider encapsulating these things in a class so there's
        # only one thing to copy from m1.
        combined = copy.copy(m2)
        combined.bad = m1.bad
        combined.traversedir = m1.traversedir
        return combined

    if m2.always():
        # m2 does not restrict anything: a copy of m1 is the answer.
        return copy.copy(m1)

    return intersectionmatcher(m1, m2)

978

988

979

989

980

class intersectionmatcher(basematcher):
    """Matcher that accepts a file only when both wrapped matchers do.

    The ``bad`` and ``traversedir`` attributes are copied from ``m1``; the
    ones on ``m2`` are not used.
    """

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    def was_tampered_with(self) -> bool:
        """Report tampering on this matcher or on either wrapped matcher."""
        tampered = self.was_tampered_with_nonrec()
        return (
            tampered
            or self._m1.was_tampered_with()
            or self._m2.was_tampered_with()
        )

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not
            # always files. For example, if intersecting a matcher
            # "-I glob:foo.txt" with matcher of "path:dir2", we don't want
            # to remove "dir2" from the set.
            return self._m1.files() + self._m2.files()

        # At least one side is exact: filter its file list through the other
        # matcher.
        exact, other = self._m1, self._m2
        if not exact.isexact():
            exact, other = other, exact
        return [f for f in exact.files() if other(f)]

    def matchfn(self, f):
        first = self._m1(f)
        return first and self._m2(f)

    def visitdir(self, dir):
        first = self._m1.visitdir(dir)
        if not first:
            # m1 refuses the directory; m2 is not consulted.
            return False
        second = self._m2.visitdir(dir)
        if first == b'all':
            return second
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(second)

    def visitchildrenset(self, dir):
        left = self._m1.visitchildrenset(dir)
        if not left:
            return set()
        right = self._m2.visitchildrenset(dir)
        if not right:
            return set()

        # 'all' on one side means the other side alone decides.
        if left == b'all':
            return right
        if right == b'all':
            return left

        if b'this' in (left, right):
            return b'this'

        assert isinstance(left, set) and isinstance(right, set)
        return left & right

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)

1046

1056

1047

1057

1048

class subdirmatcher(basematcher):
    """Wrap a matcher so that it operates relative to a subdirectory.

    Paths given to this matcher get ``path + "/"`` prepended before they are
    handed to the wrapped matcher, and the wrapped matcher's files below
    ``path`` are exposed with that prefix removed:

    >>> from . import pycompat
    >>> m1 = match(util.localpath(b'/root'), b'', [b'a.txt', b'sub/b.txt'], auditor=lambda name: None)
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'b.txt')
    True
    >>> m2.matchfn(b'a.txt')
    False
    >>> m2.matchfn(b'b.txt')
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path: bytes, matcher: basematcher) -> None:
        super(subdirmatcher, self).__init__()
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the wrapped matcher's files that live below ``path`` and
        # drop the leading "<path>/" from each of them.
        prefix = path + b"/"
        self._files = [
            f[len(prefix) :] for f in matcher._files if f.startswith(prefix)
        ]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(f == path for f in matcher._files)

    def was_tampered_with(self) -> bool:
        """Report tampering on this matcher or on the wrapped matcher."""
        tampered = self.was_tampered_with_nonrec()
        return tampered or self._matcher.was_tampered_with()

    def bad(self, f, msg):
        # Report through the wrapped matcher, using the un-stripped path.
        self._matcher.bad(self._path + b"/" + f, msg)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._path + b"/" + f)

    def visitdir(self, dir):
        # The empty directory name stands for the subdirectory itself.
        target = self._path if dir == b'' else self._path + b"/" + dir
        return self._matcher.visitdir(target)

    def visitchildrenset(self, dir):
        # The empty directory name stands for the subdirectory itself.
        target = self._path if dir == b'' else self._path + b"/" + dir
        return self._matcher.visitchildrenset(target)

    def always(self):
        return self._always

    def prefix(self):
        return self._matcher.prefix() and not self._always

    @encoding.strmethod
    def __repr__(self):
        return b'<subdirmatcher path=%r, matcher=%r>' % (
            self._path,
            self._matcher,
        )

1133

1143

1134

1144

1135

class prefixdirmatcher(basematcher):
    """Wrap a matcher so that it operates from a parent directory.

    The wrapped matcher's non-matching-attributes (bad, traversedir) are
    ignored.

    The prefix path should usually be the relative path from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(util.localpath(b'/root/d/e'), b'f', [b'../a.txt', b'b.txt'], auditor=lambda name: None)
    >>> m2 = prefixdirmatcher(b'd/e', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'd/e/a.txt')
    True
    >>> m2(b'd/e/b.txt')
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, path, matcher, badfn=None):
        super(prefixdirmatcher, self).__init__(badfn)
        if not path:
            raise error.ProgrammingError(b'prefix path must not be empty')
        self._path = path
        self._pathprefix = path + b'/'
        self._matcher = matcher

    @propertycache
    def _files(self):
        # The wrapped matcher's files, re-rooted under the prefix.
        prefix = self._pathprefix
        return [prefix + f for f in self._matcher._files]

    def matchfn(self, f):
        prefix = self._pathprefix
        if f.startswith(prefix):
            return self._matcher.matchfn(f[len(prefix) :])
        # Anything outside the prefix directory never matches.
        return False

    @propertycache
    def _pathdirs(self):
        return set(pathutil.finddirs(self._path))

    def visitdir(self, dir):
        if dir == self._path:
            # The prefix directory is the wrapped matcher's root.
            return self._matcher.visitdir(b'')
        prefix = self._pathprefix
        if dir.startswith(prefix):
            return self._matcher.visitdir(dir[len(prefix) :])
        return dir in self._pathdirs

    def visitchildrenset(self, dir):
        if dir == self._path:
            # The prefix directory is the wrapped matcher's root.
            return self._matcher.visitchildrenset(b'')
        prefix = self._pathprefix
        if dir.startswith(prefix):
            return self._matcher.visitchildrenset(dir[len(prefix) :])
        if dir not in self._pathdirs:
            return set()
        return b'this'

    def isexact(self):
        return self._matcher.isexact()

    def prefix(self):
        return self._matcher.prefix()

    @encoding.strmethod
    def __repr__(self):
        return b'<prefixdirmatcher path=%r, matcher=%r>' % (
            pycompat.bytestr(self._path),
            self._matcher,
        )

1216

1226

1217

1227

1218

class unionmatcher(basematcher):
    """Matcher that accepts a file when any of several matchers accepts it.

    ``traversedir`` is copied from the first matcher in the list.
    """

    def __init__(self, matchers):
        m1 = matchers[0]
        super(unionmatcher, self).__init__()
        self.traversedir = m1.traversedir
        self._matchers = matchers

    def was_tampered_with(self) -> bool:
        """Report tampering on this matcher or on any wrapped matcher."""
        return self.was_tampered_with_nonrec() or any(
            m.was_tampered_with() for m in self._matchers
        )

    def matchfn(self, f):
        return any(match(f) for match in self._matchers)

    def visitdir(self, dir):
        result = False
        for m in self._matchers:
            visit = m.visitdir(dir)
            if visit == b'all':
                # One matcher wants everything below; no need to look on.
                return visit
            result |= visit
        return result

    def visitchildrenset(self, dir):
        children = set()
        sawthis = False
        for m in self._matchers:
            visit = m.visitchildrenset(dir)
            if not visit:
                continue
            if visit == b'all':
                return visit
            if sawthis or visit == b'this':
                sawthis = True
                # don't break, we might have an 'all' in here.
                continue
            assert isinstance(visit, set)
            children.update(visit)
        return b'this' if sawthis else children

    @encoding.strmethod
    def __repr__(self):
        return b'<unionmatcher matchers=%r>' % self._matchers

1273

1283

1274

1284

1275

def patkind(pattern, default=None):
    r"""Return the kind of a 'kind:pat' pattern, or ``default``.

    ``default`` is returned when the pattern has no known kind prefix.

    >>> patkind(br're:.*\.c$')
    're'
    >>> patkind(b'glob:*.c')
    'glob'
    >>> patkind(b'relpath:test.py')
    'relpath'
    >>> patkind(b'main.py')
    >>> patkind(b'main.py', default=b're')
    're'
    """
    kind, _pat = _patsplit(pattern, default)
    return kind

1289

1299

1290

1300

1291

def _patsplit(pattern, default):
    """Split a pattern into a ``(kind, pat)`` pair.

    The text before the first ``:`` is the kind only if it is listed in
    ``allpatternkinds``; otherwise ``(default, pattern)`` is returned with
    the pattern untouched."""
    kind, sep, pat = pattern.partition(b':')
    if sep and kind in allpatternkinds:
        return kind, pat
    return default, pattern

1299

1309

1300

1310

1301

def _globre(pat):
    r"""Translate an extended glob (bytes) into regexp source (bytes).

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    """
    pos, end = 0, len(pat)
    pieces = []
    # Number of currently open '{' groups.
    depth = 0
    escape = util.stringutil.regexbytesescapemap.get

    def peek():
        # Next unread byte, or a false value at the end of the pattern.
        return pos < end and pat[pos : pos + 1]

    while pos < end:
        c = pat[pos : pos + 1]
        pos += 1
        if c not in b'*?[{},\\':
            pieces.append(escape(c, c))
        elif c == b'*':
            if peek() != b'*':
                # Single star: stays within one path component.
                pieces.append(b'[^/]*')
                continue
            pos += 1
            if peek() == b'/':
                # '**/' may also match nothing at all.
                pos += 1
                pieces.append(b'(?:.*/)?')
            else:
                pieces.append(b'.*')
        elif c == b'?':
            pieces.append(b'.')
        elif c == b'[':
            # Look for the closing bracket; a leading '!' or ']' belongs to
            # the class and is skipped over first.
            close = pos
            if close < end and pat[close : close + 1] in b'!]':
                close += 1
            while close < end and pat[close : close + 1] != b']':
                close += 1
            if close >= end:
                # Unterminated class: emit a literal '['.
                pieces.append(b'\\[')
            else:
                stuff = pat[pos:close].replace(b'\\', b'\\\\')
                pos = close + 1
                if stuff[0:1] == b'!':
                    # Glob negation becomes regexp negation.
                    stuff = b'^' + stuff[1:]
                elif stuff[0:1] == b'^':
                    # A leading '^' is literal in a glob class.
                    stuff = b'\\' + stuff
                pieces.append(b'[' + stuff + b']')
        elif c == b'{':
            depth += 1
            pieces.append(b'(?:')
        elif c == b'}' and depth:
            pieces.append(b')')
            depth -= 1
        elif c == b',' and depth:
            pieces.append(b'|')
        elif c == b'\\':
            following = peek()
            if following:
                # Backslash quotes the next byte.
                pos += 1
                pieces.append(escape(following, following))
            else:
                pieces.append(escape(c, c))
        else:
            # '}' or ',' outside of any group.
            pieces.append(escape(c, c))
    return b''.join(pieces)

1383

1393

1384

1394

1385

# Splits a pattern that starts with an inline flag group such as ``(?i)``
# into the flag letters (group 1) and the rest of the pattern (group 2).
# The parentheses must be escaped: the unescaped ``$`` form could never match,
# since nothing can follow an end-of-string anchor.
FLAG_RE = util.re.compile(br'^\(\?([aiLmsux]+)\)(.*)')

1386

1396

1387

1397

1388

def _regex(kind, pat, globsuffix):
    """Turn one (normalized) pattern of the given kind into regexp source.

    ``globsuffix`` is appended to the regexp produced for glob kinds
    (``glob``, ``rootglob`` and ``relglob``).

    Raises error.ProgrammingError for ``filepath`` and for any kind that is
    not handled here."""
    if kind in (b'glob', b'relpath') and not pat:
        return b''
    if kind == b're':
        return pat
    if kind == b'filepath':
        raise error.ProgrammingError(
            "'filepath:' patterns should not be converted to a regex"
        )
    if kind in (b'path', b'relpath'):
        if pat == b'.':
            return b''
        # The path itself, or anything below it.
        return util.stringutil.reescape(pat) + b'(?:/|$)'
    if kind == b'rootfilesin':
        # Pattern is a directory name; '.' means no directory prefix.
        escaped = b''
        if pat != b'.':
            escaped = util.stringutil.reescape(pat) + b'/'
        # Anything after the pattern must be a non-directory.
        return escaped + b'[^/]+$'
    if kind == b'relglob':
        body = _globre(pat)
        star = b'[^/]*'
        if body.startswith(star):
            # When pat has the form *XYZ (common), make the returned regex
            # more legible by returning the regex for **XYZ instead of
            # **/*XYZ.
            return b'.*' + body[len(star) :] + globsuffix
        return b'(?:|.*/)' + body + globsuffix
    if kind == b'relre':
        flag = None
        found = FLAG_RE.match(pat)
        if found:
            # FLAG_RE captures the flag letters and the remaining pattern.
            flag, pat = found.groups()
        if not pat.startswith(b'^'):
            pat = b'.*' + pat
        if flag is None:
            return pat
        # Re-apply the flags around the (possibly prefixed) pattern.
        return br'(?%s:%s)' % (flag, pat)
    if kind in (b'glob', b'rootglob'):
        return _globre(pat) + globsuffix
    raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))

1432

1442

1433

1443

1434

def _buildmatch(kindpats, globsuffix, root):
    """Build ``(regex, matchfn)`` for a list of kindpats.

    ``regex`` is a bytes description of what is matched and ``matchfn`` is a
    callable taking a file name. ``globsuffix`` is appended to the regexp of
    globs."""
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # Matchers for subincludes are created on first use, keyed by prefix.
        submatchers = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatch = submatchers.get(prefix)
                if submatch is None:
                    submatch = match(*matcherargs)
                    submatchers[prefix] = submatch

                if submatch(f[len(prefix) :]):
                    return True
            return False

        matchfuncs.append(matchsubinclude)

    regex = b''
    if kindpats:
        if all(k == b'rootfilesin' for k, p, s in kindpats):
            # Only rootfilesin patterns: compare the file's directory
            # against a set instead of going through a regexp.
            dirs = {p for k, p, s in kindpats}

            def mf(f):
                head, sep, _tail = f.rpartition(b'/')
                # Files without a slash are in the root directory, b'.'.
                return (head if sep else b'.') in dirs

            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
        else:
            regex, mf = _buildregexmatch(kindpats, globsuffix)
        matchfuncs.append(mf)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]
    return regex, lambda f: any(fn(f) for fn in matchfuncs)

1480

1490

1481

1491

1482

# Upper bound, in bytes, on a single pattern's regexp and on each joined
# group of regexps built by _buildregexmatch().
MAX_RE_SIZE = 20_000

1483

1493

1484

1494

1485

def _joinregexes(regexps):

1495

def _joinregexes(regexps):

1486

"""gather multiple regular expressions into a single one"""

1496

"""gather multiple regular expressions into a single one"""

1487

return b'|'.join(regexps)

1497

return b'|'.join(regexps)

1488

1498

1489

1499

1490

def _buildregexmatch(kindpats, globsuffix):
    """Build ``(regexp, matchfn)`` from a list of (kind, pattern, source).

    ``filepath`` patterns are matched by set membership; every other kind is
    converted with _regex(). The joined regexp is returned as bytes. When the
    pieces do not fit in one group of MAX_RE_SIZE bytes, they are compiled as
    several groups and the match function tries each of them.

    Raises error.Abort when a single pattern is longer than MAX_RE_SIZE or
    when a pattern does not compile.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        exact = set()
        regexps = []
        for kind, pattern, _source in kindpats:
            if kind == b'filepath':
                exact.add(pattern)
            else:
                regexps.append(_regex(kind, pattern, globsuffix))

        fullregexp = _joinregexes(regexps)

        # Cut the list of regexps into groups of at most MAX_RE_SIZE bytes.
        chunks = []
        chunkstart = 0
        chunksize = 0
        for idx, piece in enumerate(regexps):
            piecesize = len(piece)
            if piecesize > MAX_RE_SIZE:
                raise error.Abort(
                    _(b"matcher pattern is too long (%d bytes)") % piecesize
                )
            if chunksize + piecesize > MAX_RE_SIZE:
                chunks.append(_joinregexes(regexps[chunkstart:idx]))
                chunkstart = idx
                chunksize = 0
            # +1 accounts for the separator added when joining.
            chunksize += piecesize + 1

        if chunkstart == 0:
            # Everything fit in one group: compile the full regexp.
            matcher = _rematcher(fullregexp)

            def func(s):
                return bool(matcher(s))

        else:
            chunks.append(_joinregexes(regexps[chunkstart:]))
            allmatchers = [_rematcher(chunk) for chunk in chunks]

            def func(s):
                return any(m(s) for m in allmatchers)

        if not exact:
            actualfunc = func
        elif regexps:
            actualfunc = lambda s: s in exact or func(s)
        else:
            # An empty regex will always match, so only call the regex if
            # there were any actual patterns to match.
            actualfunc = lambda s: s in exact
        return fullregexp, actualfunc
    except re.error:
        # Compile the patterns one by one to find and name the invalid one.
        for k, p, s in kindpats:
            if k == b'filepath':
                continue
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if s:
                    msg = _(b"%s: invalid pattern (%s): %s") % (s, k, p)
                else:
                    msg = _(b"invalid pattern (%s): %s") % (k, p)
                raise error.Abort(msg)
        raise error.Abort(_(b"invalid pattern"))

1560

1570

1561

1571

1562

def _patternrootsanddirs(kindpats):

1572

def _patternrootsanddirs(kindpats):

1563

"""Returns roots and directories corresponding to each pattern.

1573

"""Returns roots and directories corresponding to each pattern.

1564

1574

1565

This calculates the roots and directories exactly matching the patterns and

1575

This calculates the roots and directories exactly matching the patterns and

1566

returns a tuple of (roots, dirs) for each. It does not return other

1576

returns a tuple of (roots, dirs) for each. It does not return other

1567

directories which may also need to be considered, like the parent

1577

directories which may also need to be considered, like the parent

1568

directories.

1578

directories.

1569

"""

1579

"""

1570

r = []

1580

r = []

1571

d = []

1581

d = []

1572

for kind, pat, source in kindpats:

1582

for kind, pat, source in kindpats:

1573

if kind in (b'glob', b'rootglob'): # find the non-glob prefix

1583

if kind in (b'glob', b'rootglob'): # find the non-glob prefix

1574

root = []

1584

root = []

1575

for p in pat.split(b'/'):

1585

for p in pat.split(b'/'):

1576

if b'[' in p or b'{' in p or b'*' in p or b'?' in p:

1586

if b'[' in p or b'{' in p or b'*' in p or b'?' in p:

1577

break

1587

break

1578

root.append(p)

1588

root.append(p)

1579

r.append(b'/'.join(root))

1589

r.append(b'/'.join(root))

1580

elif kind in (b'relpath', b'path', b'filepath'):

1590

elif kind in (b'relpath', b'path', b'filepath'):

1581

if pat == b'.':

1591

if pat == b'.':

1582

pat = b''

1592

pat = b''

1583

r.append(pat)

1593

r.append(pat)

1584

elif kind in (b'rootfilesin',):

1594

elif kind in (b'rootfilesin',):

1585

if pat == b'.':

1595

if pat == b'.':

1586

pat = b''

1596

pat = b''

1587

d.append(pat)

1597

d.append(pat)

1588

else: # relglob, re, relre

1598

else: # relglob, re, relre

1589

r.append(b'')

1599

r.append(b'')

1590

return r, d

1600

return r, d

1591

1601

1592

1602

1593

def _roots(kindpats):
    '''Returns root directories to match recursively from the given patterns.

    This is the ``roots`` half of ``_patternrootsanddirs(kindpats)``; the
    ``dirs`` half is discarded.
    '''
    roots, _dirs = _patternrootsanddirs(kindpats)
    return roots

1597

1607

1598

1608

1599

def _rootsdirsandparents(kindpats):
    """Returns roots and exact directories from patterns.

    `roots` are directories to match recursively, `dirs` should
    be matched non-recursively, and `parents` are the implicitly required
    directories to walk to items in either roots or dirs.

    Returns a tuple of (roots, dirs, parents). `roots` and `dirs` are the
    lists produced by `_patternrootsanddirs`; `parents` is a set.

    >>> r = _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['g/h', 'g/h', ''], []) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    ([], ['g/h', '']) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['r', 'p/p', ''], []) ['', 'p']
    >>> r = _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['', '', ''], []) ['']
    """
    r, d = _patternrootsanddirs(kindpats)

    p = set()
    # Add the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    p.update(pathutil.dirs(d))
    p.update(pathutil.dirs(r))

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return r, d, p

1641

1651

1642

1652

1643

def _explicitfiles(kindpats):
    """Returns the potential explicit filenames from the patterns.

    ``rootfilesin`` patterns are dropped first, and the remaining patterns
    are reduced to their roots with ``_roots``.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    """
    # Keep only the pattern kinds where one can specify filenames (vs only
    # directory names).
    filable = [kp for kp in kindpats if kp[0] not in (b'rootfilesin',)]
    return _roots(filable)

1655

1665

1656

1666

1657

def _prefix(kindpats):

1667

def _prefix(kindpats):

1658

'''Whether all the patterns match a prefix (i.e. recursively)'''

1668

'''Whether all the patterns match a prefix (i.e. recursively)'''

1659

for kind, pat, source in kindpats:

1669

for kind, pat, source in kindpats:

1660

if kind not in (b'path', b'relpath'):

1670

if kind not in (b'path', b'relpath'):

1661

return False

1671

return False

1662

return True

1672

return True

1663

1673

1664

1674

1665

_commentre = None

1675

_commentre = None

1666

1676

1677

if typing.TYPE_CHECKING:

1678

from typing_extensions import (

1679

Literal,

1680

)

1681

1682

@overload

1683

def readpatternfile(

1684

filepath: bytes, warn: Callable[[bytes], Any], sourceinfo: Literal[True]

1685

) -> List[Tuple[bytes, int, bytes]]:

1686

...

1687

1688

@overload

1689

def readpatternfile(

1690

filepath: bytes,

1691

warn: Callable[[bytes], Any],

1692

sourceinfo: Literal[False],

1693

) -> List[bytes]:

1694

...

1695

1696

@overload

1697

def readpatternfile(

1698

filepath: bytes,

1699

warn: Callable[[bytes], Any],

1700

sourceinfo: bool = False,

1701

) -> List[Union[Tuple[bytes, int, bytes], bytes]]:

1702

...

1703

1667

1704

1668

def readpatternfile(filepath, warn, sourceinfo=False):
    """parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    rootglob:pat   # rooted glob (same root as ^ in regexps)
    pattern        # pattern of the current default type

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline).
    This is useful to debug ignore patterns.

    ``originalline`` is the line after comment removal, trailing-whitespace
    stripping and removal of any recognized syntax prefix. ``lineno`` is
    1-based.

    ``warn``, when truthy, is called with a message for every ``syntax:``
    line naming an unknown syntax; such lines are otherwise ignored.

    The file is closed before returning, including when an exception is
    raised while reading it.
    """
    global _commentre

    syntaxes = {
        b're': b'relre:',
        b'regexp': b'relre:',
        b'glob': b'relglob:',
        b'rootglob': b'rootglob:',
        b'include': b'include',
        b'subinclude': b'subinclude',
    }
    syntax = b'relre:'
    patterns = []

    # 'with' guarantees the handle is released even if reading, the regexp
    # compilation or the warn callback raises.
    with open(filepath, b'rb') as fp:
        for lineno, line in enumerate(fp, start=1):
            if b"#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[: m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace(b"\\#", b"#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith(b'syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(
                            _(b"%s: ignoring invalid syntax '%s'\n")
                            % (filepath, s)
                        )
                continue

            # A per-line prefix overrides the file-wide default syntax. Both
            # the mapped form (e.g. b'relglob:') and the short form
            # (e.g. b'glob:') are recognized; the first match wins.
            linesyntax = syntax
            for s, rels in syntaxes.items():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels) :]
                    break
                elif line.startswith(s + b':'):
                    linesyntax = rels
                    line = line[len(s) + 1 :]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # match.py - filename matching
             #
             #  Copyright 2008, 2009 Olivia Mackall <olivia@selenic.com> and others
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import annotations
             import bisect
             import copy
             import itertools
             import os
             import re
+            import typing
+            from typing import (
+                Any,
+                Callable,
+                List,
+                Tuple,
+                Union,
+                overload,
+            )
             from .i18n import _
             from .pycompat import open
             from . import (
                 encoding,
                 error,
                 pathutil,
                 policy,
                 pycompat,
                 util,
             )
             from .utils import stringutil
             # Whatever policy.importrust returns for the 'dirstate' module
             # (presumably None when the Rust extensions are unavailable -- confirm
             # in policy.importrust).
             rustmod = policy.importrust('dirstate')
             # Pattern kinds, i.e. the part before ':' in a 'kind:pattern' string.
             allpatternkinds = (
                 b're',
                 b'glob',
                 b'path',
                 b'filepath',
                 b'relglob',
                 b'relpath',
                 b'relre',
                 b'rootglob',
                 b'listfile',
                 b'listfile0',
                 b'set',
                 b'include',
                 b'subinclude',
                 b'rootfilesin',
             )
             # Kinds whose pattern _donormalize resolves against cwd through
             # pathutil.canonpath.
             cwdrelativepatternkinds = (b'relpath', b'glob')
             # Module-level alias for util.propertycache, used as a decorator.
             propertycache = util.propertycache
             def _rematcher(regex):
                 """Compile ``regex`` through util.re and hand back a matching callable.

                 The compiled object's ``test_match`` attribute is returned when it
                 exists; otherwise its ordinary ``match`` method is returned.
                 """
                 compiled = util.re.compile(regex)
                 try:
                     # slightly faster, provided by facebook's re2 bindings
                     matchfn = compiled.test_match
                 except AttributeError:
                     matchfn = compiled.match
                 return matchfn
             def _expandsets(cwd, kindpats, ctx=None, listsubrepos=False, badfn=None):
                 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
                 matchers = []
                 other = []
                 for kind, pat, source in kindpats:
                     if kind == b'set':
                         if ctx is None:
                             raise error.ProgrammingError(
                                 b"fileset expression with no context"
                             )
                         matchers.append(ctx.matchfileset(cwd, pat, badfn=badfn))
                         if listsubrepos:
                             for subpath in ctx.substate:
                                 sm = ctx.sub(subpath).matchfileset(cwd, pat, badfn=badfn)
                                 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
                                 matchers.append(pm)
                         continue
                     other.append((kind, pat, source))
                 return matchers, other
             def _expandsubinclude(kindpats, root):
                 """Returns the list of subinclude matcher args and the kindpats without the
                 subincludes in it."""
                 relmatchers = []
                 other = []
                 for kind, pat, source in kindpats:
                     if kind == b'subinclude':
                         sourceroot = pathutil.dirname(util.normpath(source))
                         pat = util.pconvert(pat)
                         path = pathutil.join(sourceroot, pat)
                         newroot = pathutil.dirname(path)
                         matcherargs = (newroot, b'', [], [b'include:%s' % path])
                         prefix = pathutil.canonpath(root, root, newroot)
                         if prefix:
                             prefix += b'/'
                         relmatchers.append((prefix, matcherargs))
                     else:
                         other.append((kind, pat, source))
                 return relmatchers, other
             def _kindpatsalwaysmatch(kindpats):
                 """Checks whether the kindspats match everything, as e.g.
                 'relpath:.' does.
                 """
                 for kind, pat, source in kindpats:
                     if pat != b'' or kind not in [b'relpath', b'glob']:
                         return False
                 return True
             def _buildkindpatsmatcher(
                 matchercls,
                 root,
                 cwd,
                 kindpats,
                 ctx=None,
                 listsubrepos=False,
                 badfn=None,
             ):
                 """Build one matcher for ``kindpats``.

                 ``set`` patterns are expanded by ``_expandsets``; the remaining
                 patterns, if any, are given to ``matchercls(root, kindpats,
                 badfn=badfn)``. The result is a nevermatcher when nothing was built,
                 the single matcher when exactly one was built, and a unionmatcher of
                 all of them otherwise.
                 """
                 filesetmatchers, plainkindpats = _expandsets(
                     cwd,
                     kindpats,
                     ctx=ctx,
                     listsubrepos=listsubrepos,
                     badfn=badfn,
                 )

                 combined = []
                 if plainkindpats:
                     combined.append(matchercls(root, plainkindpats, badfn=badfn))
                 if filesetmatchers:
                     combined.extend(filesetmatchers)

                 if not combined:
                     return nevermatcher(badfn=badfn)
                 elif len(combined) == 1:
                     return combined[0]
                 else:
                     return unionmatcher(combined)
             def match(
                 root,
                 cwd,
                 patterns=None,
                 include=None,
                 exclude=None,
                 default=b'glob',
                 auditor=None,
                 ctx=None,
                 listsubrepos=False,
                 warn=None,
                 badfn=None,
                 icasefs=False,
             ):
                 r"""build an object to match a set of file patterns

                 arguments:
                 root - the canonical root of the tree you're matching against
                 cwd - the current working directory, if relevant
                 patterns - patterns to find
                 include - patterns to include (unless they are excluded)
                 exclude - patterns to exclude (even if they are included)
                 default - if a pattern in patterns has no explicit type, assume this one
                 auditor - optional path auditor
                 ctx - optional changecontext
                 listsubrepos - if True, recurse into subrepositories
                 warn - optional function used for printing warnings
                 badfn - optional bad() callback for this matcher instead of the default
                 icasefs - make a matcher for wdir on case insensitive filesystems, which
                     normalizes the given patterns to the case in the filesystem

                 a pattern is one of:
                 'glob:<glob>' - a glob relative to cwd
                 're:<regexp>' - a regular expression
                 'path:<path>' - a path relative to repository root, which is matched
                                 recursively
                 'filepath:<path>' - an exact path to a single file, relative to the
                                     repository root
                 'rootfilesin:<path>' - a path relative to repository root, which is
                                 matched non-recursively (will not match subdirectories)
                 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
                 'relpath:<path>' - a path relative to cwd
                 'relre:<regexp>' - a regexp that needn't match the start of a name
                 'set:<fileset>' - a fileset expression
                 'include:<path>' - a file of patterns to read and include
                 'subinclude:<path>' - a file of patterns to match against files under
                                       the same directory
                 '<something>' - a pattern of the specified default type

                 >>> def _match(root, *args, **kwargs):
                 ...     return match(util.localpath(root), *args, **kwargs)

                 Usually a patternmatcher is returned:
                 >>> _match(b'/foo', b'.', [br're:.*\.c$', b'path:foo/a', b'*.py'])
                 <patternmatcher patterns='[^/]*\\.py$|foo/a(?:/|$)|.*\\.c$'>

                 Combining 'patterns' with 'include' (resp. 'exclude') gives an
                 intersectionmatcher (resp. a differencematcher):
                 >>> type(_match(b'/foo', b'.', [br're:.*\.c$'], include=[b'path:lib']))
                 <class 'mercurial.match.intersectionmatcher'>
                 >>> type(_match(b'/foo', b'.', [br're:.*\.c$'], exclude=[b'path:build']))
                 <class 'mercurial.match.differencematcher'>

                 Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
                 >>> _match(b'/foo', b'.', [])
                 <alwaysmatcher>

                 The 'default' argument determines which kind of pattern is assumed if a
                 pattern has no prefix:
                 >>> _match(b'/foo', b'.', [br'.*\.c$'], default=b're')
                 <patternmatcher patterns='.*\\.c$'>
                 >>> _match(b'/foo', b'.', [b'main.py'], default=b'relpath')
                 <patternmatcher patterns='main\\.py(?:/|$)'>
                 >>> _match(b'/foo', b'.', [b'main.py'], default=b're')
                 <patternmatcher patterns='main.py'>

                 The primary use of matchers is to check whether a value (usually a file
                 name) matches against one of the patterns given at initialization. There
                 are two ways of doing this check.

                 >>> m = _match(b'/foo', b'', [br're:.*\.c$', b'relpath:a'])

                 1. Calling the matcher with a file name returns True if any pattern
                 matches that file name:
                 >>> m(b'a')
                 True
                 >>> m(b'main.c')
                 True
                 >>> m(b'test.py')
                 False

                 2. Using the exact() method only returns True if the file name matches one
                 of the exact patterns (i.e. not re: or glob: patterns):
                 >>> m.exact(b'a')
                 True
                 >>> m.exact(b'main.c')
                 False
                 """
                 assert os.path.isabs(root)
                 # cwd is made absolute by joining it onto root.
                 cwd = os.path.join(root, util.localpath(cwd))
                 # Patterns are normalized with _donormalize unless icasefs replaces
                 # the normalizer just below.
                 normalize = _donormalize
                 if icasefs:
                     # NOTE(review): this path dereferences ctx, so ctx must not be None
                     # when icasefs is true.
                     dirstate = ctx.repo().dirstate
                     dsnormalize = dirstate.normalize
                     def normalize(patterns, default, root, cwd, auditor, warn):
                         # Same as _donormalize, then every non-regex pattern is passed
                         # through dirstate.normalize.
                         kp = _donormalize(patterns, default, root, cwd, auditor, warn)
                         kindpats = []
                         for kind, pats, source in kp:
                             if kind not in (b're', b'relre'):  # regex can't be normalized
                                 p = pats
                                 pats = dsnormalize(pats)
                                 # Preserve the original to handle a case only rename.
                                 if p != pats and p in dirstate:
                                     kindpats.append((kind, p, source))
                             kindpats.append((kind, pats, source))
                         return kindpats
                 # Step 1: the base matcher, built from 'patterns'.
                 if patterns:
                     kindpats = normalize(patterns, default, root, cwd, auditor, warn)
                     if _kindpatsalwaysmatch(kindpats):
                         m = alwaysmatcher(badfn)
                     else:
                         m = _buildkindpatsmatcher(
                             patternmatcher,
                             root,
                             cwd,
                             kindpats,
                             ctx=ctx,
                             listsubrepos=listsubrepos,
                             badfn=badfn,
                         )
                 else:
                     # It's a little strange that no patterns means to match everything.
                     # Consider changing this to match nothing (probably using nevermatcher).
                     m = alwaysmatcher(badfn)
                 # Step 2: narrow the base matcher to what 'include' matches. The
                 # include patterns always default to the glob kind, and their matcher
                 # is built with badfn=None rather than the caller's badfn.
                 if include:
                     kindpats = normalize(include, b'glob', root, cwd, auditor, warn)
                     im = _buildkindpatsmatcher(
                         includematcher,
                         root,
                         cwd,
                         kindpats,
                         ctx=ctx,
                         listsubrepos=listsubrepos,
                         badfn=None,
                     )
                     m = intersectmatchers(m, im)
                 # Step 3: remove what 'exclude' matches; built the same way as the
                 # include matcher.
                 if exclude:
                     kindpats = normalize(exclude, b'glob', root, cwd, auditor, warn)
                     em = _buildkindpatsmatcher(
                         includematcher,
                         root,
                         cwd,
                         kindpats,
                         ctx=ctx,
                         listsubrepos=listsubrepos,
                         badfn=None,
                     )
                     m = differencematcher(m, em)
                 return m
             def exact(files, badfn=None):
                 """Return an exactmatcher built from ``files``, forwarding ``badfn``."""
                 return exactmatcher(files, badfn=badfn)
             def always(badfn=None):
                 """Return an alwaysmatcher, forwarding ``badfn``."""
                 return alwaysmatcher(badfn)
             def never(badfn=None):
                 """Return a nevermatcher, forwarding ``badfn``."""
                 return nevermatcher(badfn)
             def badmatch(match, badfn):
                 """Return a shallow copy of ``match`` whose ``bad`` attribute is
                 ``badfn``.

                 The matcher passed in is left unmodified.
                 """
                 clone = copy.copy(match)
                 clone.bad = badfn
                 return clone
             def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
                 """Convert 'kind:pat' from the patterns list to tuples with kind and
                 normalized and rooted patterns and with listfiles expanded.

                 Returns a list of ``(kind, pat, source)`` tuples. ``source`` is b''
                 for a pattern given directly, the list file's path for a pattern read
                 from a listfile/listfile0, and for a pattern read through 'include:'
                 the nested source if non-empty, else the include path.

                 Raises error.Abort when a list file cannot be read, and re-raises an
                 error.Abort coming from an included file with the include path
                 prefixed to its message. An included file that fails with IOError is
                 skipped, with a warning if ``warn`` is set.
                 """
                 kindpats = []
                 # Kinds that get util.normpath applied (no cwd resolution).
                 kinds_to_normalize = (
                     b'relglob',
                     b'path',
                     b'filepath',
                     b'rootfilesin',
                     b'rootglob',
                 )
                 for kind, pat in [_patsplit(p, default) for p in patterns]:
                     if kind in cwdrelativepatternkinds:
                         pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
                     elif kind in kinds_to_normalize:
                         pat = util.normpath(pat)
                     elif kind in (b'listfile', b'listfile0'):
                         try:
                             files = util.readfile(pat)
                             # listfile0 entries are NUL-separated, listfile entries
                             # are one per line; empty entries are dropped.
                             if kind == b'listfile0':
                                 files = files.split(b'\0')
                             else:
                                 files = files.splitlines()
                             files = [f for f in files if f]
                         except EnvironmentError:
                             raise error.Abort(_(b"unable to read file list (%s)") % pat)
                         # Each listed entry is itself a pattern and is normalized
                         # recursively; the list file path becomes its source.
                         for k, p, source in _donormalize(
                             files, default, root, cwd, auditor, warn
                         ):
                             kindpats.append((k, p, pat))
                         continue
                     elif kind == b'include':
                         try:
                             # The include path is resolved against root, not cwd.
                             fullpath = os.path.join(root, util.localpath(pat))
                             includepats = readpatternfile(fullpath, warn)
                             for k, p, source in _donormalize(
                                 includepats, default, root, cwd, auditor, warn
                             ):
                                 kindpats.append((k, p, source or pat))
                         except error.Abort as inst:
                             raise error.Abort(
                                 b'%s: %s'
                                 % (
                                     pat,
                                     inst.message,
                                 )
                             )
                         except IOError as inst:
                             if warn:
                                 warn(
                                     _(b"skipping unreadable pattern file '%s': %s\n")
                                     % (pat, stringutil.forcebytestr(inst.strerror))
                                 )
                         continue
                     # else: re or relre - which cannot be normalized
                     # (any other kind not handled above also reaches this line with
                     # its pattern unchanged)
                     kindpats.append((kind, pat, b''))
                 return kindpats
             class basematcher:
                 """Root of the matcher hierarchy.

                 With the defaults defined here a matcher matches no file, lists no
                 explicit files, asks for every directory to be visited, and reports
                 itself as ``anypats()``. Subclasses override whichever parts differ.
                 """

                 def __init__(self, badfn=None):
                     self._was_tampered_with = False
                     # Only shadow the class-level ``bad`` callback when a replacement
                     # was actually supplied.
                     if badfn is None:
                         return
                     self.bad = badfn

                 def was_tampered_with_nonrec(self) -> bool:
                     """Return this matcher's own tamper flag, without recursing."""
                     # [_was_tampered_with] tracks whether extensions changed the
                     # matcher behavior (crazy stuff!), in which case the rust fast
                     # path gets disabled.
                     return self._was_tampered_with

                 def was_tampered_with(self) -> bool:
                     """Return the tamper flag; the base version only checks itself."""
                     return self.was_tampered_with_nonrec()

                 def __call__(self, fn):
                     """Calling the matcher is shorthand for ``matchfn(fn)``."""
                     return self.matchfn(fn)

                 # Callbacks related to how the matcher is used by dirstate.walk.
                 # Subscribers to these events must monkeypatch the matcher object.
                 def bad(self, f, msg):
                     """Callback from dirstate.walk for each explicit file that can't be
                     found/accessed, with an error message."""

                 # If a traversedir is set, it will be called when a directory
                 # discovered by recursive traversal is visited.
                 traversedir = None

                 @propertycache
                 def _files(self):
                     return []

                 def files(self):
                     """Explicitly listed files or patterns or roots:

                     if no patterns or .always(): empty list,
                     if exact: list exact files,
                     if not .anypats(): list all files and dirs,
                     else: optimal roots"""
                     return self._files

                 @propertycache
                 def _fileset(self):
                     # Set view of ``_files`` for constant-time membership tests.
                     return set(self._files)

                 def exact(self, f):
                     """Returns True if f is in .files()."""
                     return f in self._fileset

                 def matchfn(self, f):
                     """Tell whether ``f`` matches; the base matcher matches nothing."""
                     return False

                 def visitdir(self, dir):
                     """Decides whether a directory should be visited based on whether it
                     has potential matches in it or one of its subdirectories. This is
                     based on the match's primary, included, and excluded patterns.

                     Returns the string 'all' if the given directory and all subdirectories
                     should be visited. Otherwise returns True or False indicating whether
                     the given directory should be visited.
                     """
                     return True

                 def visitchildrenset(self, dir):
                     """Decides whether a directory should be visited based on whether it
                     has potential matches in it or one of its subdirectories, and
                     potentially lists which subdirectories of that directory should be
                     visited. This is based on the match's primary, included, and excluded
                     patterns.

                     This function is very similar to 'visitdir', and the following mapping
                     can be applied:

                          visitdir | visitchildrenset
                         ----------+-------------------
                          False    | set()
                          'all'    | 'all'
                          True     | 'this' OR non-empty set of subdirs -or files- to visit

                     Example:
                       Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
                       the following values (assuming the implementation of visitchildrenset
                       is capable of recognizing this; some implementations are not).

                       '' -> {'foo', 'qux'}
                       'baz' -> set()
                       'foo' -> {'bar'}
                       # Ideally this would be 'all', but since the prefix nature of matchers
                       # is applied to the entire matcher, we have to downgrade this to
                       # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
                       # in.
                       'foo/bar' -> 'this'
                       'qux' -> 'this'

                     Important:
                       Most matchers do not know if they're representing files or
                       directories. They see ['path:dir/f'] and don't know whether 'f' is a
                       file or a directory, so visitchildrenset('dir') for most matchers will
                       return {'f'}, but if the matcher knows it's a file (like exactmatcher
                       does), it may return 'this'. Do not rely on the return being a set
                       indicating that there are no files in this dir to investigate (or
                       equivalently that if there are files to investigate in 'dir' that it
                       will always return 'this').
                     """
                     return b'this'

                 def always(self):
                     """Matcher will match everything and .files() will be empty --
                     optimization might be possible."""
                     return False

                 def isexact(self):
                     """Matcher will match exactly the list of files in .files() --
                     optimization might be possible."""
                     return False

                 def prefix(self):
                     """Matcher will match the paths in .files() recursively --
                     optimization might be possible."""
                     return False

                 def anypats(self):
                     """None of .always(), .isexact(), and .prefix() is true --
                     optimizations will be difficult."""
                     return not (self.always() or self.isexact() or self.prefix())
             class alwaysmatcher(basematcher):
                 """Matcher that accepts every path."""

                 def __init__(self, badfn=None):
                     super().__init__(badfn)

                 def always(self):
                     return True

                 def matchfn(self, f):
                     # Every file matches, whatever its name.
                     return True

                 def visitdir(self, dir):
                     # The directory and everything below it should be visited.
                     return b'all'

                 def visitchildrenset(self, dir):
                     return b'all'

                 def __repr__(self):
                     return r'<alwaysmatcher>'
             class nevermatcher(basematcher):
                 """Matcher that rejects every path."""

                 def __init__(self, badfn=None):
                     super().__init__(badfn)

                 # It's a little weird to say that the nevermatcher is an exact matcher
                 # or a prefix matcher, but it seems to make sense to let callers take
                 # fast paths based on either. There will be no exact matches, nor any
                 # prefixes (files() returns []), so fast paths iterating over them
                 # should be efficient (and correct).
                 def isexact(self):
                     return True

                 def prefix(self):
                     return True

                 def visitdir(self, dir):
                     # Nothing can match, so no directory is worth visiting.
                     return False

                 def visitchildrenset(self, dir):
                     return set()

                 def __repr__(self):
                     return r'<nevermatcher>'
             class predicatematcher(basematcher):
                 """Wrap a plain boolean function so it can be used as a matcher."""

                 def __init__(self, predfn, predrepr=None, badfn=None):
                     super().__init__(badfn)
                     # The predicate itself becomes this instance's matchfn.
                     self.matchfn = predfn
                     self._predrepr = predrepr

                 @encoding.strmethod
                 def __repr__(self):
                     # Prefer the caller-provided description and fall back to the
                     # byte repr of the predicate when that description is empty.
                     described = stringutil.buildrepr(self._predrepr)
                     if not described:
                         described = pycompat.byterepr(self.matchfn)
                     # NOTE: the 'predicatenmatcher' spelling is runtime output and is
                     # deliberately left untouched here.
                     return b'<predicatenmatcher pred=%s>' % described
             def path_or_parents_in_set(path, prefix_set):
                 """Returns True if `path` (or any parent of `path`) is in `prefix_set`."""
                 count = len(prefix_set)
                 if not count:
                     return False
                 if path in prefix_set:
                     return True

                 # If there's more than 5 paths in prefix_set, it's *probably* quicker to
                 # "walk up" the directory hierarchy instead, with the assumption that
                 # most directory hierarchies are relatively shallow and hash lookup is
                 # cheap.
                 if count > 5:
                     for ancestor in pathutil.finddirs(path):
                         if ancestor in prefix_set:
                             return True
                     return False

                 # FIXME: Ideally we'd never get to this point if this is the case - we'd
                 # recognize ourselves as an 'always' matcher and skip this.
                 if b'' in prefix_set:
                     return True

                 slash = ord(b'/')
                 for prefix in prefix_set:
                     # We already checked that path isn't in prefix_set exactly, so
                     # `path[len(prefix)]` should never raise IndexError.
                     if path.startswith(prefix) and path[len(prefix)] == slash:
                         return True
                 return False
             class patternmatcher(basematcher):
                 r"""Matches a set of (kind, pat, source) against a 'root' directory.

                 >>> kindpats = [
                 ...     (b're', br'.*\.c$', b''),
                 ...     (b'path', b'foo/a', b''),
                 ...     (b'relpath', b'b', b''),
                 ...     (b'glob', b'*.h', b''),
                 ... ]
                 >>> m = patternmatcher(b'foo', kindpats)
                 >>> m(b'main.c')  # matches re:.*\.c$
                 True
                 >>> m(b'b.txt')
                 False
                 >>> m(b'foo/a')  # matches path:foo/a
                 True
                 >>> m(b'a')  # does not match path:b, since 'root' is 'foo'
                 False
                 >>> m(b'b')  # matches relpath:b, since 'root' is 'foo'
                 True
                 >>> m(b'lib.h')  # matches glob:*.h
                 True
                 >>> m.files()
                 [b'', b'foo/a', b'', b'b']
                 >>> m.exact(b'foo/a')
                 True
                 >>> m.exact(b'b')
                 True
                 >>> m.exact(b'lib.h')  # exact matches are for (rel)path kinds
                 False
                 """

                 def __init__(self, root, kindpats, badfn=None):
                     super().__init__(badfn)
                     # Note: this sorts the list handed in by the caller, in place.
                     kindpats.sort()

                     if rustmod is not None:
                         # We need to pass the patterns to Rust because they can contain
                         # patterns from the user interface
                         self._kindpats = kindpats

                     _roots, explicit_dirs, parent_dirs = _rootsdirsandparents(kindpats)
                     self._files = _explicitfiles(kindpats)
                     self._dirs_explicit = set(explicit_dirs)
                     self._dirs = parent_dirs
                     self._prefix = _prefix(kindpats)
                     self._pats, self._matchfn = _buildmatch(kindpats, b'$', root)

                 def matchfn(self, fn):
                     # Explicitly listed files match without consulting the compiled
                     # pattern function.
                     return fn in self._fileset or self._matchfn(fn)

                 def visitdir(self, dir):
                     if self._prefix and dir in self._fileset:
                         return b'all'
                     if dir in self._dirs:
                         return True
                     if path_or_parents_in_set(dir, self._fileset):
                         return True
                     return path_or_parents_in_set(dir, self._dirs_explicit)

                 def visitchildrenset(self, dir):
                     # Translate the visitdir() answer into the visitchildrenset()
                     # vocabulary: True -> 'this', falsy -> set(), 'all' -> 'all'.
                     verdict = self.visitdir(dir)
                     if verdict is True:
                         return b'this'
                     if not verdict:
                         return set()
                     assert verdict == b'all'
                     return b'all'

                 def prefix(self):
                     return self._prefix

                 @encoding.strmethod
                 def __repr__(self):
                     return b'<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)
             # This is basically a reimplementation of pathutil.dirs that stores the
             # children instead of just a count of them, plus a small optional optimization
             # to avoid some directories we don't need.
             class _dirchildren:
                 """Map each directory to the set of its immediate children.

                 Every path in ``paths`` is split into (dirname, basename) pairs all the
                 way up to the root (``b''``), and each basename is recorded under its
                 dirname.

                 ``onlyinclude`` optionally restricts which directories are recorded:
                 when it is not None, only directories contained in it get an entry.
                 When it is None (the default) every directory is recorded. Previously
                 the default was turned into an empty list, which filtered out every
                 directory and left the mapping permanently empty.
                 """

                 def __init__(self, paths, onlyinclude=None):
                     self._dirs = {}
                     # None means "no restriction"; an empty container still means
                     # "record nothing", as before.
                     self._onlyinclude = onlyinclude
                     addpath = self.addpath
                     for f in paths:
                         addpath(f)

                 def addpath(self, path):
                     """Record ``path`` and each of its ancestors under their parents.

                     The empty path (the root itself) has no parent and is ignored.
                     """
                     if path == b'':
                         return
                     dirs = self._dirs
                     onlyinclude = self._onlyinclude
                     for d, b in _dirchildren._findsplitdirs(path):
                         if onlyinclude is not None and d not in onlyinclude:
                             continue
                         dirs.setdefault(d, set()).add(b)

                 @staticmethod
                 def _findsplitdirs(path):
                     # yields (dirname, basename) tuples, walking back to the root.  This is
                     # very similar to pathutil.finddirs, except:
                     #  - produces a (dirname, basename) tuple, not just 'dirname'
                     # Unlike manifest._splittopdir, this does not suffix `dirname` with a
                     # slash.
                     oldpos = len(path)
                     pos = path.rfind(b'/')
                     while pos != -1:
                         yield path[:pos], path[pos + 1 : oldpos]
                         oldpos = pos
                         pos = path.rfind(b'/', 0, pos)
                     # The top-most component hangs directly off the root directory.
                     yield b'', path[:oldpos]

                 def get(self, path):
                     """Return the recorded children of ``path`` (an empty set if none)."""
                     return self._dirs.get(path, set())
             class includematcher(basematcher):
                 """Matcher built from include-style (kind, pat, source) patterns.

                 Besides the compiled match function it keeps three directory sets
                 (roots, dirs and parents) that drive visitdir()/visitchildrenset().
                 """

                 def __init__(self, root, kindpats, badfn=None):
                     super().__init__(badfn)
                     if rustmod is not None:
                         # We need to pass the patterns to Rust because they can contain
                         # patterns from the user interface
                         self._kindpats = kindpats
                     self._pats, self.matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
                     self._prefix = _prefix(kindpats)
                     recursive, nonrecursive, ancestors = _rootsdirsandparents(kindpats)
                     # roots are directories which are recursively included.
                     self._roots = set(recursive)
                     # dirs are directories which are non-recursively included.
                     self._dirs = set(nonrecursive)
                     # parents are directories which are non-recursively included because
                     # they are needed to get to items in _dirs or _roots.
                     self._parents = ancestors

                 def visitdir(self, dir):
                     if self._prefix and dir in self._roots:
                         return b'all'
                     if dir in self._dirs or dir in self._parents:
                         return True
                     return path_or_parents_in_set(dir, self._roots)

                 @propertycache
                 def _allparentschildren(self):
                     # It may seem odd that we add dirs, roots, and parents, and then
                     # restrict to only parents. This is to catch the case of:
                     #   dirs = ['foo/bar']
                     #   parents = ['foo']
                     # if we asked for the children of 'foo', but had only added
                     # self._parents, we wouldn't be able to respond ['bar'].
                     every_dir = itertools.chain(self._dirs, self._roots, self._parents)
                     return _dirchildren(every_dir, onlyinclude=self._parents)

                 def visitchildrenset(self, dir):
                     roots = self._roots
                     if self._prefix and dir in roots:
                         return b'all'
                     # Note: this does *not* include the 'dir in self._parents' case from
                     # visitdir, that's handled below.
                     if (
                         b'' in roots
                         or dir in self._dirs
                         or path_or_parents_in_set(dir, roots)
                     ):
                         return b'this'

                     if dir not in self._parents:
                         return set()
                     # A pure parent directory: only the children that lead towards an
                     # included directory need visiting.
                     return self._allparentschildren.get(dir) or set()

                 @encoding.strmethod
                 def __repr__(self):
                     return b'<includematcher includes=%r>' % pycompat.bytestr(self._pats)
             class exactmatcher(basematcher):
                 r"""Matches the input files exactly. They are interpreted as paths, not
                 patterns (so no kind-prefixes).

                 >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
                 >>> m(b'a.txt')
                 True
                 >>> m(b'b.txt')
                 False

                 Input files that would be matched are exactly those returned by .files()

                 >>> m.files()
                 ['a.txt', 're:.*\\.c$']

                 So pattern 're:.*\.c$' is not considered as a regex, but as a file name

                 >>> m(b'main.c')
                 False
                 >>> m(br're:.*\.c$')
                 True
                 """
                 def __init__(self, files, badfn=None):
                     super(exactmatcher, self).__init__(badfn)
                     # A list is stored as-is (not copied); any other iterable is
                     # materialized into a new list.
                     if isinstance(files, list):
                         self._files = files
                     else:
                         self._files = list(files)
                 # Matching is plain membership in the explicit file set.
                 matchfn = basematcher.exact
                 @propertycache
                 def _dirs(self):
                     # Memoized set built from pathutil.dirs() over the explicit files.
                     return set(pathutil.dirs(self._fileset))
                 def visitdir(self, dir):
                     """Visit ``dir`` only if it is one of the memoized ``_dirs``."""
                     return dir in self._dirs
                 @propertycache
                 def _visitchildrenset_candidates(self):
                     """A memoized set of candidates for visitchildrenset."""
                     # `-` binds tighter than `|`: b'' is removed from _dirs only, and
                     # the result is then unioned with _fileset.
                     return self._fileset | self._dirs - {b''}
                 @propertycache
                 def _sorted_visitchildrenset_candidates(self):
                     """A memoized sorted list of candidates for visitchildrenset."""
                     return sorted(self._visitchildrenset_candidates)
                 def visitchildrenset(self, dir):
                     """Return the names directly inside ``dir`` that need visiting.

                     Returns an empty set when there are no files at all or ``dir`` is
                     not in ``_dirs``; otherwise a non-empty set of immediate child
                     names (asserted below).
                     """
                     if not self._fileset or dir not in self._dirs:
                         return set()
                     if dir == b'':
                         # At the root every candidate is relevant; the slash filter
                         # below reduces them to top-level names.
                         candidates = self._visitchildrenset_candidates
                     else:
                         candidates = self._sorted_visitchildrenset_candidates
                         d = dir + b'/'
                         # Use bisect to find the first element potentially starting with d
                         # (i.e. >= d). This should always find at least one element (we'll
                         # assert later if this is not the case).
                         first = bisect.bisect_left(candidates, d)
                         # We need a representation of the first element that is > d that
                         # does not start with d, so since we added a `/` on the end of dir,
                         # we'll add whatever comes after slash (we could probably assume
                         # that `0` is after `/`, but let's not) to the end of dir instead.
                         dnext = dir + encoding.strtolocal(chr(ord(b'/') + 1))
                         # Use bisect to find the first element >= dnext
                         last = bisect.bisect_left(candidates, dnext, lo=first)
                         dlen = len(d)
                         # Strip the "dir/" prefix so the candidates become relative to dir.
                         candidates = {c[dlen:] for c in candidates[first:last]}
                     # self._dirs includes all of the directories, recursively, so if
                     # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
                     # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
                     # '/' in it, indicating that it's for a subdir-of-a-subdir; the
                     # immediate subdir will be in there without a slash.
                     ret = {c for c in candidates if b'/' not in c}
                     # We really do not expect ret to be empty, since that would imply that
                     # there's something in _dirs that didn't have a file in _fileset.
                     assert ret
                     return ret
                 def isexact(self):
                     return True
                 @encoding.strmethod
                 def __repr__(self):
                     return b'<exactmatcher files=%r>' % self._files
             class differencematcher(basematcher):
                 """Composes two matchers by matching if the first matches and the second
                 does not.

                 The second matcher's non-matching-attributes (bad, traversedir) are ignored.
                 """

                 def __init__(self, m1, m2):
                     super().__init__()
                     self._m1 = m1
                     self._m2 = m2
                     # Callbacks are taken from the first matcher only.
                     self.bad = m1.bad
                     self.traversedir = m1.traversedir

                 def was_tampered_with(self) -> bool:
                     # Tampering with either wrapped matcher counts as tampering here.
                     own = self.was_tampered_with_nonrec()
                     return (
                         own
                         or self._m1.was_tampered_with()
                         or self._m2.was_tampered_with()
                     )

                 def matchfn(self, f):
                     return self._m1(f) and not self._m2(f)

                 @propertycache
                 def _files(self):
                     if not self.isexact():
                         # If m1 is not an exact matcher, we can't easily figure out the
                         # set of files, because its files() are not always files. For
                         # example, if m1 is "path:dir" and m2 is "rootfilesin:.", we
                         # don't want to remove "dir" from the set even though it would
                         # match m2, because the "dir" in m1 may not be a file.
                         return self._m1.files()
                     return [f for f in self._m1.files() if self(f)]

                 def visitdir(self, dir):
                     if self._m2.visitdir(dir) == b'all':
                         # Everything below dir is subtracted.
                         return False
                     if not self._m2.visitdir(dir):
                         # m2 does not match dir, we can return 'all' here if possible
                         return self._m1.visitdir(dir)
                     return bool(self._m1.visitdir(dir))

                 def visitchildrenset(self, dir):
                     excluded = self._m2.visitchildrenset(dir)
                     if excluded == b'all':
                         return set()
                     included = self._m1.visitchildrenset(dir)
                     # Possible values for m1: 'all', 'this', set(...), set()
                     # Possible values for m2:        'this', set(...), set()
                     # If m2 has nothing under here that we care about, return m1, even if
                     # it's 'all'. This is a change in behavior from visitdir, which would
                     # return True, not 'all', for some reason.
                     if not excluded:
                         return included
                     if included == b'all' or included == b'this':
                         # Never return 'all' here if m2's answer is any kind of
                         # non-empty (either 'this' or set(foo)), since m2 might return
                         # set() for a subdirectory.
                         return b'this'
                     # Possible values for m1:         set(...), set()
                     # Possible values for m2: 'this', set(...)
                     # We ignore m2's set results. They're possibly incorrect:
                     #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
                     #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
                     #    return set(), which is *not* correct, we still need to visit 'dir'!
                     return included

                 def isexact(self):
                     return self._m1.isexact()

                 @encoding.strmethod
                 def __repr__(self):
                     return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)
             def intersectmatchers(m1, m2):
                 """Composes two matchers by matching if both of them match.

                 The second matcher's non-matching-attributes (bad, traversedir) are ignored.
                 """
                 if m1 is None or m2 is None:
                     return m1 or m2

                 if not m1.always():
                     # An always-matching m2 adds no constraint: a shallow copy of m1
                     # is enough. Otherwise a real intersection is needed.
                     if m2.always():
                         return copy.copy(m1)
                     return intersectionmatcher(m1, m2)

                 # m1 matches everything, so m2 alone decides; the copy still has to
                 # carry m1's callbacks.
                 combined = copy.copy(m2)
                 # TODO: Consider encapsulating these things in a class so there's only
                 # one thing to copy from m1.
                 combined.bad = m1.bad
                 combined.traversedir = m1.traversedir
                 return combined
             class intersectionmatcher(basematcher):
                 """Matches a file only when both wrapped matchers match it.

                 The ``bad`` and ``traversedir`` callbacks are taken from the first
                 matcher.
                 """

                 def __init__(self, m1, m2):
                     super().__init__()
                     self._m1 = m1
                     self._m2 = m2
                     self.bad = m1.bad
                     self.traversedir = m1.traversedir

                 def was_tampered_with(self) -> bool:
                     # Tampering with either wrapped matcher counts as tampering here.
                     own = self.was_tampered_with_nonrec()
                     return (
                         own
                         or self._m1.was_tampered_with()
                         or self._m2.was_tampered_with()
                     )

                 @propertycache
                 def _files(self):
                     if not self.isexact():
                         # If neither m1 nor m2 is an exact matcher, we can't easily
                         # intersect the set of files, because their files() are not
                         # always files. For example, if intersecting a matcher
                         # "-I glob:foo.txt" with matcher of "path:dir2", we don't want
                         # to remove "dir2" from the set.
                         return self._m1.files() + self._m2.files()
                     # Filter the exact side's files through the other matcher.
                     if self._m1.isexact():
                         exact, other = self._m1, self._m2
                     else:
                         exact, other = self._m2, self._m1
                     return [f for f in exact.files() if other(f)]

                 def matchfn(self, f):
                     return self._m1(f) and self._m2(f)

                 def visitdir(self, dir):
                     first = self._m1.visitdir(dir)
                     if first == b'all':
                         return self._m2.visitdir(dir)
                     if not first:
                         return False
                     # bool() because visit1=True + visit2='all' should not be 'all'
                     return bool(self._m2.visitdir(dir))

                 def visitchildrenset(self, dir):
                     first = self._m1.visitchildrenset(dir)
                     if not first:
                         return set()
                     second = self._m2.visitchildrenset(dir)
                     if not second:
                         return set()

                     # 'all' on one side defers entirely to the other side.
                     if first == b'all':
                         return second
                     if second == b'all':
                         return first

                     if first == b'this' or second == b'this':
                         return b'this'

                     assert isinstance(first, set) and isinstance(second, set)
                     return first.intersection(second)

                 def always(self):
                     return self._m1.always() and self._m2.always()

                 def isexact(self):
                     return self._m1.isexact() or self._m2.isexact()

                 @encoding.strmethod
                 def __repr__(self):
                     return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)
             class subdirmatcher(basematcher):
                 """Adapt a matcher to work on a subdirectory only.
                 The paths are remapped to remove/insert the path as needed:
                 >>> from . import pycompat
                 >>> m1 = match(util.localpath(b'/root'), b'', [b'a.txt', b'sub/b.txt'], auditor=lambda name: None)
                 >>> m2 = subdirmatcher(b'sub', m1)
                 >>> m2(b'a.txt')
                 False
                 >>> m2(b'b.txt')
                 True
                 >>> m2.matchfn(b'a.txt')
                 False
                 >>> m2.matchfn(b'b.txt')
                 True
                 >>> m2.files()
                 ['b.txt']
                 >>> m2.exact(b'b.txt')
                 True
                 >>> def bad(f, msg):
                 ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
                 >>> m1.bad = bad
                 >>> m2.bad(b'x.txt', b'No such file')
                 sub/x.txt: No such file
                 """
                 def __init__(self, path: bytes, matcher: basematcher) -> None:
                     super(subdirmatcher, self).__init__()
                     self._path = path
                     self._matcher = matcher
                     self._always = matcher.always()
                     self._files = [
                         f[len(path) + 1 :]
                         for f in matcher._files
                         if f.startswith(path + b"/")
                     ]
                     # If the parent repo had a path to this subrepo and the matcher is
                     # a prefix matcher, this submatcher always matches.
                     if matcher.prefix():
                         self._always = any(f == path for f in matcher._files)
                 def was_tampered_with(self) -> bool:
                     return (
                         self.was_tampered_with_nonrec() or self._matcher.was_tampered_with()
                     )
                 def bad(self, f, msg):
                     self._matcher.bad(self._path + b"/" + f, msg)
                 def matchfn(self, f):
                     # Some information is lost in the superclass's constructor, so we
                     # can not accurately create the matching function for the subdirectory
                     # from the inputs. Instead, we override matchfn() and visitdir() to
                     # call the original matcher with the subdirectory path prepended.
                     return self._matcher.matchfn(self._path + b"/" + f)
                 def visitdir(self, dir):
                     """Delegate ``visitdir`` after mapping ``dir`` into the parent's tree.

                     The empty directory maps to the subdirectory itself; any other
                     directory gets the subdirectory path prepended.
                     """
                     parentdir = self._path if dir == b'' else self._path + b"/" + dir
                     return self._matcher.visitdir(parentdir)
                 def visitchildrenset(self, dir):
                     """Delegate ``visitchildrenset`` for ``dir`` mapped into the parent.

                     The empty directory maps to the subdirectory itself; any other
                     directory gets the subdirectory path prepended.
                     """
                     parentdir = self._path if dir == b'' else self._path + b"/" + dir
                     return self._matcher.visitchildrenset(parentdir)
                 def always(self):
                     """Return the ``_always`` flag computed by the constructor."""
                     return self._always
                 def prefix(self):
                     """Whether the wrapped matcher is a prefix matcher and this one
                     does not always match."""
                     parentisprefix = self._matcher.prefix()
                     return parentisprefix and not self._always
                 @encoding.strmethod
                 def __repr__(self):
                     # Built as bytes; the ``encoding.strmethod`` decorator wraps the
                     # method.
                     fields = (self._path, self._matcher)
                     return b'<subdirmatcher path=%r, matcher=%r>' % fields
             class prefixdirmatcher(basematcher):
                 """Adapt a matcher to work on a parent directory.

                 Paths given to this matcher must start with ``path + '/'``; that
                 prefix is stripped before the wrapped matcher is consulted.

                 The matcher's non-matching-attributes (bad, traversedir) are ignored.

                 The prefix path should usually be the relative path from the root of
                 this matcher to the root of the wrapped matcher.

                 >>> m1 = match(util.localpath(b'/root/d/e'), b'f', [b'../a.txt', b'b.txt'], auditor=lambda name: None)
                 >>> m2 = prefixdirmatcher(b'd/e', m1)
                 >>> m2(b'a.txt')
                 False
                 >>> m2(b'd/e/a.txt')
                 True
                 >>> m2(b'd/e/b.txt')
                 False
                 >>> m2.files()
                 ['d/e/a.txt', 'd/e/f/b.txt']
                 >>> m2.exact(b'd/e/a.txt')
                 True
                 >>> m2.visitdir(b'd')
                 True
                 >>> m2.visitdir(b'd/e')
                 True
                 >>> m2.visitdir(b'd/e/f')
                 True
                 >>> m2.visitdir(b'd/e/g')
                 False
                 >>> m2.visitdir(b'd/ef')
                 False
                 """

                 def __init__(self, path, matcher, badfn=None):
                     super(prefixdirmatcher, self).__init__(badfn)
                     if not path:
                         raise error.ProgrammingError(b'prefix path must not be empty')
                     self._matcher = matcher
                     self._path = path
                     # Cached with the trailing slash so prefix tests cannot match a
                     # sibling such as "d/ef" for path "d/e".
                     self._pathprefix = path + b'/'

                 @propertycache
                 def _files(self):
                     # The wrapped matcher's files, re-rooted under the prefix.
                     prefix = self._pathprefix
                     return [prefix + name for name in self._matcher._files]

                 def matchfn(self, f):
                     prefix = self._pathprefix
                     if f.startswith(prefix):
                         return self._matcher.matchfn(f[len(prefix) :])
                     return False

                 @propertycache
                 def _pathdirs(self):
                     # Directories produced by pathutil.finddirs() for the prefix
                     # path; they are visited in order to reach it.
                     return set(pathutil.finddirs(self._path))

                 def visitdir(self, dir):
                     prefix = self._pathprefix
                     if dir == self._path:
                         inner = b''
                     elif dir.startswith(prefix):
                         inner = dir[len(prefix) :]
                     else:
                         # Outside the wrapped matcher's tree.
                         return dir in self._pathdirs
                     return self._matcher.visitdir(inner)

                 def visitchildrenset(self, dir):
                     prefix = self._pathprefix
                     if dir == self._path:
                         inner = b''
                     elif dir.startswith(prefix):
                         inner = dir[len(prefix) :]
                     elif dir in self._pathdirs:
                         return b'this'
                     else:
                         return set()
                     return self._matcher.visitchildrenset(inner)

                 def isexact(self):
                     return self._matcher.isexact()

                 def prefix(self):
                     return self._matcher.prefix()

                 @encoding.strmethod
                 def __repr__(self):
                     fields = (pycompat.bytestr(self._path), self._matcher)
                     return b'<prefixdirmatcher path=%r, matcher=%r>' % fields
             class unionmatcher(basematcher):
                 """A matcher that is the union of several matchers.

                 A file matches as soon as any of the wrapped matchers matches it.

                 The non-matching-attributes (bad, traversedir) are taken from the first
                 matcher.
                 """

                 def __init__(self, matchers):
                     first = matchers[0]
                     super(unionmatcher, self).__init__()
                     self.traversedir = first.traversedir
                     self._matchers = matchers

                 def was_tampered_with(self) -> bool:
                     return self.was_tampered_with_nonrec() or any(
                         m.was_tampered_with() for m in self._matchers
                     )

                 def matchfn(self, f):
                     return any(m(f) for m in self._matchers)

                 def visitdir(self, dir):
                     result = False
                     for m in self._matchers:
                         answer = m.visitdir(dir)
                         # 'all' from any matcher wins immediately.
                         if answer == b'all':
                             return answer
                         result |= answer
                     return result

                 def visitchildrenset(self, dir):
                     children = set()
                     sawthis = False
                     for m in self._matchers:
                         answer = m.visitchildrenset(dir)
                         if not answer:
                             continue
                         if answer == b'all':
                             return answer
                         if sawthis or answer == b'this':
                             sawthis = True
                             # don't break, we might have an 'all' in here.
                             continue
                         assert isinstance(answer, set)
                         children = children.union(answer)
                     return b'this' if sawthis else children

                 @encoding.strmethod
                 def __repr__(self):
                     return b'<unionmatcher matchers=%r>' % self._matchers
             def patkind(pattern, default=None):
                 r"""If pattern is 'kind:pat' with a known kind, return kind.

                 Otherwise ``default`` is returned.

                 >>> patkind(br're:.*\.c$')
                 're'
                 >>> patkind(b'glob:*.c')
                 'glob'
                 >>> patkind(b'relpath:test.py')
                 'relpath'
                 >>> patkind(b'main.py')
                 >>> patkind(b'main.py', default=b're')
                 're'
                 """
                 kind, _pat = _patsplit(pattern, default)
                 return kind
             def _patsplit(pattern, default):
                 """Split a string into the optional pattern kind prefix and the actual
                 pattern.

                 Returns ``(kind, pat)`` when ``pattern`` has the form ``kind:pat`` and
                 ``kind`` is listed in ``allpatternkinds``; otherwise returns
                 ``(default, pattern)`` with the pattern left untouched.
                 """
                 kind, colon, rest = pattern.partition(b':')
                 if colon and kind in allpatternkinds:
                     return kind, rest
                 return default, pattern
             def _globre(pat):
                 r"""Convert an extended glob string to a regexp string.

                 The glob is scanned left to right one byte at a time; each construct
                 contributes one piece of the resulting regexp.

                 >>> from . import pycompat
                 >>> def bprint(s):
                 ...     print(pycompat.sysstr(s))
                 >>> bprint(_globre(br'?'))
                 .
                 >>> bprint(_globre(br'*'))
                 [^/]*
                 >>> bprint(_globre(br'**'))
                 .*
                 >>> bprint(_globre(br'**/a'))
                 (?:.*/)?a
                 >>> bprint(_globre(br'a/**/b'))
                 a/(?:.*/)?b
                 >>> bprint(_globre(br'[a*?!^][^b][!c]'))
                 [a*?!^][\^b][^c]
                 >>> bprint(_globre(br'{a,b}'))
                 (?:a|b)
                 >>> bprint(_globre(br'.\*\?'))
                 \.\*\?
                 """
                 pos, end = 0, len(pat)
                 pieces = []
                 depth = 0  # number of currently open '{' groups
                 escape = util.stringutil.regexbytesescapemap.get

                 def peek():
                     # Next unread byte, or False once the input is exhausted.
                     return pos < end and pat[pos : pos + 1]

                 while pos < end:
                     c = pat[pos : pos + 1]
                     pos += 1
                     if c not in b'*?[{},\\':
                         pieces.append(escape(c, c))
                     elif c == b'*':
                         if peek() != b'*':
                             # A single star never crosses a directory separator.
                             pieces.append(b'[^/]*')
                             continue
                         pos += 1
                         if peek() == b'/':
                             # '**/' also matches zero directories.
                             pos += 1
                             pieces.append(b'(?:.*/)?')
                         else:
                             pieces.append(b'.*')
                     elif c == b'?':
                         pieces.append(b'.')
                     elif c == b'[':
                         close = pos
                         # A leading '!' or ']' belongs to the class itself.
                         if close < end and pat[close : close + 1] in b'!]':
                             close += 1
                         while close < end and pat[close : close + 1] != b']':
                             close += 1
                         if close >= end:
                             # Unterminated class: treat '[' as a literal.
                             pieces.append(b'\\[')
                         else:
                             stuff = pat[pos:close].replace(b'\\', b'\\\\')
                             pos = close + 1
                             lead = stuff[0:1]
                             if lead == b'!':
                                 stuff = b'^' + stuff[1:]
                             elif lead == b'^':
                                 stuff = b'\\' + stuff
                             pieces.append(b'[%s]' % stuff)
                     elif c == b'{':
                         depth += 1
                         pieces.append(b'(?:')
                     elif c == b'}' and depth:
                         pieces.append(b')')
                         depth -= 1
                     elif c == b',' and depth:
                         pieces.append(b'|')
                     elif c == b'\\':
                         nxt = peek()
                         if nxt:
                             pos += 1
                             pieces.append(escape(nxt, nxt))
                         else:
                             # Trailing backslash: emit it escaped.
                             pieces.append(escape(c, c))
                     else:
                         # '}' or ',' outside of any group is a plain character.
                         pieces.append(escape(c, c))
                 return b''.join(pieces)
             # Splits a leading inline-flag group such as ``(?i)`` from the rest of a
             # regular expression.
             FLAG_RE = util.re.compile(br'^\(\?([aiLmsux]+)\)(.*)')


             def _regex(kind, pat, globsuffix):
                 """Convert a (normalized) pattern of any kind into a
                 regular expression.

                 globsuffix is appended to the regexp of globs.

                 Raises error.ProgrammingError for 'filepath' patterns and for kinds
                 that are not handled here.
                 """
                 if not pat and kind in (b'glob', b'relpath'):
                     return b''
                 if kind == b're':
                     return pat
                 if kind == b'filepath':
                     raise error.ProgrammingError(
                         "'filepath:' patterns should not be converted to a regex"
                     )
                 if kind in (b'path', b'relpath'):
                     if pat == b'.':
                         return b''
                     # Match the path itself or anything below it.
                     return util.stringutil.reescape(pat) + b'(?:/|$)'
                 if kind == b'rootfilesin':
                     # Pattern is a directory name ('.' yields no prefix).
                     dirprefix = b''
                     if pat != b'.':
                         dirprefix = util.stringutil.reescape(pat) + b'/'
                     # Anything after the pattern must be a non-directory.
                     return dirprefix + b'[^/]+$'
                 if kind == b'relglob':
                     anyname = b'[^/]*'
                     body = _globre(pat)
                     if body.startswith(anyname):
                         # When pat has the form *XYZ (common), make the returned regex more
                         # legible by returning the regex for **XYZ instead of **/*XYZ.
                         return b'.*' + body[len(anyname) :] + globsuffix
                     return b'(?:|.*/)' + body + globsuffix
                 if kind == b'relre':
                     flags = None
                     found = FLAG_RE.match(pat)
                     if found:
                         flags, pat = found.groups()
                     # Unanchored regexps may match anywhere in the path.
                     if not pat.startswith(b'^'):
                         pat = b'.*' + pat
                     if flags is not None:
                         # Re-apply the inline flags, scoped to this pattern only.
                         pat = br'(?%s:%s)' % (flags, pat)
                     return pat
                 if kind in (b'glob', b'rootglob'):
                     return _globre(pat) + globsuffix
                 raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))
             def _buildmatch(kindpats, globsuffix, root):
                 """Return regexp string and a matcher function for kindpats.

                 globsuffix is appended to the regexp of globs.

                 Up to two match functions are built, one for 'subinclude' patterns
                 and one for everything else; a file matches if either accepts it.
                 """
                 candidates = []

                 subincludes, kindpats = _expandsubinclude(kindpats, root)
                 if subincludes:
                     # Sub-matchers are built on first use and cached per prefix.
                     built = {}

                     def matchsubinclude(f):
                         for prefix, matcherargs in subincludes:
                             if not f.startswith(prefix):
                                 continue
                             sub = built.get(prefix)
                             if sub is None:
                                 sub = built[prefix] = match(*matcherargs)
                             if sub(f[len(prefix) :]):
                                 return True
                         return False

                     candidates.append(matchsubinclude)

                 regex = b''
                 if kindpats:
                     if all(k == b'rootfilesin' for k, p, s in kindpats):
                         # Pure 'rootfilesin' sets need no regexp: compare the file's
                         # parent directory against the set of listed directories.
                         dirs = {p for k, p, s in kindpats}

                         def inlisteddir(f):
                             head, sep, _tail = f.rpartition(b'/')
                             parent = head if sep else b'.'
                             return parent in dirs

                         regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
                         candidates.append(inlisteddir)
                     else:
                         regex, regexfn = _buildregexmatch(kindpats, globsuffix)
                         candidates.append(regexfn)

                 if len(candidates) == 1:
                     return regex, candidates[0]
                 return regex, lambda f: any(fn(f) for fn in candidates)
             # Size limit, in bytes, applied to matcher regular expressions.
             MAX_RE_SIZE = 20000


             def _joinregexes(regexps):
                 """gather multiple regular expressions into a single one

                 The expressions are combined as alternatives separated by ``|``.
                 """
                 alternation = b'|'
                 return alternation.join(regexps)
             def _buildregexmatch(kindpats, globsuffix):
                 """Build a match function from a list of kinds and kindpats,
                 return regexp string and a matcher function.

                 'filepath' patterns are matched by exact set membership instead of
                 being turned into a regexp. The remaining patterns are joined with
                 '|'; when the joined expression would exceed MAX_RE_SIZE it is split
                 into several groups that are tried in turn. The regexp string that
                 is returned is always the full, unsplit join.

                 Raises error.Abort when a single pattern is longer than MAX_RE_SIZE
                 or when re.error is raised while building the matcher.

                 Test too large input
                 >>> _buildregexmatch([
                 ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
                 ... ], b'$')
                 Traceback (most recent call last):
                 ...
                 Abort: matcher pattern is too long (20009 bytes)
                 """
                 try:
                     allgroups = []
                     regexps = []
                     exact = set()
                     for kind, pattern, _source in kindpats:
                         if kind == b'filepath':
                             exact.add(pattern)
                             continue
                         regexps.append(_regex(kind, pattern, globsuffix))
                     fullregexp = _joinregexes(regexps)
                     # Walk the pieces, closing the current group just before the
                     # piece that would push it past MAX_RE_SIZE.
                     startidx = 0
                     groupsize = 0
                     for idx, r in enumerate(regexps):
                         piecesize = len(r)
                         if piecesize > MAX_RE_SIZE:
                             msg = _(b"matcher pattern is too long (%d bytes)") % piecesize
                             raise error.Abort(msg)
                         elif (groupsize + piecesize) > MAX_RE_SIZE:
                             group = regexps[startidx:idx]
                             allgroups.append(_joinregexes(group))
                             startidx = idx
                             groupsize = 0
                         # +1 for the '|' separator added when joining.
                         groupsize += piecesize + 1
                     if startidx == 0:
                         # No split happened: a single matcher covers everything.
                         matcher = _rematcher(fullregexp)
                         func = lambda s: bool(matcher(s))
                     else:
                         # Flush the last, still open, group.
                         group = regexps[startidx:]
                         allgroups.append(_joinregexes(group))
                         allmatchers = [_rematcher(g) for g in allgroups]
                         func = lambda s: any(m(s) for m in allmatchers)
                     actualfunc = func
                     if exact:
                         # An empty regex will always match, so only call the regex if
                         # there were any actual patterns to match.
                         if not regexps:
                             actualfunc = lambda s: s in exact
                         else:
                             actualfunc = lambda s: s in exact or func(s)
                     return fullregexp, actualfunc
                 except re.error:
                     # Something in the combined expression is invalid. Retry the
                     # patterns one by one so the error can name the offending one.
                     # NOTE(review): _rematcher is defined elsewhere; presumably it
                     # compiles the expression and is what raises re.error - confirm.
                     for k, p, s in kindpats:
                         if k == b'filepath':
                             continue
                         try:
                             _rematcher(_regex(k, p, globsuffix))
                         except re.error:
                             # ``s`` is the third element of the kindpat tuple; it
                             # prefixes the message when it is non-empty.
                             if s:
                                 raise error.Abort(
                                     _(b"%s: invalid pattern (%s): %s") % (s, k, p)
                                 )
                             else:
                                 raise error.Abort(_(b"invalid pattern (%s): %s") % (k, p))
                     # Every pattern is fine on its own; report a generic failure.
                     raise error.Abort(_(b"invalid pattern"))
             def _patternrootsanddirs(kindpats):
                 """Returns roots and directories corresponding to each pattern.

                 This calculates the roots and directories exactly matching the patterns and
                 returns a tuple of (roots, dirs) for each. It does not return other
                 directories which may also need to be considered, like the parent
                 directories.

                 Every 'rootfilesin' pattern adds one entry to ``dirs``; every other
                 pattern adds one entry to ``roots``.
                 """
                 globchars = (b'[', b'{', b'*', b'?')
                 roots = []
                 dirs = []
                 for kind, pat, source in kindpats:
                     if kind in (b'glob', b'rootglob'):
                         # find the non-glob prefix: leading path components free of
                         # any glob metacharacter
                         literal = []
                         for component in pat.split(b'/'):
                             if any(ch in component for ch in globchars):
                                 break
                             literal.append(component)
                         roots.append(b'/'.join(literal))
                     elif kind in (b'relpath', b'path', b'filepath'):
                         roots.append(b'' if pat == b'.' else pat)
                     elif kind == b'rootfilesin':
                         dirs.append(b'' if pat == b'.' else pat)
                     else:
                         # relglob, re, relre
                         roots.append(b'')
                 return roots, dirs
             def _roots(kindpats):
                 '''Returns root directories to match recursively from the given patterns.'''
                 # Only the roots are wanted; the exact directories are dropped.
                 return _patternrootsanddirs(kindpats)[0]
             def _rootsdirsandparents(kindpats):
                 """Returns roots and exact directories from patterns.

                 `roots` are directories to match recursively, `dirs` should
                 be matched non-recursively, and `parents` are the implicitly required
                 directories to walk to items in either roots or dirs.

                 Returns a tuple of (roots, dirs, parents), where roots and dirs are
                 lists and parents is a set.

                 >>> r = _rootsdirsandparents(
                 ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
                 ...      (b'glob', b'g*', b'')])
                 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
                 (['g/h', 'g/h', ''], []) ['', 'g']
                 >>> r = _rootsdirsandparents(
                 ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
                 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
                 ([], ['g/h', '']) ['', 'g']
                 >>> r = _rootsdirsandparents(
                 ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
                 ...      (b'path', b'', b'')])
                 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
                 (['r', 'p/p', ''], []) ['', 'p']
                 >>> r = _rootsdirsandparents(
                 ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
                 ...      (b'relre', b'rr', b'')])
                 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
                 (['', '', ''], []) ['']
                 """
                 roots, exactdirs = _patternrootsanddirs(kindpats)

                 # Add the parents as non-recursive/exact directories, since they must be
                 # scanned to get to either the roots or the other exact directories.
                 parents = set(pathutil.dirs(exactdirs))
                 parents.update(pathutil.dirs(roots))

                 # FIXME: all uses of this function convert these to sets, do so before
                 # returning.
                 # FIXME: all uses of this function do not need anything in 'roots' and
                 # 'dirs' to also be in 'parents', consider removing them before returning.
                 return roots, exactdirs, parents
             def _explicitfiles(kindpats):
                 """Returns the potential explicit filenames from the patterns.

                 >>> _explicitfiles([(b'path', b'foo/bar', b'')])
                 ['foo/bar']
                 >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
                 []
                 """
                 # Keep only the pattern kinds where one can specify filenames (vs only
                 # directory names).
                 return _roots([kp for kp in kindpats if kp[0] != b'rootfilesin'])
             def _prefix(kindpats):
                 '''Whether all the patterns match a prefix (i.e. recursively)'''
                 # Only 'path' and 'relpath' count as prefix patterns; an empty list
                 # yields True.
                 return all(
                     kind in (b'path', b'relpath') for kind, _pat, _source in kindpats
                 )
             # Compiled on first use by readpatternfile() and reused afterwards.
             _commentre = None

             # Typing-only overloads: they tie the element type of the list returned by
             # readpatternfile() to the value of ``sourceinfo``. Nothing in this block
             # runs at import time because typing.TYPE_CHECKING is False at runtime.
             if typing.TYPE_CHECKING:
                 from typing_extensions import (
                     Literal,
                 )

                 @overload
                 def readpatternfile(
                     filepath: bytes, warn: Callable[[bytes], Any], sourceinfo: Literal[True]
                 ) -> List[Tuple[bytes, int, bytes]]:
                     ...

                 @overload
                 def readpatternfile(
                     filepath: bytes,
                     warn: Callable[[bytes], Any],
                     sourceinfo: Literal[False],
                 ) -> List[bytes]:
                     ...

                 @overload
                 def readpatternfile(
                     filepath: bytes,
                     warn: Callable[[bytes], Any],
                     sourceinfo: bool = False,
                 ) -> List[Union[Tuple[bytes, int, bytes], bytes]]:
                     ...
             def readpatternfile(filepath, warn, sourceinfo=False):
                 """parse a pattern file, returning a list of
                 patterns. These patterns should be given to compile()
                 to be validated and converted into a match function.

                 trailing white space is dropped.
                 the escape character is backslash.
                 comments start with #.
                 empty lines are skipped.

                 lines can be of the following formats:

                 syntax: regexp # defaults following lines to non-rooted regexps
                 syntax: glob   # defaults following lines to non-rooted globs
                 re:pattern     # non-rooted regular expression
                 glob:pattern   # non-rooted glob
                 rootglob:pat   # rooted glob (same root as ^ in regexps)
                 pattern        # pattern of the current default type

                 if sourceinfo is set, returns a list of tuples:
                 (pattern, lineno, originalline).
                 This is useful to debug ignore patterns.
                 ``originalline`` is the line after comment removal, stripping of
                 trailing white space and removal of any recognised syntax prefix;
                 ``lineno`` is 1-based.

                 ``warn``, when truthy, is called with a message for every
                 ``syntax:`` line that names an unknown syntax; such a line does not
                 change the current default.

                 The file is closed before returning, including when an exception is
                 raised while it is being read.
                 """
                 global _commentre

                 syntaxes = {
                     b're': b'relre:',
                     b'regexp': b'relre:',
                     b'glob': b'relglob:',
                     b'rootglob': b'rootglob:',
                     b'include': b'include',
                     b'subinclude': b'subinclude',
                 }
                 syntax = b'relre:'
                 patterns = []

                 # ``with`` releases the file handle even if iteration or ``warn``
                 # raises, which an explicit close() at the end would not.
                 with open(filepath, b'rb') as fp:
                     for lineno, line in enumerate(fp, start=1):
                         if b"#" in line:
                             if not _commentre:
                                 _commentre = util.re.compile(
                                     br'((?:^|[^\\])(?:\\\\)*)#.*'
                                 )
                             # remove comments prefixed by an even number of escapes
                             m = _commentre.search(line)
                             if m:
                                 line = line[: m.end(1)]
                             # fixup properly escaped comments that survived the above
                             line = line.replace(b"\\#", b"#")
                         line = line.rstrip()
                         if not line:
                             continue

                         if line.startswith(b'syntax:'):
                             s = line[7:].strip()
                             try:
                                 syntax = syntaxes[s]
                             except KeyError:
                                 if warn:
                                     warn(
                                         _(b"%s: ignoring invalid syntax '%s'\n")
                                         % (filepath, s)
                                     )
                             continue

                         # A per-line prefix overrides the current default syntax for
                         # this line only.
                         linesyntax = syntax
                         for s, rels in syntaxes.items():
                             if line.startswith(rels):
                                 linesyntax = rels
                                 line = line[len(rels) :]
                                 break
                             elif line.startswith(s + b':'):
                                 linesyntax = rels
                                 line = line[len(s) + 1 :]
                                 break
                         if sourceinfo:
                             patterns.append((linesyntax + line, lineno, line))
                         else:
                             patterns.append(linesyntax + line)
                 return patterns