upstream/mercurial-mirror Commit - r47634:8bca353b

1

# match.py - filename matching

1

# match.py - filename matching

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import, print_function

8

from __future__ import absolute_import, print_function

9

10

import bisect

10

import copy

11

import copy

11

import itertools

12

import itertools

12

import os

13

import os

13

import re

14

import re

14

15

from .i18n import _

16

from .i18n import _

16

from .pycompat import open

17

from .pycompat import open

17

from . import (

18

from . import (

18

encoding,

19

encoding,

19

error,

20

error,

20

pathutil,

21

pathutil,

21

policy,

22

policy,

22

pycompat,

23

pycompat,

23

util,

24

util,

24

)

25

)

25

from .utils import stringutil

26

from .utils import stringutil

26

27

# Rust-backed 'dirstate' module, as resolved by policy.importrust().
rustmod = policy.importrust('dirstate')

28

29

# Every pattern kind prefix (the part before ':' in 'kind:pattern') known to
# this module.
allpatternkinds = (
    b're',
    b'glob',
    b'path',
    b'relglob',
    b'relpath',
    b'relre',
    b'rootglob',
    b'listfile',
    b'listfile0',
    b'set',
    b'include',
    b'subinclude',
    b'rootfilesin',
)
# Pattern kinds that _donormalize() canonicalizes relative to cwd.
cwdrelativepatternkinds = (b'relpath', b'glob')

45

46

# Local alias so the decorator can be written as a bare @propertycache.
propertycache = util.propertycache

47

48

49

def _rematcher(regex):
    """compile the regexp with the best available regexp engine and return a
    matcher function"""
    m = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        return m.test_match
    except AttributeError:
        # the compiled object has no test_match(); fall back to match()
        return m.match

58

59

60

def _expandsets(cwd, kindpats, ctx=None, listsubrepos=False, badfn=None):
    """Returns the kindpats list with the 'set' patterns expanded to matchers

    The result is a ``(matchers, other)`` pair:

    - ``matchers`` holds, for every 'set' pattern, the matcher returned by
      ``ctx.matchfileset()`` and, when ``listsubrepos`` is true, one
      prefixdirmatcher per entry of ``ctx.substate``;
    - ``other`` holds the remaining ``(kind, pat, source)`` tuples, in their
      original order.

    Raises error.ProgrammingError if a 'set' pattern is seen while ``ctx`` is
    None.
    """
    matchers = []
    other = []

    for kind, pat, source in kindpats:
        if kind != b'set':
            other.append((kind, pat, source))
            continue

        if ctx is None:
            raise error.ProgrammingError(
                b"fileset expression with no context"
            )
        matchers.append(ctx.matchfileset(cwd, pat, badfn=badfn))

        if listsubrepos:
            for subpath in ctx.substate:
                sm = ctx.sub(subpath).matchfileset(cwd, pat, badfn=badfn)
                pm = prefixdirmatcher(subpath, sm, badfn=badfn)
                matchers.append(pm)

    return matchers, other

82

83

84

def _expandsubinclude(kindpats, root):
    """Returns the list of subinclude matcher args and the kindpats without the
    subincludes in it.

    The first element of the returned pair is a list of
    ``(prefix, matcherargs)`` tuples, one per 'subinclude' pattern, where
    ``matcherargs`` is ``(newroot, b'', [], [b'include:<path>'])``. The second
    element is the list of all other ``(kind, pat, source)`` tuples.
    """
    relmatchers = []
    other = []

    for kind, pat, source in kindpats:
        if kind == b'subinclude':
            # resolve the included file relative to the file that named it
            sourceroot = pathutil.dirname(util.normpath(source))
            pat = util.pconvert(pat)
            path = pathutil.join(sourceroot, pat)

            newroot = pathutil.dirname(path)
            matcherargs = (newroot, b'', [], [b'include:%s' % path])

            prefix = pathutil.canonpath(root, root, newroot)
            if prefix:
                prefix += b'/'
            relmatchers.append((prefix, matcherargs))
        else:
            other.append((kind, pat, source))

    return relmatchers, other

107

108

109

def _kindpatsalwaysmatch(kindpats):
    """Checks whether the kindpats match everything, as e.g.
    'relpath:.' does.

    True only if every entry has an empty pattern and a kind of 'relpath' or
    'glob'; an empty kindpats list therefore also yields True.
    """
    return all(
        pat == b'' and kind in (b'relpath', b'glob')
        for kind, pat, source in kindpats
    )

117

118

119

def _buildkindpatsmatcher(
    matchercls,
    root,
    cwd,
    kindpats,
    ctx=None,
    listsubrepos=False,
    badfn=None,
):
    """Build a single matcher for the given kindpats.

    'set' patterns are expanded to fileset matchers by _expandsets(); any
    remaining patterns are handed to ``matchercls(root, kindpats,
    badfn=badfn)``. The result is a nevermatcher when there is nothing to
    match, the sole matcher when there is exactly one, and a unionmatcher of
    all of them otherwise.
    """
    matchers = []
    fms, kindpats = _expandsets(
        cwd,
        kindpats,
        ctx=ctx,
        listsubrepos=listsubrepos,
        badfn=badfn,
    )
    if kindpats:
        m = matchercls(root, kindpats, badfn=badfn)
        matchers.append(m)
    if fms:
        matchers.extend(fms)
    if not matchers:
        return nevermatcher(badfn=badfn)
    if len(matchers) == 1:
        return matchers[0]
    return unionmatcher(matchers)

146

147

148

def match(
    root,
    cwd,
    patterns=None,
    include=None,
    exclude=None,
    default=b'glob',
    auditor=None,
    ctx=None,
    listsubrepos=False,
    warn=None,
    badfn=None,
    icasefs=False,
):
    r"""build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - optional path auditor
    ctx - optional changecontext
    listsubrepos - if True, recurse into subrepositories
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    >>> def _match(root, *args, **kwargs):
    ...     return match(util.localpath(root), *args, **kwargs)

    Usually a patternmatcher is returned:
    >>> _match(b'/foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
    <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>

    Combining 'patterns' with 'include' (resp. 'exclude') gives an
    intersectionmatcher (resp. a differencematcher):
    >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
    <class 'mercurial.match.intersectionmatcher'>
    >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
    <class 'mercurial.match.differencematcher'>

    Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
    >>> _match(b'/foo', b'.', [])
    <alwaysmatcher>

    The 'default' argument determines which kind of pattern is assumed if a
    pattern has no prefix:
    >>> _match(b'/foo', b'.', [b'.*\.c$'], default=b're')
    <patternmatcher patterns='.*\\.c$'>
    >>> _match(b'/foo', b'.', [b'main.py'], default=b'relpath')
    <patternmatcher patterns='main\\.py(?:/|$)'>
    >>> _match(b'/foo', b'.', [b'main.py'], default=b're')
    <patternmatcher patterns='main.py'>

    The primary use of matchers is to check whether a value (usually a file
    name) matches against one of the patterns given at initialization. There
    are two ways of doing this check.

    >>> m = _match(b'/foo', b'', [b're:.*\.c$', b'relpath:a'])

    1. Calling the matcher with a file name returns True if any pattern
       matches that file name:
    >>> m(b'a')
    True
    >>> m(b'main.c')
    True
    >>> m(b'test.py')
    False

    2. Using the exact() method only returns True if the file name matches one
       of the exact patterns (i.e. not re: or glob: patterns):
    >>> m.exact(b'a')
    True
    >>> m.exact(b'main.c')
    False
    """
    assert os.path.isabs(root)
    cwd = os.path.join(root, util.localpath(cwd))
    normalize = _donormalize
    if icasefs:
        # icasefs requires ctx: its dirstate supplies the case normalization
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            kp = _donormalize(patterns, default, root, cwd, auditor, warn)
            kindpats = []
            for kind, pats, source in kp:
                if kind not in (b're', b'relre'):  # regex can't be normalized
                    p = pats
                    pats = dsnormalize(pats)

                    # Preserve the original to handle a case only rename.
                    if p != pats and p in dirstate:
                        kindpats.append((kind, p, source))

                kindpats.append((kind, pats, source))
            return kindpats

    if patterns:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(badfn)
        else:
            m = _buildkindpatsmatcher(
                patternmatcher,
                root,
                cwd,
                kindpats,
                ctx=ctx,
                listsubrepos=listsubrepos,
                badfn=badfn,
            )
    else:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using nevermatcher).
        m = alwaysmatcher(badfn)

    if include:
        # include patterns always default to 'glob'; badfn is not forwarded
        kindpats = normalize(include, b'glob', root, cwd, auditor, warn)
        im = _buildkindpatsmatcher(
            includematcher,
            root,
            cwd,
            kindpats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )
        m = intersectmatchers(m, im)
    if exclude:
        # exclude patterns always default to 'glob'; badfn is not forwarded
        kindpats = normalize(exclude, b'glob', root, cwd, auditor, warn)
        em = _buildkindpatsmatcher(
            includematcher,
            root,
            cwd,
            kindpats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )
        m = differencematcher(m, em)
    return m

310

311

312

def exact(files, badfn=None):
    """Return an exactmatcher built from ``files`` with the given ``badfn``."""
    return exactmatcher(files, badfn=badfn)

314

315

316

def always(badfn=None):
    """Return an alwaysmatcher using the given ``badfn``."""
    return alwaysmatcher(badfn)

318

319

320

def never(badfn=None):
    """Return a nevermatcher using the given ``badfn``."""
    return nevermatcher(badfn)

322

323

324

def badmatch(match, badfn):
    """Make a copy of the given matcher, replacing its bad method with the given
    one.

    The copy is shallow (copy.copy); the matcher passed in is left untouched.
    """
    m = copy.copy(match)
    m.bad = badfn
    return m

331

332

333

def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
    """Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Returns a list of ``(kind, pat, source)`` tuples. ``source`` is b'' for
    patterns given directly, and the listfile/include path for patterns that
    were read from such a file.

    Raises error.Abort if a listfile cannot be read or if reading an include
    file aborts; an include file that fails with IOError is skipped, with a
    message sent to ``warn`` when one is given.
    """
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
        elif kind in (b'relglob', b'path', b'rootfilesin', b'rootglob'):
            pat = util.normpath(pat)
        elif kind in (b'listfile', b'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == b'listfile0':
                    files = files.split(b'\0')
                else:
                    files = files.splitlines()
                files = [f for f in files if f]
            except EnvironmentError:
                raise error.Abort(_(b"unable to read file list (%s)") % pat)
            # every entry of the list file is itself a pattern; record the
            # list file as its source
            for k, p, source in _donormalize(
                files, default, root, cwd, auditor, warn
            ):
                kindpats.append((k, p, pat))
            continue
        elif kind == b'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(
                    includepats, default, root, cwd, auditor, warn
                ):
                    # keep the innermost source when includes are nested
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                # prefix the message with the include file being processed
                raise error.Abort(
                    b'%s: %s'
                    % (
                        pat,
                        inst.message,
                    )  # pytype: disable=unsupported-operands
                )
            except IOError as inst:
                if warn:
                    warn(
                        _(b"skipping unreadable pattern file '%s': %s\n")
                        % (pat, stringutil.forcebytestr(inst.strerror))
                    )
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, b''))
    return kindpats

383

384

385

class basematcher(object):
    """Common base for matchers.

    As defined here, the matcher matches nothing (matchfn() returns False),
    lists no files, and asks callers to visit every directory; subclasses
    override the relevant methods.
    """

    def __init__(self, badfn=None):
        if badfn is not None:
            # shadow the default bad() method on this instance only
            self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        """Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message."""

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        """Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots"""
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        """Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        """
        return True

    def visitchildrenset(self, dir):
        """Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

         visitdir | visitchildrenset
        ----------+-------------------
         False    | set()
         'all'    | 'all'
         True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        """
        return b'this'

    def always(self):
        """Matcher will match everything and .files() will be empty --
        optimization might be possible."""
        return False

    def isexact(self):
        """Matcher will match exactly the list of files in .files() --
        optimization might be possible."""
        return False

    def prefix(self):
        """Matcher will match the paths in .files() recursively --
        optimization might be possible."""
        return False

    def anypats(self):
        """None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult."""
        return not self.always() and not self.isexact() and not self.prefix()

499

500

501

class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, badfn=None):
        super(alwaysmatcher, self).__init__(badfn)

    def always(self):
        return True

    def matchfn(self, f):
        return True

    def visitdir(self, dir):
        # every directory and all of its subdirectories are of interest
        return b'all'

    def visitchildrenset(self, dir):
        return b'all'

    def __repr__(self):
        return r'<alwaysmatcher>'

521

522

523

class nevermatcher(basematcher):
    '''Matches nothing.'''

    def __init__(self, badfn=None):
        super(nevermatcher, self).__init__(badfn)

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        return False

    def visitchildrenset(self, dir):
        return set()

    def __repr__(self):
        return r'<nevermatcher>'

548

549

550

class predicatematcher(basematcher):
    """A matcher adapter for a simple boolean function"""

    def __init__(self, predfn, predrepr=None, badfn=None):
        """Wrap `predfn` as this matcher's matchfn.

        `predrepr` is only consulted when building the repr; `badfn` is
        forwarded to the base-class constructor.
        """
        super(predicatematcher, self).__init__(badfn)
        self.matchfn = predfn
        self._predrepr = predrepr

    @encoding.strmethod
    def __repr__(self):
        # Prefer the caller-supplied description; fall back to the repr of
        # the predicate itself when that description comes out empty.
        described = stringutil.buildrepr(self._predrepr)
        if not described:
            described = pycompat.byterepr(self.matchfn)
        # NOTE(review): 'predicatenmatcher' looks like a typo for
        # 'predicatematcher'. It is left untouched because the repr may
        # appear in expected test output -- confirm before changing.
        return b'<predicatenmatcher pred=%s>' % described

564

565

566

def path_or_parents_in_set(path, prefix_set):
    """Returns True if `path` (or any parent of `path`) is in `prefix_set`."""
    size = len(prefix_set)
    if size == 0:
        return False
    if path in prefix_set:
        return True

    # With more than 5 entries in prefix_set it is *probably* quicker to
    # "walk up" the directory hierarchy of `path`, on the assumption that
    # most hierarchies are relatively shallow and hash lookup is cheap.
    if size > 5:
        for parentdir in pathutil.finddirs(path):
            if parentdir in prefix_set:
                return True
        return False

    # FIXME: Ideally we'd never get to this point if this is the case - we'd
    # recognize ourselves as an 'always' matcher and skip this.
    if b'' in prefix_set:
        return True

    # Few entries: test each one as a directory prefix of `path`. The byte
    # right after the prefix must be a slash; taking it as a one-byte slice
    # compares the same way on Python 2 and Python 3 and cannot raise
    # IndexError.
    for pf in prefix_set:
        end = len(pf)
        if path.startswith(pf) and path[end : end + 1] == b'/':
            return True
    return False

594

595

596

class patternmatcher(basematcher):
    r"""Matches a set of (kind, pat, source) against a 'root' directory.

    >>> kindpats = [
    ...     (b're', br'.*\.c$', b''),
    ...     (b'path', b'foo/a', b''),
    ...     (b'relpath', b'b', b''),
    ...     (b'glob', b'*.h', b''),
    ... ]
    >>> m = patternmatcher(b'foo', kindpats)
    >>> m(b'main.c')  # matches re:.*\.c$
    True
    >>> m(b'b.txt')
    False
    >>> m(b'foo/a')  # matches path:foo/a
    True
    >>> m(b'a')  # does not match path:b, since 'root' is 'foo'
    False
    >>> m(b'b')  # matches relpath:b, since 'root' is 'foo'
    True
    >>> m(b'lib.h')  # matches glob:*.h
    True

    >>> m.files()
    ['', 'foo/a', 'b', '']
    >>> m.exact(b'foo/a')
    True
    >>> m.exact(b'b')
    True
    >>> m.exact(b'lib.h')  # exact matches are for (rel)path kinds
    False
    """

    def __init__(self, root, kindpats, badfn=None):
        super(patternmatcher, self).__init__(badfn)

        # Everything below is derived from kindpats by module-level helpers.
        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        self._pats, self.matchfn = _buildmatch(kindpats, b'$', root)

    @propertycache
    def _dirs(self):
        # Computed once per instance from self._fileset (which is not
        # defined in this class).
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        """Return b'all', True or False for directory `dir`."""
        if self._prefix and dir in self._fileset:
            return b'all'
        if dir in self._dirs:
            return True
        return path_or_parents_in_set(dir, self._fileset)

    def visitchildrenset(self, dir):
        """Translate the visitdir() answer into b'all', b'this' or set()."""
        visit = self.visitdir(dir)
        if not visit:
            return set()
        if visit is True:
            return b'this'
        assert visit == b'all'
        return b'all'

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return b'<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)

660

661

662

# This is basically a reimplementation of pathutil.dirs that stores the

663

# This is basically a reimplementation of pathutil.dirs that stores the

663

# children instead of just a count of them, plus a small optional optimization

664

# children instead of just a count of them, plus a small optional optimization

664

# to avoid some directories we don't need.

665

# to avoid some directories we don't need.

665

class _dirchildren(object):

666

class _dirchildren(object):

666

def __init__(self, paths, onlyinclude=None):

667

def __init__(self, paths, onlyinclude=None):

667

self._dirs = {}

668

self._dirs = {}

668

self._onlyinclude = onlyinclude or []

669

self._onlyinclude = onlyinclude or []

669

addpath = self.addpath

670

addpath = self.addpath

670

for f in paths:

671

for f in paths:

671

addpath(f)

672

addpath(f)

672

673

def addpath(self, path):

674

def addpath(self, path):

674

if path == b'':

675

if path == b'':

675

return

676

return

676

dirs = self._dirs

677

dirs = self._dirs

677

findsplitdirs = _dirchildren._findsplitdirs

678

findsplitdirs = _dirchildren._findsplitdirs

678

for d, b in findsplitdirs(path):

679

for d, b in findsplitdirs(path):

679

if d not in self._onlyinclude:

680

if d not in self._onlyinclude:

680

continue

681

continue

681

dirs.setdefault(d, set()).add(b)

682

dirs.setdefault(d, set()).add(b)

682

683

@staticmethod

684

@staticmethod

684

def _findsplitdirs(path):

685

def _findsplitdirs(path):

685

# yields (dirname, basename) tuples, walking back to the root. This is

686

# yields (dirname, basename) tuples, walking back to the root. This is

686

# very similar to pathutil.finddirs, except:

687

# very similar to pathutil.finddirs, except:

687

# - produces a (dirname, basename) tuple, not just 'dirname'

688

# - produces a (dirname, basename) tuple, not just 'dirname'

688

# Unlike manifest._splittopdir, this does not suffix `dirname` with a

689

# Unlike manifest._splittopdir, this does not suffix `dirname` with a

689

# slash.

690

# slash.

690

oldpos = len(path)

691

oldpos = len(path)

691

pos = path.rfind(b'/')

692

pos = path.rfind(b'/')

692

while pos != -1:

693

while pos != -1:

693

yield path[:pos], path[pos + 1 : oldpos]

694

yield path[:pos], path[pos + 1 : oldpos]

694

oldpos = pos

695

oldpos = pos

695

pos = path.rfind(b'/', 0, pos)

696

pos = path.rfind(b'/', 0, pos)

696

yield b'', path[:oldpos]

697

yield b'', path[:oldpos]

697

698

def get(self, path):

699

def get(self, path):

699

return self._dirs.get(path, set())

700

return self._dirs.get(path, set())

700

701

702

class includematcher(basematcher):
    """Matcher built from include patterns (kindpats) under `root`."""

    def __init__(self, root, kindpats, badfn=None):
        super(includematcher, self).__init__(badfn)
        if rustmod is not None:
            # We need to pass the patterns to Rust because they can contain
            # patterns from the user interface
            self._kindpats = kindpats
        self._pats, self.matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
        self._prefix = _prefix(kindpats)
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots: directories which are recursively included.
        self._roots = set(roots)
        # dirs: directories which are non-recursively included.
        self._dirs = set(dirs)
        # parents: directories which are non-recursively included because
        # they are needed to get to items in _dirs or _roots.
        self._parents = parents

    def visitdir(self, dir):
        """Return b'all', True or False for directory `dir`."""
        if self._prefix and dir in self._roots:
            return b'all'
        if dir in self._dirs or dir in self._parents:
            return True
        return path_or_parents_in_set(dir, self._roots)

    @propertycache
    def _allparentschildren(self):
        # It may seem odd that we add dirs, roots, and parents, and then
        # restrict to only parents. This is to catch the case of:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # if we asked for the children of 'foo', but had only added
        # self._parents, we wouldn't be able to respond ['bar'].
        everything = itertools.chain(self._dirs, self._roots, self._parents)
        return _dirchildren(everything, onlyinclude=self._parents)

    def visitchildrenset(self, dir):
        """Return b'all', b'this' or a set of child names for `dir`."""
        if self._prefix and dir in self._roots:
            return b'all'
        # Note: this does *not* include the 'dir in self._parents' case from
        # visitdir, that's handled below.
        if (
            b'' in self._roots
            or dir in self._dirs
            or path_or_parents_in_set(dir, self._roots)
        ):
            return b'this'

        if dir not in self._parents:
            return set()
        return self._allparentschildren.get(dir) or set()

    @encoding.strmethod
    def __repr__(self):
        return b'<includematcher includes=%r>' % pycompat.bytestr(self._pats)

761

762

763

class exactmatcher(basematcher):
    r"""Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).

    >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
    >>> m(b'a.txt')
    True
    >>> m(b'b.txt')
    False

    Input files that would be matched are exactly those returned by .files()
    >>> m.files()
    ['a.txt', 're:.*\\.c$']

    So pattern 're:.*\.c$' is not considered as a regex, but as a file name
    >>> m(b'main.c')
    False
    >>> m(br're:.*\.c$')
    True
    """

    def __init__(self, files, badfn=None):
        super(exactmatcher, self).__init__(badfn)

        # A list is stored as-is (not copied); anything else is
        # materialized into a new list.
        self._files = files if isinstance(files, list) else list(files)

    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        return dir in self._dirs

    @propertycache
    def _visitchildrenset_candidates(self):
        """A memoized set of candidates for visitchildrenset."""
        # '-' binds tighter than '|': the root entry b'' is removed from
        # _dirs before the union with _fileset.
        return self._fileset | self._dirs - {b''}

    @propertycache
    def _sorted_visitchildrenset_candidates(self):
        """A memoized sorted list of candidates for visitchildrenset."""
        return sorted(self._visitchildrenset_candidates)

    def visitchildrenset(self, dir):
        """Return the set of immediate children of `dir` worth visiting."""
        if not self._fileset or dir not in self._dirs:
            return set()

        if dir == b'':
            names = self._visitchildrenset_candidates
        else:
            ordered = self._sorted_visitchildrenset_candidates
            prefix = dir + b'/'
            # Use bisect to find the first element potentially starting with
            # prefix (i.e. >= prefix). This should always find at least one
            # element (we'll assert later if this is not the case).
            lo = bisect.bisect_left(ordered, prefix)
            # We need a representation of the first element that is > prefix
            # and does not start with prefix. Since a `/` was appended to
            # dir, append whatever sorts right after slash instead (we could
            # probably assume that `0` is after `/`, but let's not).
            upper = dir + encoding.strtolocal(chr(ord(b'/') + 1))
            # Use bisect to find the first element >= upper
            hi = bisect.bisect_left(ordered, upper, lo=lo)
            skip = len(prefix)
            names = {c[skip:] for c in ordered[lo:hi]}
        # self._dirs includes all of the directories, recursively, so if
        # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
        # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
        # '/' in it, indicating that it's for a subdir-of-a-subdir; the
        # immediate subdir will be in there without a slash.
        ret = {c for c in names if b'/' not in c}
        # We really do not expect ret to be empty, since that would imply that
        # there's something in _dirs that didn't have a file in _fileset.
        assert ret
        return ret

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return b'<exactmatcher files=%r>' % self._files

826

851

827

852

828

class differencematcher(basematcher):
    """Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.
    """

    def __init__(self, m1, m2):
        super(differencematcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        # bad and traversedir are taken from m1 only.
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        """Return b'all', True or False for directory `dir`."""
        # Ask m2 only once; its answer drives both checks below.
        m2_visit = self._m2.visitdir(dir)
        if m2_visit == b'all':
            return False
        if not m2_visit:
            # m2 does not match dir, we can return 'all' here if possible
            return self._m1.visitdir(dir)
        return bool(self._m1.visitdir(dir))

    def visitchildrenset(self, dir):
        """Return b'all', b'this' or a set of child names for `dir`."""
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == b'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in [b'all', b'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return b'this'
        # Possible values for m1: set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)

895

920

896

921

897

def intersectmatchers(m1, m2):
    """Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.
    """
    if m1 is None or m2 is None:
        # With at most one real matcher there is nothing to intersect.
        return m1 or m2
    if m1.always():
        # m1 accepts everything, so a shallow copy of m2 is enough; it
        # still has to carry m1's bad/traversedir.
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        combined = copy.copy(m2)
        combined.bad = m1.bad
        combined.traversedir = m1.traversedir
        return combined
    if m2.always():
        # The copy of m1 already has the right bad/traversedir.
        return copy.copy(m1)
    return intersectionmatcher(m1, m2)

915

940

916

941

917

class intersectionmatcher(basematcher):
    """Matches a file only when both wrapped matchers match it.

    bad and traversedir are taken from the first matcher.
    """

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if self.isexact():
            # Enumerate the files of whichever side is exact and keep the
            # ones the other side accepts.
            exact, other = self._m1, self._m2
            if not exact.isexact():
                exact, other = other, exact
            return [f for f in exact.files() if other(f)]
        # If neither m1 nor m2 is an exact matcher, we can't easily intersect
        # the set of files, because their files() are not always files. For
        # example, if intersecting a matcher "-I glob:foo.txt" with matcher of
        # "path:dir2", we don't want to remove "dir2" from the set.
        return self._m1.files() + self._m2.files()

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        """Return b'all', True or False for directory `dir`."""
        first = self._m1.visitdir(dir)
        if first == b'all':
            return self._m2.visitdir(dir)
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(first and self._m2.visitdir(dir))

    def visitchildrenset(self, dir):
        """Return b'all', b'this' or a set of child names for `dir`."""
        m1_set = self._m1.visitchildrenset(dir)
        if not m1_set:
            return set()
        m2_set = self._m2.visitchildrenset(dir)
        if not m2_set:
            return set()

        # If one side says 'all', the other side's answer decides.
        if m1_set == b'all':
            return m2_set
        if m2_set == b'all':
            return m1_set

        if m1_set == b'this' or m2_set == b'this':
            return b'this'

        assert isinstance(m1_set, set) and isinstance(m2_set, set)
        return m1_set.intersection(m2_set)

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)

976

1001

977

1002

978

class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> from . import pycompat
    >>> m1 = match(util.localpath(b'/root'), b'', [b'a.txt', b'sub/b.txt'], auditor=lambda name: None)
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'b.txt')
    True
    >>> m2.matchfn(b'a.txt')
    False
    >>> m2.matchfn(b'b.txt')
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        """Wrap ``matcher`` so it can be queried relative to ``path``.

        ``path`` is the subdirectory as bytes without a trailing slash (a
        ``b"/"`` separator is appended here); ``matcher`` is the matcher
        whose paths still carry that prefix.
        """
        super(subdirmatcher, self).__init__()
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the wrapped matcher's files that live below ``path`` and
        # strip the "<path>/" prefix from them.
        prefix = path + b"/"
        self._files = [
            f[len(prefix) :] for f in matcher._files if f.startswith(prefix)
        ]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(f == path for f in matcher._files)

    def bad(self, f, msg):
        """Forward a bad-file report to the wrapped matcher.

        ``f`` is relative to the subdirectory; the subdirectory path is
        prepended before delegating.
        """
        self._matcher.bad(self._path + b"/" + f, msg)

    def matchfn(self, f):
        """Return the wrapped matcher's verdict for ``<path>/<f>``."""
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._path + b"/" + f)

    def visitdir(self, dir):
        """Delegate ``visitdir`` after mapping ``dir`` into the parent tree.

        The empty directory (the root of this matcher) maps to the
        subdirectory itself.
        """
        if dir == b'':
            dir = self._path
        else:
            dir = self._path + b"/" + dir
        return self._matcher.visitdir(dir)

    def visitchildrenset(self, dir):
        """Delegate ``visitchildrenset`` with ``dir`` mapped like visitdir."""
        if dir == b'':
            dir = self._path
        else:
            dir = self._path + b"/" + dir
        return self._matcher.visitchildrenset(dir)

    def always(self):
        """Return the always-matches flag computed in ``__init__``."""
        return self._always

    def prefix(self):
        """Return True if the wrapped matcher is a prefix matcher and this
        matcher does not always match."""
        return self._matcher.prefix() and not self._always

    @encoding.strmethod
    def __repr__(self):
        """Describe this matcher by its path and the wrapped matcher."""
        return b'<subdirmatcher path=%r, matcher=%r>' % (
            self._path,
            self._matcher,
        )

1058

1083

1059

1084

1060

class prefixdirmatcher(basematcher):
    """Adapt a matcher to work on a parent directory.

    The matcher's non-matching-attributes (bad, traversedir) are ignored.

    The prefix path should usually be the relative path from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(util.localpath(b'/root/d/e'), b'f', [b'../a.txt', b'b.txt'], auditor=lambda name: None)
    >>> m2 = prefixdirmatcher(b'd/e', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'd/e/a.txt')
    True
    >>> m2(b'd/e/b.txt')
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, path, matcher, badfn=None):
        """Wrap ``matcher`` so that its paths appear below ``path``.

        ``path`` must be non-empty, otherwise ``error.ProgrammingError`` is
        raised. ``badfn`` is passed through to the base class constructor.
        """
        super(prefixdirmatcher, self).__init__(badfn)
        if not path:
            raise error.ProgrammingError(b'prefix path must not be empty')
        self._path = path
        # Cached "<path>/" form used for prefix tests and for stripping.
        self._pathprefix = path + b'/'
        self._matcher = matcher

    @propertycache
    def _files(self):
        """The wrapped matcher's files with the prefix path prepended."""
        return [self._pathprefix + f for f in self._matcher._files]

    def matchfn(self, f):
        """Match ``f`` by stripping the prefix and asking the wrapped matcher.

        Files that are not below the prefix path never match.
        """
        if not f.startswith(self._pathprefix):
            return False
        return self._matcher.matchfn(f[len(self._pathprefix) :])

    @propertycache
    def _pathdirs(self):
        """Set built from ``pathutil.finddirs`` applied to the prefix path."""
        return set(pathutil.finddirs(self._path))

    def visitdir(self, dir):
        """Decide whether ``dir`` needs to be visited.

        The prefix directory itself and anything below it are delegated to
        the wrapped matcher (with the prefix removed); any other directory
        is visited only if it is in ``self._pathdirs``.
        """
        if dir == self._path:
            return self._matcher.visitdir(b'')
        if dir.startswith(self._pathprefix):
            return self._matcher.visitdir(dir[len(self._pathprefix) :])
        return dir in self._pathdirs

    def visitchildrenset(self, dir):
        """Like ``visitdir`` but returning a children-set style answer.

        Directories in ``self._pathdirs`` yield ``b'this'``; directories
        unrelated to the prefix yield an empty set.
        """
        if dir == self._path:
            return self._matcher.visitchildrenset(b'')
        if dir.startswith(self._pathprefix):
            return self._matcher.visitchildrenset(dir[len(self._pathprefix) :])
        if dir in self._pathdirs:
            return b'this'
        return set()

    def isexact(self):
        """Return the wrapped matcher's ``isexact()`` answer."""
        return self._matcher.isexact()

    def prefix(self):
        """Return the wrapped matcher's ``prefix()`` answer."""
        return self._matcher.prefix()

    @encoding.strmethod
    def __repr__(self):
        """Describe this matcher by its prefix path and wrapped matcher."""
        return b'<prefixdirmatcher path=%r, matcher=%r>' % (
            pycompat.bytestr(self._path),
            self._matcher,
        )

1141

1166

1142

1167

1143

class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    The non-matching-attributes (bad, traversedir) are taken from the first
    matcher.
    """

    def __init__(self, matchers):
        """Build the union of ``matchers`` (a non-empty sequence).

        ``matchers[0]`` is read unconditionally, so an empty sequence raises
        ``IndexError``.
        """
        m1 = matchers[0]
        super(unionmatcher, self).__init__()
        # Only ``traversedir`` is copied explicitly in this constructor.
        self.traversedir = m1.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        """Return True as soon as any of the wrapped matchers matches ``f``."""
        for match in self._matchers:
            if match(f):
                return True
        return False

    def visitdir(self, dir):
        """Combine the ``visitdir`` answers of all wrapped matchers.

        ``b'all'`` from any matcher is returned immediately; otherwise the
        individual answers are OR-ed together.
        """
        r = False
        for m in self._matchers:
            v = m.visitdir(dir)
            if v == b'all':
                return v
            r |= v
        return r

    def visitchildrenset(self, dir):
        """Combine the ``visitchildrenset`` answers of all wrapped matchers.

        ``b'all'`` wins outright, ``b'this'`` wins over explicit sets, and
        explicit sets are unioned. Empty answers are ignored.
        """
        r = set()
        this = False
        for m in self._matchers:
            v = m.visitchildrenset(dir)
            if not v:
                continue
            if v == b'all':
                return v
            if this or v == b'this':
                this = True
                # don't break, we might have an 'all' in here.
                continue
            assert isinstance(v, set)
            r = r.union(v)
        if this:
            return b'this'
        return r

    @encoding.strmethod
    def __repr__(self):
        """Describe this matcher by the repr of its list of matchers."""
        return b'<unionmatcher matchers=%r>' % self._matchers

1193

1218

1194

1219

1195

def patkind(pattern, default=None):
    r"""If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise ``default`` is returned (``None`` unless given).

    >>> patkind(br're:.*\.c$')
    're'
    >>> patkind(b'glob:*.c')
    'glob'
    >>> patkind(b'relpath:test.py')
    'relpath'
    >>> patkind(b'main.py')
    >>> patkind(b'main.py', default=b're')
    're'
    """
    # _patsplit() returns (kind, pat); only the kind is of interest here.
    return _patsplit(pattern, default)[0]

1209

1234

1210

1235

1211

def _patsplit(pattern, default):
    """Split a string into the optional pattern kind prefix and the actual
    pattern.

    Returns a ``(kind, pat)`` tuple. The text before the first ``:`` is
    treated as a kind only if it is listed in ``allpatternkinds``; otherwise
    the whole string is the pattern and ``default`` is returned as its kind.
    """
    if b':' in pattern:
        kind, pat = pattern.split(b':', 1)
        if kind in allpatternkinds:
            return kind, pat
    return default, pattern

1219

1244

1220

1245

1221

def _globre(pat):
    r"""Convert an extended glob string to a regexp string.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    """
    i, n = 0, len(pat)
    # Regexp fragments are collected and joined once at the end instead of
    # growing a bytes object piece by piece.
    parts = []
    group = 0  # nesting depth of currently open '{' alternations
    escape = util.stringutil.regexbytesescapemap.get

    def peek():
        # Next unread character, or False when the pattern is exhausted.
        return i < n and pat[i : i + 1]

    while i < n:
        c = pat[i : i + 1]
        i += 1
        if c not in b'*?[{},\\':
            parts.append(escape(c, c))
        elif c == b'*':
            if peek() == b'*':
                i += 1
                if peek() == b'/':
                    # '**/' matches zero or more leading directories.
                    i += 1
                    parts.append(b'(?:.*/)?')
                else:
                    parts.append(b'.*')
            else:
                # A single '*' does not cross directory separators.
                parts.append(b'[^/]*')
        elif c == b'?':
            parts.append(b'.')
        elif c == b'[':
            # Look for the closing ']'; a leading '!' or ']' is part of the
            # class and must not be taken as its end.
            j = i
            if j < n and pat[j : j + 1] in b'!]':
                j += 1
            while j < n and pat[j : j + 1] != b']':
                j += 1
            if j >= n:
                # Unterminated class: treat '[' as a literal character.
                parts.append(b'\\[')
            else:
                stuff = pat[i:j].replace(b'\\', b'\\\\')
                i = j + 1
                if stuff[0:1] == b'!':
                    # Glob negation '!' becomes regexp negation '^'.
                    stuff = b'^' + stuff[1:]
                elif stuff[0:1] == b'^':
                    # A literal leading '^' must not negate the class.
                    stuff = b'\\' + stuff
                parts.append(b'[%s]' % stuff)
        elif c == b'{':
            group += 1
            parts.append(b'(?:')
        elif c == b'}' and group:
            parts.append(b')')
            group -= 1
        elif c == b',' and group:
            parts.append(b'|')
        elif c == b'\\':
            p = peek()
            if p:
                # Backslash escapes the following character.
                i += 1
                parts.append(escape(p, p))
            else:
                # Trailing backslash: emit it as a literal.
                parts.append(escape(c, c))
        else:
            # '}' or ',' outside of any '{...}' group are literals.
            parts.append(escape(c, c))
    return b''.join(parts)

1303

1328

1304

1329

1305

def _regex(kind, pat, globsuffix):
    """Convert a (normalized) pattern of any kind into a
    regular expression.
    globsuffix is appended to the regexp of globs.

    Handled kinds are ``re``, ``path``, ``relpath``, ``rootfilesin``,
    ``relglob``, ``relre``, ``glob`` and ``rootglob``; any other kind raises
    ``error.ProgrammingError``.
    """
    if not pat and kind in (b'glob', b'relpath'):
        return b''
    if kind == b're':
        return pat
    if kind in (b'path', b'relpath'):
        if pat == b'.':
            return b''
        # Match the path itself or anything below it.
        return util.stringutil.reescape(pat) + b'(?:/|$)'
    if kind == b'rootfilesin':
        if pat == b'.':
            escaped = b''
        else:
            # Pattern is a directory name.
            escaped = util.stringutil.reescape(pat) + b'/'
        # Anything after the pattern must be a non-directory.
        return escaped + b'[^/]+$'
    if kind == b'relglob':
        globre = _globre(pat)
        if globre.startswith(b'[^/]*'):
            # When pat has the form *XYZ (common), make the returned regex more
            # legible by returning the regex for **XYZ instead of **/*XYZ.
            return b'.*' + globre[len(b'[^/]*') :] + globsuffix
        return b'(?:|.*/)' + globre + globsuffix
    if kind == b'relre':
        if pat.startswith(b'^'):
            return pat
        return b'.*' + pat
    if kind in (b'glob', b'rootglob'):
        return _globre(pat) + globsuffix
    raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))

1339

1364

1340

1365

1341

def _buildmatch(kindpats, globsuffix, root):
    """Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs.

    ``kindpats`` is a list of ``(kind, pattern, source)`` tuples. Entries
    split off by ``_expandsubinclude`` are matched by lazily created
    sub-matchers; the remaining ones are matched either by a directory
    lookup (when all of them are ``rootfilesin``) or by a compiled regexp.
    """
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # prefix -> matcher, filled in on first use
        submatchers = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if f.startswith(prefix):
                    mf = submatchers.get(prefix)
                    if mf is None:
                        mf = match(*matcherargs)
                        submatchers[prefix] = mf

                    if mf(f[len(prefix) :]):
                        return True
            return False

        matchfuncs.append(matchsubinclude)

    regex = b''
    if kindpats:
        if all(k == b'rootfilesin' for k, p, s in kindpats):
            dirs = {p for k, p, s in kindpats}

            def mf(f):
                # Compare the directory part of f; files without a slash
                # are looked up as b'.'.
                i = f.rfind(b'/')
                if i >= 0:
                    dir = f[:i]
                else:
                    dir = b'.'
                return dir in dirs

            # Not a real regexp: a readable description of the directories.
            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
            matchfuncs.append(mf)
        else:
            regex, mf = _buildregexmatch(kindpats, globsuffix)
            matchfuncs.append(mf)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]
    else:
        return regex, lambda f: any(mf(f) for mf in matchfuncs)

1387

1412

1388

1413

1389

# Size limit in bytes used by _buildregexmatch(): a single pattern's regexp
# larger than this aborts, and joined regexps are split into several groups
# so that no group grows beyond it.
MAX_RE_SIZE = 20000

1390

1415

1391

1416

1392

def _joinregexes(regexps):
    """gather multiple regular expressions into a single one

    The bytes regexps are combined as alternatives separated by ``|``.
    """
    return b'|'.join(regexps)

1395

1420

1396

1421

1397

def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    The returned regexp string is always the full alternation of all
    patterns; when it would exceed MAX_RE_SIZE, matching is done with
    several smaller compiled groups instead. Invalid patterns and patterns
    that are too long on their own raise ``error.Abort``.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        allgroups = []
        regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        fullregexp = _joinregexes(regexps)

        startidx = 0  # index of the first regexp in the current group
        groupsize = 0  # accumulated size of the current group
        for idx, r in enumerate(regexps):
            piecesize = len(r)
            if piecesize > MAX_RE_SIZE:
                msg = _(b"matcher pattern is too long (%d bytes)") % piecesize
                raise error.Abort(msg)
            elif (groupsize + piecesize) > MAX_RE_SIZE:
                # Adding this piece would overflow: close the current group
                # and start a new one with this piece.
                group = regexps[startidx:idx]
                allgroups.append(_joinregexes(group))
                startidx = idx
                groupsize = 0
            # +1 for the b'|' separator inserted by _joinregexes().
            groupsize += piecesize + 1

        if startidx == 0:
            # Everything fits in a single regexp.
            matcher = _rematcher(fullregexp)
            func = lambda s: bool(matcher(s))
        else:
            group = regexps[startidx:]
            allgroups.append(_joinregexes(group))
            allmatchers = [_rematcher(g) for g in allgroups]
            func = lambda s: any(m(s) for m in allmatchers)
        return fullregexp, func
    except re.error:
        # Compile the patterns one by one to find and report the bad one.
        for k, p, s in kindpats:
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise error.Abort(
                        _(b"%s: invalid pattern (%s): %s") % (s, k, p)
                    )
                else:
                    raise error.Abort(_(b"invalid pattern (%s): %s") % (k, p))
        raise error.Abort(_(b"invalid pattern"))

1449

1474

1450

1475

1451

def _patternrootsanddirs(kindpats):
    """Returns roots and directories corresponding to each pattern.

    This calculates the roots and directories exactly matching the patterns and
    returns a tuple of (roots, dirs) for each. It does not return other
    directories which may also need to be considered, like the parent
    directories.

    ``kindpats`` is an iterable of ``(kind, pattern, source)`` tuples.
    """
    r = []
    d = []
    for kind, pat, source in kindpats:
        if kind in (b'glob', b'rootglob'):  # find the non-glob prefix
            root = []
            for p in pat.split(b'/'):
                if b'[' in p or b'{' in p or b'*' in p or b'?' in p:
                    break
                root.append(p)
            r.append(b'/'.join(root))
        elif kind in (b'relpath', b'path'):
            if pat == b'.':
                pat = b''
            r.append(pat)
        elif kind == b'rootfilesin':
            if pat == b'.':
                pat = b''
            d.append(pat)
        else:  # relglob, re, relre
            r.append(b'')
    return r, d

1480

1505

1481

1506

1482

def _roots(kindpats):
    '''Returns root directories to match recursively from the given patterns.

    This is the first element of what _patternrootsanddirs() returns; the
    non-recursive directories are discarded.
    '''
    return _patternrootsanddirs(kindpats)[0]

1486

1511

1487

1512

1488

def _rootsdirsandparents(kindpats):
    """Returns roots and exact directories from patterns.

    `roots` are directories to match recursively, `dirs` should
    be matched non-recursively, and `parents` are the implicitly required
    directories to walk to items in either roots or dirs.

    Returns a tuple of (roots, dirs, parents).

    >>> r = _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['g/h', 'g/h', ''], []) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    ([], ['g/h', '']) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['r', 'p/p', ''], []) ['', 'p']
    >>> r = _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['', '', ''], []) ['']
    """
    r, d = _patternrootsanddirs(kindpats)

    p = set()
    # Add the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    p.update(pathutil.dirs(d))
    p.update(pathutil.dirs(r))

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return r, d, p

1530

1555

1531

1556

1532

def _explicitfiles(kindpats):

1557

def _explicitfiles(kindpats):

1533

"""Returns the potential explicit filenames from the patterns.

1558

"""Returns the potential explicit filenames from the patterns.

1534

1559

1535

>>> _explicitfiles([(b'path', b'foo/bar', b'')])

1560

>>> _explicitfiles([(b'path', b'foo/bar', b'')])

1536

['foo/bar']

1561

['foo/bar']

1537

>>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])

1562

>>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])

1538

[]

1563

[]

1539

"""

1564

"""

1540

# Keep only the pattern kinds where one can specify filenames (vs only

1565

# Keep only the pattern kinds where one can specify filenames (vs only

1541

# directory names).

1566

# directory names).

1542

filable = [kp for kp in kindpats if kp[0] not in (b'rootfilesin',)]

1567

filable = [kp for kp in kindpats if kp[0] not in (b'rootfilesin',)]

1543

return _roots(filable)

1568

return _roots(filable)

1544

1569

1545

1570

1546

def _prefix(kindpats):

1571

def _prefix(kindpats):

1547

'''Whether all the patterns match a prefix (i.e. recursively)'''

1572

'''Whether all the patterns match a prefix (i.e. recursively)'''

1548

for kind, pat, source in kindpats:

1573

for kind, pat, source in kindpats:

1549

if kind not in (b'path', b'relpath'):

1574

if kind not in (b'path', b'relpath'):

1550

return False

1575

return False

1551

return True

1576

return True

1552

1577

1553

1578

1554

_commentre = None

1579

_commentre = None

1555

1580

1556

1581

1557

def readpatternfile(filepath, warn, sourceinfo=False):

1582

def readpatternfile(filepath, warn, sourceinfo=False):

1558

"""parse a pattern file, returning a list of

1583

"""parse a pattern file, returning a list of

1559

patterns. These patterns should be given to compile()

1584

patterns. These patterns should be given to compile()

1560

to be validated and converted into a match function.

1585

to be validated and converted into a match function.

1561

1586

1562

trailing white space is dropped.

1587

trailing white space is dropped.

1563

the escape character is backslash.

1588

the escape character is backslash.

1564

comments start with #.

1589

comments start with #.

1565

empty lines are skipped.

1590

empty lines are skipped.

1566

1591

1567

lines can be of the following formats:

1592

lines can be of the following formats:

1568

1593

1569

syntax: regexp # defaults following lines to non-rooted regexps

1594

syntax: regexp # defaults following lines to non-rooted regexps

1570

syntax: glob # defaults following lines to non-rooted globs

1595

syntax: glob # defaults following lines to non-rooted globs

1571

re:pattern # non-rooted regular expression

1596

re:pattern # non-rooted regular expression

1572

glob:pattern # non-rooted glob

1597

glob:pattern # non-rooted glob

1573

rootglob:pat # rooted glob (same root as ^ in regexps)

1598

rootglob:pat # rooted glob (same root as ^ in regexps)

1574

pattern # pattern of the current default type

1599

pattern # pattern of the current default type

1575

1600

1576

if sourceinfo is set, returns a list of tuples:

1601

if sourceinfo is set, returns a list of tuples:

1577

(pattern, lineno, originalline).

1602

(pattern, lineno, originalline).

1578

This is useful to debug ignore patterns.

1603

This is useful to debug ignore patterns.

1579

"""

1604

"""

1580

1605

1581

syntaxes = {

1606

syntaxes = {

1582

b're': b'relre:',

1607

b're': b'relre:',

1583

b'regexp': b'relre:',

1608

b'regexp': b'relre:',

1584

b'glob': b'relglob:',

1609

b'glob': b'relglob:',

1585

b'rootglob': b'rootglob:',

1610

b'rootglob': b'rootglob:',

1586

b'include': b'include',

1611

b'include': b'include',

1587

b'subinclude': b'subinclude',

1612

b'subinclude': b'subinclude',

1588

}

1613

}

1589

syntax = b'relre:'

1614

syntax = b'relre:'

1590

patterns = []

1615

patterns = []

1591

1616

1592

fp = open(filepath, b'rb')

1617

fp = open(filepath, b'rb')

1593

for lineno, line in enumerate(util.iterfile(fp), start=1):

1618

for lineno, line in enumerate(util.iterfile(fp), start=1):

1594

if b"#" in line:

1619

if b"#" in line:

1595

global _commentre

1620

global _commentre

1596

if not _commentre:

1621

if not _commentre:

1597

_commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')

1622

_commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')

1598

# remove comments prefixed by an even number of escapes

1623

# remove comments prefixed by an even number of escapes

1599

m = _commentre.search(line)

1624

m = _commentre.search(line)

1600

if m:

1625

if m:

1601

line = line[: m.end(1)]

1626

line = line[: m.end(1)]

1602

# fixup properly escaped comments that survived the above

1627

# fixup properly escaped comments that survived the above

1603

line = line.replace(b"\\#", b"#")

1628

line = line.replace(b"\\#", b"#")

1604

line = line.rstrip()

1629

line = line.rstrip()

1605

if not line:

1630

if not line:

1606

continue

1631

continue

1607

1632

1608

if line.startswith(b'syntax:'):

1633

if line.startswith(b'syntax:'):

1609

s = line[7:].strip()

1634

s = line[7:].strip()

1610

try:

1635

try:

1611

syntax = syntaxes[s]

1636

syntax = syntaxes[s]

1612

except KeyError:

1637

except KeyError:

1613

if warn:

1638

if warn:

1614

warn(

1639

warn(

1615

_(b"%s: ignoring invalid syntax '%s'\n") % (filepath, s)

1640

_(b"%s: ignoring invalid syntax '%s'\n") % (filepath, s)

1616

)

1641

)

1617

continue

1642

continue

1618

1643

1619

linesyntax = syntax

1644

linesyntax = syntax

1620

for s, rels in pycompat.iteritems(syntaxes):

1645

for s, rels in pycompat.iteritems(syntaxes):

1621

if line.startswith(rels):

1646

if line.startswith(rels):

1622

linesyntax = rels

1647

linesyntax = rels

1623

line = line[len(rels) :]

1648

line = line[len(rels) :]

1624

break

1649

break

1625

elif line.startswith(s + b':'):

1650

elif line.startswith(s + b':'):

1626

linesyntax = rels

1651

linesyntax = rels

1627

line = line[len(s) + 1 :]

1652

line = line[len(s) + 1 :]

1628

break

1653

break

1629

if sourceinfo:

1654

if sourceinfo:

1630

patterns.append((linesyntax + line, lineno, line))

1655

patterns.append((linesyntax + line, lineno, line))

1631

else:

1656

else:

1632

patterns.append(linesyntax + line)

1657

patterns.append(linesyntax + line)

1633

fp.close()

1658

fp.close()

1634

return patterns

1659

return patterns

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # match.py - filename matching
             #
             #  Copyright 2008, 2009 Olivia Mackall <olivia@selenic.com> and others
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import, print_function
+            import bisect
             import copy
             import itertools
             import os
             import re
             from .i18n import _
             from .pycompat import open
             from . import (
                 encoding,
                 error,
                 pathutil,
                 policy,
                 pycompat,
                 util,
             )
             from .utils import stringutil
             # NOTE(review): whatever policy.importrust('dirstate') returns; presumably
             # the optional Rust-backed implementation module -- confirm in policy.
             rustmod = policy.importrust('dirstate')
             # Names of the pattern kinds, i.e. the 'kind' part of a 'kind:pattern'
             # string.
             allpatternkinds = (
                 b're',
                 b'glob',
                 b'path',
                 b'relglob',
                 b'relpath',
                 b'relre',
                 b'rootglob',
                 b'listfile',
                 b'listfile0',
                 b'set',
                 b'include',
                 b'subinclude',
                 b'rootfilesin',
             )
             # Kinds whose pattern is interpreted relative to the current working
             # directory: _donormalize() resolves them with
             # pathutil.canonpath(root, cwd, pat).
             cwdrelativepatternkinds = (b'relpath', b'glob')
             # Local alias so the decorator can be spelled @propertycache below.
             propertycache = util.propertycache
             def _rematcher(regex):
                 """Compile ``regex`` with ``util.re`` and return a matching callable.

                 The compiled object's ``test_match`` attribute is preferred when it
                 exists (it is provided by facebook's re2 bindings and is slightly
                 faster); otherwise the compiled object's ``match`` method is returned.
                 """
                 compiled = util.re.compile(regex)
                 try:
                     matcher = compiled.test_match
                 except AttributeError:
                     # no re2-style fast path on this compiled pattern
                     matcher = compiled.match
                 return matcher
             def _expandsets(cwd, kindpats, ctx=None, listsubrepos=False, badfn=None):
                 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
                 matchers = []
                 other = []
                 for kind, pat, source in kindpats:
                     if kind == b'set':
                         if ctx is None:
                             raise error.ProgrammingError(
                                 b"fileset expression with no context"
                             )
                         matchers.append(ctx.matchfileset(cwd, pat, badfn=badfn))
                         if listsubrepos:
                             for subpath in ctx.substate:
                                 sm = ctx.sub(subpath).matchfileset(cwd, pat, badfn=badfn)
                                 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
                                 matchers.append(pm)
                         continue
                     other.append((kind, pat, source))
                 return matchers, other
             def _expandsubinclude(kindpats, root):
                 """Returns the list of subinclude matcher args and the kindpats without the
                 subincludes in it."""
                 relmatchers = []
                 other = []
                 for kind, pat, source in kindpats:
                     if kind == b'subinclude':
                         sourceroot = pathutil.dirname(util.normpath(source))
                         pat = util.pconvert(pat)
                         path = pathutil.join(sourceroot, pat)
                         newroot = pathutil.dirname(path)
                         matcherargs = (newroot, b'', [], [b'include:%s' % path])
                         prefix = pathutil.canonpath(root, root, newroot)
                         if prefix:
                             prefix += b'/'
                         relmatchers.append((prefix, matcherargs))
                     else:
                         other.append((kind, pat, source))
                 return relmatchers, other
             def _kindpatsalwaysmatch(kindpats):
                 """Checks whether the kindspats match everything, as e.g.
                 'relpath:.' does.
                 """
                 for kind, pat, source in kindpats:
                     if pat != b'' or kind not in [b'relpath', b'glob']:
                         return False
                 return True
             def _buildkindpatsmatcher(
                 matchercls,
                 root,
                 cwd,
                 kindpats,
                 ctx=None,
                 listsubrepos=False,
                 badfn=None,
             ):
                 """Build a single matcher out of ``kindpats``.

                 ``set`` patterns are expanded into their own matchers by
                 ``_expandsets()``; any patterns left over are handed to
                 ``matchercls(root, kindpats, badfn=badfn)``.  The result is a
                 ``nevermatcher`` when nothing was produced, the lone matcher when
                 there is exactly one, and a ``unionmatcher`` of all of them otherwise.
                 """
                 filesetmatchers, kindpats = _expandsets(
                     cwd,
                     kindpats,
                     ctx=ctx,
                     listsubrepos=listsubrepos,
                     badfn=badfn,
                 )
                 candidates = []
                 if kindpats:
                     candidates.append(matchercls(root, kindpats, badfn=badfn))
                 # the pattern matcher (if any) comes first, fileset matchers after
                 candidates.extend(filesetmatchers)
                 if not candidates:
                     return nevermatcher(badfn=badfn)
                 if len(candidates) == 1:
                     return candidates[0]
                 return unionmatcher(candidates)
             def match(
                 root,
                 cwd,
                 patterns=None,
                 include=None,
                 exclude=None,
                 default=b'glob',
                 auditor=None,
                 ctx=None,
                 listsubrepos=False,
                 warn=None,
                 badfn=None,
                 icasefs=False,
             ):
                 r"""build an object to match a set of file patterns

                 arguments:
                 root - the canonical root of the tree you're matching against
                 cwd - the current working directory, if relevant
                 patterns - patterns to find
                 include - patterns to include (unless they are excluded)
                 exclude - patterns to exclude (even if they are included)
                 default - if a pattern in patterns has no explicit type, assume this one
                 auditor - optional path auditor
                 ctx - optional changecontext
                 listsubrepos - if True, recurse into subrepositories
                 warn - optional function used for printing warnings
                 badfn - optional bad() callback for this matcher instead of the default
                 icasefs - make a matcher for wdir on case insensitive filesystems, which
                     normalizes the given patterns to the case in the filesystem

                 a pattern is one of:
                 'glob:<glob>' - a glob relative to cwd
                 're:<regexp>' - a regular expression
                 'path:<path>' - a path relative to repository root, which is matched
                                 recursively
                 'rootfilesin:<path>' - a path relative to repository root, which is
                                 matched non-recursively (will not match subdirectories)
                 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
                 'relpath:<path>' - a path relative to cwd
                 'relre:<regexp>' - a regexp that needn't match the start of a name
                 'set:<fileset>' - a fileset expression
                 'include:<path>' - a file of patterns to read and include
                 'subinclude:<path>' - a file of patterns to match against files under
                                       the same directory
                 '<something>' - a pattern of the specified default type

                 >>> def _match(root, *args, **kwargs):
                 ...     return match(util.localpath(root), *args, **kwargs)

                 Usually a patternmatcher is returned:

                 >>> _match(b'/foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
                 <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>

                 Combining 'patterns' with 'include' (resp. 'exclude') gives an
                 intersectionmatcher (resp. a differencematcher):

                 >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
                 <class 'mercurial.match.intersectionmatcher'>
                 >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
                 <class 'mercurial.match.differencematcher'>

                 Notice that, if 'patterns' is empty, an alwaysmatcher is returned:

                 >>> _match(b'/foo', b'.', [])
                 <alwaysmatcher>

                 The 'default' argument determines which kind of pattern is assumed if a
                 pattern has no prefix:

                 >>> _match(b'/foo', b'.', [b'.*\.c$'], default=b're')
                 <patternmatcher patterns='.*\\.c$'>
                 >>> _match(b'/foo', b'.', [b'main.py'], default=b'relpath')
                 <patternmatcher patterns='main\\.py(?:/|$)'>
                 >>> _match(b'/foo', b'.', [b'main.py'], default=b're')
                 <patternmatcher patterns='main.py'>

                 The primary use of matchers is to check whether a value (usually a file
                 name) matches against one of the patterns given at initialization. There
                 are two ways of doing this check.

                 >>> m = _match(b'/foo', b'', [b're:.*\.c$', b'relpath:a'])

                 1. Calling the matcher with a file name returns True if any pattern
                 matches that file name:

                 >>> m(b'a')
                 True
                 >>> m(b'main.c')
                 True
                 >>> m(b'test.py')
                 False

                 2. Using the exact() method only returns True if the file name matches one
                 of the exact patterns (i.e. not re: or glob: patterns):

                 >>> m.exact(b'a')
                 True
                 >>> m.exact(b'main.c')
                 False
                 """
                 assert os.path.isabs(root)
                 cwd = os.path.join(root, util.localpath(cwd))

                 if icasefs:
                     dirstate = ctx.repo().dirstate
                     foldcase = dirstate.normalize

                     def normalize(patterns, default, root, cwd, auditor, warn):
                         # Same as _donormalize(), but every non-regex pattern is
                         # additionally case-normalized through the dirstate.
                         folded = []
                         for kind, pat, source in _donormalize(
                             patterns, default, root, cwd, auditor, warn
                         ):
                             if kind in (b're', b'relre'):
                                 # regex can't be normalized
                                 folded.append((kind, pat, source))
                                 continue
                             normed = foldcase(pat)
                             # Preserve the original to handle a case only rename.
                             if normed != pat and pat in dirstate:
                                 folded.append((kind, pat, source))
                             folded.append((kind, normed, source))
                         return folded

                 else:
                     normalize = _donormalize

                 def _filtermatcher(pats):
                     # include/exclude lists always default to the 'glob' kind and are
                     # built without the caller's badfn.
                     kindpats = normalize(pats, b'glob', root, cwd, auditor, warn)
                     return _buildkindpatsmatcher(
                         includematcher,
                         root,
                         cwd,
                         kindpats,
                         ctx=ctx,
                         listsubrepos=listsubrepos,
                         badfn=None,
                     )

                 if not patterns:
                     # It's a little strange that no patterns means to match everything.
                     # Consider changing this to match nothing (probably using
                     # nevermatcher).
                     m = alwaysmatcher(badfn)
                 else:
                     kindpats = normalize(patterns, default, root, cwd, auditor, warn)
                     if _kindpatsalwaysmatch(kindpats):
                         m = alwaysmatcher(badfn)
                     else:
                         m = _buildkindpatsmatcher(
                             patternmatcher,
                             root,
                             cwd,
                             kindpats,
                             ctx=ctx,
                             listsubrepos=listsubrepos,
                             badfn=badfn,
                         )

                 if include:
                     m = intersectmatchers(m, _filtermatcher(include))
                 if exclude:
                     m = differencematcher(m, _filtermatcher(exclude))
                 return m
             def exact(files, badfn=None):
                 """Return an ``exactmatcher`` for ``files``, forwarding ``badfn``."""
                 matcher = exactmatcher(files, badfn=badfn)
                 return matcher
             def always(badfn=None):
                 """Return an ``alwaysmatcher`` constructed with ``badfn``."""
                 matcher = alwaysmatcher(badfn)
                 return matcher
             def never(badfn=None):
                 """Return a ``nevermatcher`` constructed with ``badfn``."""
                 matcher = nevermatcher(badfn)
                 return matcher
             def badmatch(match, badfn):
                 """Return a shallow copy of ``match`` whose ``bad`` attribute is ``badfn``.

                 The matcher passed in is left untouched; only the copy gets the new
                 callback.
                 """
                 clone = copy.copy(match)
                 clone.bad = badfn
                 return clone
             def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
                 """Convert 'kind:pat' from the patterns list to tuples with kind and
                 normalized and rooted patterns and with listfiles expanded.

                 Each entry of ``patterns`` is split into ``(kind, pat)`` by
                 ``_patsplit(p, default)`` and then handled according to its kind:

                 - kinds in ``cwdrelativepatternkinds`` are passed through
                   ``pathutil.canonpath(root, cwd, pat, auditor=auditor)``;
                 - 'relglob', 'path', 'rootfilesin' and 'rootglob' are passed through
                   ``util.normpath``;
                 - 'listfile'/'listfile0' name a file whose non-empty entries (split
                   on lines, resp. on NUL bytes) are normalized recursively;
                 - 'include' names a pattern file under ``root`` that is parsed with
                   ``readpatternfile`` and normalized recursively;
                 - every other kind is kept unchanged.

                 Returns a list of ``(kind, pat, source)`` tuples.  ``source`` is
                 ``b''`` for patterns given directly, the list file name for entries
                 read from a 'listfile'/'listfile0', and for 'include' entries the
                 nested source if non-empty, otherwise the include file name.

                 Raises ``error.Abort`` when a list file cannot be read, or when
                 processing an include file aborts (the message is then prefixed with
                 the include file name).  An ``IOError`` while processing an include
                 file is reported through ``warn`` (if given) and the file is skipped.
                 """
                 kindpats = []
                 for kind, pat in [_patsplit(p, default) for p in patterns]:
                     if kind in cwdrelativepatternkinds:
                         # cwd-relative kinds: resolve against cwd within root
                         pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
                     elif kind in (b'relglob', b'path', b'rootfilesin', b'rootglob'):
                         pat = util.normpath(pat)
                     elif kind in (b'listfile', b'listfile0'):
                         try:
                             files = util.readfile(pat)
                             # listfile0 entries are NUL-separated, listfile entries
                             # are one per line
                             if kind == b'listfile0':
                                 files = files.split(b'\0')
                             else:
                                 files = files.splitlines()
                             # drop empty entries
                             files = [f for f in files if f]
                         except EnvironmentError:
                             raise error.Abort(_(b"unable to read file list (%s)") % pat)
                         # Normalize the listed entries recursively; the list file
                         # name replaces whatever source the nested call reported.
                         for k, p, source in _donormalize(
                             files, default, root, cwd, auditor, warn
                         ):
                             kindpats.append((k, p, pat))
                         continue
                     elif kind == b'include':
                         try:
                             # the include path is joined onto root
                             fullpath = os.path.join(root, util.localpath(pat))
                             includepats = readpatternfile(fullpath, warn)
                             # keep a nested source when there is one, otherwise
                             # attribute the pattern to this include file
                             for k, p, source in _donormalize(
                                 includepats, default, root, cwd, auditor, warn
                             ):
                                 kindpats.append((k, p, source or pat))
                         except error.Abort as inst:
                             # re-raise with the include file name prepended
                             raise error.Abort(
                                 b'%s: %s'
                                 % (
                                     pat,
                                     inst.message,
                                 )  # pytype: disable=unsupported-operands
                             )
                         except IOError as inst:
                             # an unreadable include file is skipped, not fatal
                             if warn:
                                 warn(
                                     _(b"skipping unreadable pattern file '%s': %s\n")
                                     % (pat, stringutil.forcebytestr(inst.strerror))
                                 )
                         continue
                     # else: re or relre - which cannot be normalized
                     # (the kinds normalized in the first two branches above also
                     # fall through to this append)
                     kindpats.append((kind, pat, b''))
                 return kindpats
             class basematcher(object):
                 """Base class of the matchers: matches nothing and visits everything.

                 Subclasses override ``matchfn`` and the query methods below.
                 """

                 def __init__(self, badfn=None):
                     # Only shadow the bad() method when a callback was supplied.
                     if badfn is None:
                         return
                     self.bad = badfn

                 def __call__(self, fn):
                     """Return the result of ``matchfn`` for the file name ``fn``."""
                     return self.matchfn(fn)

                 # Callbacks related to how the matcher is used by dirstate.walk.
                 # Subscribers to these events must monkeypatch the matcher object.
                 def bad(self, f, msg):
                     """Callback from dirstate.walk for each explicit file that can't be
                     found/accessed, with an error message."""

                 # If an traversedir is set, it will be called when a directory discovered
                 # by recursive traversal is visited.
                 traversedir = None

                 @propertycache
                 def _files(self):
                     # the base matcher has no explicit files
                     return []

                 def files(self):
                     """Explicitly listed files or patterns or roots:
                     if no patterns or .always(): empty list,
                     if exact: list exact files,
                     if not .anypats(): list all files and dirs,
                     else: optimal roots"""
                     return self._files

                 @propertycache
                 def _fileset(self):
                     # set view of _files, used for the membership test in exact()
                     return set(self._files)

                 def exact(self, f):
                     '''Returns True if f is in .files().'''
                     return f in self._fileset

                 def matchfn(self, f):
                     """Default match function: no file matches."""
                     return False

                 def visitdir(self, dir):
                     """Decides whether a directory should be visited based on whether it
                     has potential matches in it or one of its subdirectories. This is
                     based on the match's primary, included, and excluded patterns.

                     Returns the string 'all' if the given directory and all subdirectories
                     should be visited. Otherwise returns True or False indicating whether
                     the given directory should be visited.
                     """
                     return True

                 def visitchildrenset(self, dir):
                     """Decides whether a directory should be visited based on whether it
                     has potential matches in it or one of its subdirectories, and
                     potentially lists which subdirectories of that directory should be
                     visited. This is based on the match's primary, included, and excluded
                     patterns.

                     This function is very similar to 'visitdir', and the following mapping
                     can be applied:

                          visitdir | visitchildrenset
                         ----------+-------------------
                          False    | set()
                          'all'    | 'all'
                          True     | 'this' OR non-empty set of subdirs -or files- to visit

                     Example:
                       Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
                       the following values (assuming the implementation of visitchildrenset
                       is capable of recognizing this; some implementations are not).

                       '' -> {'foo', 'qux'}
                       'baz' -> set()
                       'foo' -> {'bar'}
                       # Ideally this would be 'all', but since the prefix nature of matchers
                       # is applied to the entire matcher, we have to downgrade this to
                       # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
                       # in.
                       'foo/bar' -> 'this'
                       'qux' -> 'this'

                     Important:
                       Most matchers do not know if they're representing files or
                       directories. They see ['path:dir/f'] and don't know whether 'f' is a
                       file or a directory, so visitchildrenset('dir') for most matchers will
                       return {'f'}, but if the matcher knows it's a file (like exactmatcher
                       does), it may return 'this'. Do not rely on the return being a set
                       indicating that there are no files in this dir to investigate (or
                       equivalently that if there are files to investigate in 'dir' that it
                       will always return 'this').
                     """
                     return b'this'

                 def always(self):
                     """Matcher will match everything and .files() will be empty --
                     optimization might be possible."""
                     return False

                 def isexact(self):
                     """Matcher will match exactly the list of files in .files() --
                     optimization might be possible."""
                     return False

                 def prefix(self):
                     """Matcher will match the paths in .files() recursively --
                     optimization might be possible."""
                     return False

                 def anypats(self):
                     """None of .always(), .isexact(), and .prefix() is true --
                     optimizations will be difficult."""
                     return not (self.always() or self.isexact() or self.prefix())
             class alwaysmatcher(basematcher):
                 '''Matcher that accepts every file name and visits every directory.'''

                 def __init__(self, badfn=None):
                     super(alwaysmatcher, self).__init__(badfn)

                 def matchfn(self, f):
                     # every file name matches
                     return True

                 def always(self):
                     return True

                 def visitchildrenset(self, dir):
                     # the directory and everything below it is to be visited
                     return b'all'

                 def visitdir(self, dir):
                     return b'all'

                 def __repr__(self):
                     return r'<alwaysmatcher>'
             class nevermatcher(basematcher):
                 '''Matcher that rejects every file name and visits no directory.'''

                 def __init__(self, badfn=None):
                     super(nevermatcher, self).__init__(badfn)

                 def visitchildrenset(self, dir):
                     # nothing below any directory needs visiting
                     return set()

                 def visitdir(self, dir):
                     return False

                 # It's a little weird to say that the nevermatcher is an exact matcher
                 # or a prefix matcher, but it seems to make sense to let callers take
                 # fast paths based on either. There will be no exact matches, nor any
                 # prefixes (files() returns []), so fast paths iterating over them should
                 # be efficient (and correct).
                 def prefix(self):
                     return True

                 def isexact(self):
                     return True

                 def __repr__(self):
                     return r'<nevermatcher>'
             class predicatematcher(basematcher):
                 """Adapter that turns a plain boolean function into a matcher.

                 ``predfn`` becomes the instance's ``matchfn``; ``predrepr`` is an
                 optional description used when building the repr.
                 """

                 def __init__(self, predfn, predrepr=None, badfn=None):
                     super(predicatematcher, self).__init__(badfn)
                     self._predrepr = predrepr
                     self.matchfn = predfn

                 @encoding.strmethod
                 def __repr__(self):
                     # Prefer the supplied description; fall back to the repr of the
                     # predicate itself when that yields nothing.
                     described = stringutil.buildrepr(self._predrepr)
                     if not described:
                         described = pycompat.byterepr(self.matchfn)
                     # The tag below is emitted at runtime and is kept exactly as is.
                     return b'<predicatenmatcher pred=%s>' % described
             def path_or_parents_in_set(path, prefix_set):
                 """Tell whether ``path`` itself or one of its parents is in ``prefix_set``.

                 Returns False for an empty ``prefix_set`` and True when ``path`` is a
                 member.  Otherwise the parents are checked, either by walking up the
                 directories of ``path`` (more than 5 entries) or by testing each entry
                 as a directory prefix of ``path`` (5 entries or fewer).
                 """
                 count = len(prefix_set)
                 if not count:
                     return False
                 if path in prefix_set:
                     return True

                 # If there's more than 5 paths in prefix_set, it's *probably* quicker to
                 # "walk up" the directory hierarchy instead, with the assumption that most
                 # directory hierarchies are relatively shallow and hash lookup is cheap.
                 if count > 5:
                     for parentdir in pathutil.finddirs(path):
                         if parentdir in prefix_set:
                             return True
                     return False

                 # FIXME: Ideally we'd never get to this point if this is the case - we'd
                 # recognize ourselves as an 'always' matcher and skip this.
                 if b'' in prefix_set:
                     return True

                 # Indexing bytes gives an int on Python 3 and a str on Python 2.
                 sl = ord(b'/') if pycompat.ispy3 else '/'

                 # We already checked that path isn't in prefix_set exactly, so
                 # `path[len(pf)]` should never raise IndexError.
                 for pf in prefix_set:
                     if path.startswith(pf) and path[len(pf)] == sl:
                         return True
                 return False
             class patternmatcher(basematcher):
                 r"""Matcher built from a list of (kind, pat, source) tuples and a 'root'.
                 >>> kindpats = [
                 ...     (b're', br'.*\.c$', b''),
                 ...     (b'path', b'foo/a', b''),
                 ...     (b'relpath', b'b', b''),
                 ...     (b'glob', b'*.h', b''),
                 ... ]
                 >>> m = patternmatcher(b'foo', kindpats)
                 >>> m(b'main.c')  # matches re:.*\.c$
                 True
                 >>> m(b'b.txt')
                 False
                 >>> m(b'foo/a')  # matches path:foo/a
                 True
                 >>> m(b'a')  # does not match path:b, since 'root' is 'foo'
                 False
                 >>> m(b'b')  # matches relpath:b, since 'root' is 'foo'
                 True
                 >>> m(b'lib.h')  # matches glob:*.h
                 True
                 >>> m.files()
                 ['', 'foo/a', 'b', '']
                 >>> m.exact(b'foo/a')
                 True
                 >>> m.exact(b'b')
                 True
                 >>> m.exact(b'lib.h')  # exact matches are for (rel)path kinds
                 False
                 """
                 def __init__(self, root, kindpats, badfn=None):
                     super(patternmatcher, self).__init__(badfn)
                     self._files = _explicitfiles(kindpats)
                     self._prefix = _prefix(kindpats)
                     # Globs are anchored at end-of-string for this matcher.
                     pats, matchfn = _buildmatch(kindpats, b'$', root)
                     self._pats = pats
                     self.matchfn = matchfn
                 @propertycache
                 def _dirs(self):
                     """Set built from ``pathutil.dirs`` over the explicit file set."""
                     return set(pathutil.dirs(self._fileset))
                 def visitdir(self, dir):
                     """Return b'all', True or False for whether ``dir`` needs visiting."""
                     explicit = self._fileset
                     if self._prefix and dir in explicit:
                         return b'all'
                     if dir in self._dirs:
                         return True
                     return path_or_parents_in_set(dir, explicit)
                 def visitchildrenset(self, dir):
                     """Translate ``visitdir``'s answer into b'all', b'this' or set()."""
                     verdict = self.visitdir(dir)
                     if verdict is True:
                         return b'this'
                     if not verdict:
                         return set()
                     # The only remaining answer visitdir gives is b'all'.
                     assert verdict == b'all'
                     return b'all'
                 def prefix(self):
                     return self._prefix
                 @encoding.strmethod
                 def __repr__(self):
                     shown = pycompat.bytestr(self._pats)
                     return b'<patternmatcher patterns=%r>' % shown
             # This is basically a reimplementation of pathutil.dirs that stores the
             # children instead of just a count of them, plus a small optional optimization
             # to avoid some directories we don't need.
             class _dirchildren(object):
                 def __init__(self, paths, onlyinclude=None):
                     self._dirs = {}
                     self._onlyinclude = onlyinclude or []
                     addpath = self.addpath
                     for f in paths:
                         addpath(f)
                 def addpath(self, path):
                     if path == b'':
                         return
                     dirs = self._dirs
                     findsplitdirs = _dirchildren._findsplitdirs
                     for d, b in findsplitdirs(path):
                         if d not in self._onlyinclude:
                             continue
                         dirs.setdefault(d, set()).add(b)
                 @staticmethod
                 def _findsplitdirs(path):
                     # yields (dirname, basename) tuples, walking back to the root.  This is
                     # very similar to pathutil.finddirs, except:
                     #  - produces a (dirname, basename) tuple, not just 'dirname'
                     # Unlike manifest._splittopdir, this does not suffix `dirname` with a
                     # slash.
                     oldpos = len(path)
                     pos = path.rfind(b'/')
                     while pos != -1:
                         yield path[:pos], path[pos + 1 : oldpos]
                         oldpos = pos
                         pos = path.rfind(b'/', 0, pos)
                     yield b'', path[:oldpos]
                 def get(self, path):
                     return self._dirs.get(path, set())
             class includematcher(basematcher):
                 """Matcher for include patterns, tracking which directories to visit.
                 Three directory collections are derived from ``kindpats``:
                 ``_roots`` (recursively included), ``_dirs`` (non-recursively
                 included) and ``_parents`` (needed only to reach the other two).
                 """
                 def __init__(self, root, kindpats, badfn=None):
                     super(includematcher, self).__init__(badfn)
                     if rustmod is not None:
                         # We need to pass the patterns to Rust because they can contain
                         # patterns from the user interface
                         self._kindpats = kindpats
                     pats, matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
                     self._pats = pats
                     self.matchfn = matchfn
                     self._prefix = _prefix(kindpats)
                     roots, dirs, parents = _rootsdirsandparents(kindpats)
                     # Directories that are included recursively.
                     self._roots = set(roots)
                     # Directories that are included non-recursively.
                     self._dirs = set(dirs)
                     # Directories included non-recursively only because something in
                     # _dirs or _roots lives beneath them.
                     self._parents = parents
                 def visitdir(self, dir):
                     """Return b'all', True or False for whether ``dir`` needs visiting."""
                     roots = self._roots
                     if self._prefix and dir in roots:
                         return b'all'
                     if dir in self._dirs or dir in self._parents:
                         return True
                     return path_or_parents_in_set(dir, roots)
                 @propertycache
                 def _allparentschildren(self):
                     # dirs, roots and parents are all fed in, yet only entries keyed by
                     # a parent are kept. Feeding just the parents would lose children:
                     #   dirs = ['foo/bar']
                     #   parents = ['foo']
                     # asking for the children of 'foo' must be able to answer ['bar'].
                     everything = itertools.chain(self._dirs, self._roots, self._parents)
                     return _dirchildren(everything, onlyinclude=self._parents)
                 def visitchildrenset(self, dir):
                     """Return b'all', b'this', or the set of children of ``dir`` to visit."""
                     roots = self._roots
                     if self._prefix and dir in roots:
                         return b'all'
                     # The 'dir in self._parents' case of visitdir is deliberately not
                     # part of this test; it is answered separately further down.
                     if (
                         b'' in roots
                         or dir in self._dirs
                         or path_or_parents_in_set(dir, roots)
                     ):
                         return b'this'
                     if dir not in self._parents:
                         return set()
                     return self._allparentschildren.get(dir) or set()
                 @encoding.strmethod
                 def __repr__(self):
                     shown = pycompat.bytestr(self._pats)
                     return b'<includematcher includes=%r>' % shown
             class exactmatcher(basematcher):
                 r"""Matches the input files exactly. They are interpreted as paths, not
                 patterns (so no kind-prefixes).

                 >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
                 >>> m(b'a.txt')
                 True
                 >>> m(b'b.txt')
                 False

                 Input files that would be matched are exactly those returned by .files()

                 >>> m.files()
                 ['a.txt', 're:.*\\.c$']

                 So pattern 're:.*\.c$' is not considered as a regex, but as a file name

                 >>> m(b'main.c')
                 False
                 >>> m(br're:.*\.c$')
                 True
                 """
                 def __init__(self, files, badfn=None):
                     super(exactmatcher, self).__init__(badfn)
                     # Keep the caller's list as-is; any other iterable is copied into
                     # a new list.
                     if isinstance(files, list):
                         self._files = files
                     else:
                         self._files = list(files)
                 # Matching a file is delegated directly to basematcher.exact.
                 matchfn = basematcher.exact
                 @propertycache
                 def _dirs(self):
                     """Set built from ``pathutil.dirs`` over the file set (memoized)."""
                     return set(pathutil.dirs(self._fileset))
                 def visitdir(self, dir):
                     """Return True only when ``dir`` is in ``self._dirs``."""
                     return dir in self._dirs
+                @propertycache
+                def _visitchildrenset_candidates(self):
+                    """A memoized set of candidates for visitchildrenset."""
+                    return self._fileset | self._dirs - {b''}
+                @propertycache
+                def _sorted_visitchildrenset_candidates(self):
+                    """A memoized sorted list of candidates for visitchildrenset."""
+                    return sorted(self._visitchildrenset_candidates)
                 # NOTE(review): the method below still carries unified-diff markers
                 # ('+' / '-' in the first column); both sides of the change are shown.
                 def visitchildrenset(self, dir):
                     # Returns the names of the immediate children of `dir` that are a
                     # listed file or lead to one; an empty set when there are no files
                     # or `dir` is not in self._dirs.
                     if not self._fileset or dir not in self._dirs:
                         return set()
-                    candidates = self._fileset | self._dirs - {b''}
+                    if dir == b'':
-                    if dir != b'':
+                        candidates = self._visitchildrenset_candidates
+                    else:
+                        candidates = self._sorted_visitchildrenset_candidates
                         d = dir + b'/'
-                        candidates = {c[len(d) :] for c in candidates if c.startswith(d)}
+                        # Use bisect to find the first element potentially starting with d
+                        # (i.e. >= d). This should always find at least one element (we'll
+                        # assert later if this is not the case).
+                        first = bisect.bisect_left(candidates, d)
+                        # We need a representation of the first element that is > d that
+                        # does not start with d, so since we added a `/` on the end of dir,
+                        # we'll add whatever comes after slash (we could probably assume
+                        # that `0` is after `/`, but let's not) to the end of dir instead.
+                        dnext = dir + encoding.strtolocal(chr(ord(b'/') + 1))
+                        # Use bisect to find the first element >= d_next
+                        last = bisect.bisect_left(candidates, dnext, lo=first)
+                        dlen = len(d)
+                        candidates = {c[dlen:] for c in candidates[first:last]}
                     # self._dirs includes all of the directories, recursively, so if
                     # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
                     # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
                     # '/' in it, indicating that it's for a subdir-of-a-subdir; the
                     # immediate subdir will be in there without a slash.
                     ret = {c for c in candidates if b'/' not in c}
                     # We really do not expect ret to be empty, since that would imply that
                     # there's something in _dirs that didn't have a file in _fileset.
                     assert ret
                     return ret
                 def isexact(self):
                     """Always True for this matcher."""
                     return True
                 @encoding.strmethod
                 def __repr__(self):
                     return b'<exactmatcher files=%r>' % self._files
             class differencematcher(basematcher):
                 """Composes two matchers by matching if the first matches and the second
                 does not.
                 The second matcher's non-matching-attributes (bad, traversedir) are ignored.
                 """
                 def __init__(self, m1, m2):
                     super(differencematcher, self).__init__()
                     self._m1 = m1
                     self._m2 = m2
                     # Only m1 supplies the non-matching attributes.
                     self.bad = m1.bad
                     self.traversedir = m1.traversedir
                 def matchfn(self, f):
                     """Match ``f`` when m1 matches it and m2 does not."""
                     return self._m1(f) and not self._m2(f)
                 @propertycache
                 def _files(self):
                     if self.isexact():
                         return [f for f in self._m1.files() if self(f)]
                     # If m1 is not an exact matcher, we can't easily figure out the set of
                     # files, because its files() are not always files. For example, if
                     # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
                     # want to remove "dir" from the set even though it would match m2,
                     # because the "dir" in m1 may not be a file.
                     return self._m1.files()
                 def visitdir(self, dir):
                     """Return b'all', True or False for whether ``dir`` needs visiting."""
                     # Ask m2 once and reuse the answer for both tests below.
                     m2_visit = self._m2.visitdir(dir)
                     if m2_visit == b'all':
                         # m2 excludes everything under dir.
                         return False
                     m1_visit = self._m1.visitdir(dir)
                     if not m2_visit:
                         # m2 does not match dir, we can return 'all' here if possible
                         return m1_visit
                     return bool(m1_visit)
                 def visitchildrenset(self, dir):
                     """Return b'all', b'this', or a set of children of ``dir`` to visit."""
                     m2_set = self._m2.visitchildrenset(dir)
                     if m2_set == b'all':
                         return set()
                     m1_set = self._m1.visitchildrenset(dir)
                     # Possible values for m1: 'all', 'this', set(...), set()
                     # Possible values for m2:        'this', set(...), set()
                     # If m2 has nothing under here that we care about, return m1, even if
                     # it's 'all'. This is a change in behavior from visitdir, which would
                     # return True, not 'all', for some reason.
                     if not m2_set:
                         return m1_set
                     if m1_set in [b'all', b'this']:
                         # Never return 'all' here if m2_set is any kind of non-empty (either
                         # 'this' or set(foo)), since m2 might return set() for a
                         # subdirectory.
                         return b'this'
                     # Possible values for m1:         set(...), set()
                     # Possible values for m2: 'this', set(...)
                     # We ignore m2's set results. They're possibly incorrect:
                     #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
                     #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
                     #    return set(), which is *not* correct, we still need to visit 'dir'!
                     return m1_set
                 def isexact(self):
                     """Exactness is inherited from m1."""
                     return self._m1.isexact()
                 @encoding.strmethod
                 def __repr__(self):
                     return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)
             def intersectmatchers(m1, m2):
                 """Return a matcher that matches only what both ``m1`` and ``m2`` match.
                 When either argument is None the result is ``m1 or m2``. When one side
                 always matches, a shallow copy of the other side is returned instead of
                 a real intersection; in every case ``bad`` and ``traversedir`` end up
                 being those of ``m1``.
                 """
                 if m1 is None or m2 is None:
                     return m1 or m2
                 if not m1.always():
                     if m2.always():
                         # m2 adds no restriction: a copy of m1 is the intersection.
                         return copy.copy(m1)
                     return intersectionmatcher(m1, m2)
                 # m1 adds no restriction, so m2 decides what matches, but the
                 # non-matching attributes still come from m1.
                 # TODO: Consider encapsulating these things in a class so there's only
                 # one thing to copy from m1.
                 merged = copy.copy(m2)
                 merged.bad = m1.bad
                 merged.traversedir = m1.traversedir
                 return merged
             class intersectionmatcher(basematcher):
                 """Matches a file only when both wrapped matchers match it.
                 ``bad`` and ``traversedir`` are taken from the first matcher.
                 """
                 def __init__(self, m1, m2):
                     super(intersectionmatcher, self).__init__()
                     self._m1, self._m2 = m1, m2
                     self.bad = m1.bad
                     self.traversedir = m1.traversedir
                 @propertycache
                 def _files(self):
                     if not self.isexact():
                         # If neither m1 nor m2 is an exact matcher, we can't easily
                         # intersect the set of files, because their files() are not
                         # always files. For example, if intersecting a matcher
                         # "-I glob:foo.txt" with matcher of "path:dir2", we don't want
                         # to remove "dir2" from the set.
                         return self._m1.files() + self._m2.files()
                     # Enumerate the files of whichever side is exact (m1 preferred) and
                     # keep those the other side accepts.
                     exact, other = self._m1, self._m2
                     if not exact.isexact():
                         exact, other = other, exact
                     return [f for f in exact.files() if other(f)]
                 def matchfn(self, f):
                     return self._m1(f) and self._m2(f)
                 def visitdir(self, dir):
                     """Return b'all', True or False for whether ``dir`` needs visiting."""
                     first = self._m1.visitdir(dir)
                     if first == b'all':
                         return self._m2.visitdir(dir)
                     if not first:
                         return False
                     # bool() because first=True + second='all' should not be 'all'
                     return bool(self._m2.visitdir(dir))
                 def visitchildrenset(self, dir):
                     """Return b'all', b'this', or a set of children of ``dir`` to visit."""
                     from_m1 = self._m1.visitchildrenset(dir)
                     if not from_m1:
                         return set()
                     from_m2 = self._m2.visitchildrenset(dir)
                     if not from_m2:
                         return set()
                     # b'all' on one side means the other side's answer is the result.
                     if from_m1 == b'all':
                         return from_m2
                     if from_m2 == b'all':
                         return from_m1
                     if from_m1 == b'this' or from_m2 == b'this':
                         return b'this'
                     assert isinstance(from_m1, set) and isinstance(from_m2, set)
                     return from_m1.intersection(from_m2)
                 def always(self):
                     return self._m1.always() and self._m2.always()
                 def isexact(self):
                     return self._m1.isexact() or self._m2.isexact()
                 @encoding.strmethod
                 def __repr__(self):
                     return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)
             class subdirmatcher(basematcher):
                 """Expose an existing matcher as seen from inside one subdirectory.
                 Paths handed to this matcher are relative to ``path``; the prefix is
                 added before delegating to the wrapped matcher and stripped from the
                 wrapped matcher's files:
                 >>> from . import pycompat
                 >>> m1 = match(util.localpath(b'/root'), b'', [b'a.txt', b'sub/b.txt'], auditor=lambda name: None)
                 >>> m2 = subdirmatcher(b'sub', m1)
                 >>> m2(b'a.txt')
                 False
                 >>> m2(b'b.txt')
                 True
                 >>> m2.matchfn(b'a.txt')
                 False
                 >>> m2.matchfn(b'b.txt')
                 True
                 >>> m2.files()
                 ['b.txt']
                 >>> m2.exact(b'b.txt')
                 True
                 >>> def bad(f, msg):
                 ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
                 >>> m1.bad = bad
                 >>> m2.bad(b'x.txt', b'No such file')
                 sub/x.txt: No such file
                 """
                 def __init__(self, path, matcher):
                     super(subdirmatcher, self).__init__()
                     self._path = path
                     self._matcher = matcher
                     self._always = matcher.always()
                     # Keep only the files below `path`, with the "path/" prefix removed.
                     prefix = path + b"/"
                     skip = len(prefix)
                     self._files = [
                         f[skip:] for f in matcher._files if f.startswith(prefix)
                     ]
                     # If the parent repo had a path to this subrepo and the matcher is
                     # a prefix matcher, this submatcher always matches.
                     if matcher.prefix():
                         self._always = path in matcher._files
                 def bad(self, f, msg):
                     self._matcher.bad(self._path + b"/" + f, msg)
                 def matchfn(self, f):
                     # The superclass constructor does not keep enough information to
                     # rebuild a matching function for the subdirectory, so matchfn()
                     # and visitdir() are overridden to call the original matcher with
                     # the subdirectory path prepended.
                     return self._matcher.matchfn(self._path + b"/" + f)
                 def visitdir(self, dir):
                     # b'' denotes the subdirectory itself.
                     full = self._path if dir == b'' else self._path + b"/" + dir
                     return self._matcher.visitdir(full)
                 def visitchildrenset(self, dir):
                     # b'' denotes the subdirectory itself.
                     full = self._path if dir == b'' else self._path + b"/" + dir
                     return self._matcher.visitchildrenset(full)
                 def always(self):
                     return self._always
                 def prefix(self):
                     return self._matcher.prefix() and not self._always
                 @encoding.strmethod
                 def __repr__(self):
                     fields = (self._path, self._matcher)
                     return b'<subdirmatcher path=%r, matcher=%r>' % fields
             class prefixdirmatcher(basematcher):
                 """Expose a matcher rooted in a subdirectory as seen from a parent.
                 ``path`` is prepended to the wrapped matcher's files and stripped from
                 incoming paths before delegating; paths outside ``path`` never match.
                 The wrapped matcher's ``bad`` and ``traversedir`` are not used.
                 >>> m1 = match(util.localpath(b'/root/d/e'), b'f', [b'../a.txt', b'b.txt'], auditor=lambda name: None)
                 >>> m2 = prefixdirmatcher(b'd/e', m1)
                 >>> m2(b'a.txt')
                 False
                 >>> m2(b'd/e/a.txt')
                 True
                 >>> m2(b'd/e/b.txt')
                 False
                 >>> m2.files()
                 ['d/e/a.txt', 'd/e/f/b.txt']
                 >>> m2.exact(b'd/e/a.txt')
                 True
                 >>> m2.visitdir(b'd')
                 True
                 >>> m2.visitdir(b'd/e')
                 True
                 >>> m2.visitdir(b'd/e/f')
                 True
                 >>> m2.visitdir(b'd/e/g')
                 False
                 >>> m2.visitdir(b'd/ef')
                 False
                 """
                 def __init__(self, path, matcher, badfn=None):
                     super(prefixdirmatcher, self).__init__(badfn)
                     if not path:
                         raise error.ProgrammingError(b'prefix path must not be empty')
                     self._path = path
                     self._pathprefix = path + b'/'
                     self._matcher = matcher
                 @propertycache
                 def _files(self):
                     prefix = self._pathprefix
                     return [prefix + f for f in self._matcher._files]
                 def matchfn(self, f):
                     prefix = self._pathprefix
                     if f.startswith(prefix):
                         return self._matcher.matchfn(f[len(prefix) :])
                     # Anything outside the prefix directory cannot match.
                     return False
                 @propertycache
                 def _pathdirs(self):
                     """Set of the directories yielded by ``pathutil.finddirs`` for the prefix."""
                     return set(pathutil.finddirs(self._path))
                 def visitdir(self, dir):
                     inner = self._matcher
                     if dir == self._path:
                         # The prefix directory itself is the wrapped matcher's root.
                         return inner.visitdir(b'')
                     prefix = self._pathprefix
                     if dir.startswith(prefix):
                         return inner.visitdir(dir[len(prefix) :])
                     return dir in self._pathdirs
                 def visitchildrenset(self, dir):
                     inner = self._matcher
                     if dir == self._path:
                         # The prefix directory itself is the wrapped matcher's root.
                         return inner.visitchildrenset(b'')
                     prefix = self._pathprefix
                     if dir.startswith(prefix):
                         return inner.visitchildrenset(dir[len(prefix) :])
                     if dir not in self._pathdirs:
                         return set()
                     return b'this'
                 def isexact(self):
                     return self._matcher.isexact()
                 def prefix(self):
                     return self._matcher.prefix()
                 @encoding.strmethod
                 def __repr__(self):
                     fields = (pycompat.bytestr(self._path), self._matcher)
                     return b'<prefixdirmatcher path=%r, matcher=%r>' % fields
             class unionmatcher(basematcher):
                 """Matches a file when at least one of the given matchers matches it.
                 ``traversedir`` is taken from the first matcher.
                 NOTE(review): the constructor copies only ``traversedir`` from the
                 first matcher; ``bad`` is left as the base class sets it.
                 """
                 def __init__(self, matchers):
                     first = matchers[0]
                     super(unionmatcher, self).__init__()
                     self.traversedir = first.traversedir
                     self._matchers = matchers
                 def matchfn(self, f):
                     return any(m(f) for m in self._matchers)
                 def visitdir(self, dir):
                     """Return b'all' as soon as any matcher says so, else the OR of all."""
                     combined = False
                     for m in self._matchers:
                         verdict = m.visitdir(dir)
                         if verdict == b'all':
                             return verdict
                         combined |= verdict
                     return combined
                 def visitchildrenset(self, dir):
                     """Merge the children sets of all matchers.
                     b'all' from any matcher wins immediately; otherwise b'this' from
                     any matcher wins over the accumulated set.
                     """
                     children = set()
                     sawthis = False
                     for m in self._matchers:
                         answer = m.visitchildrenset(dir)
                         if not answer:
                             continue
                         if answer == b'all':
                             return answer
                         if sawthis or answer == b'this':
                             # Keep looping rather than stopping: a later matcher may
                             # still answer b'all'.
                             sawthis = True
                             continue
                         assert isinstance(answer, set)
                         children.update(answer)
                     return b'this' if sawthis else children
                 @encoding.strmethod
                 def __repr__(self):
                     return b'<unionmatcher matchers=%r>' % self._matchers
             def patkind(pattern, default=None):
                 r"""Return the kind of a 'kind:pat' pattern, or ``default`` if it has none.
                 Only known kinds count as a kind prefix.
                 >>> patkind(br're:.*\.c$')
                 're'
                 >>> patkind(b'glob:*.c')
                 'glob'
                 >>> patkind(b'relpath:test.py')
                 'relpath'
                 >>> patkind(b'main.py')
                 >>> patkind(b'main.py', default=b're')
                 're'
                 """
                 kind, _pat = _patsplit(pattern, default)
                 return kind
             def _patsplit(pattern, default):
                 """Split a string into the optional pattern kind prefix and the actual
                 pattern."""
                 if b':' in pattern:
                     kind, pat = pattern.split(b':', 1)
                     if kind in allpatternkinds:
                         return kind, pat
                 return default, pattern
             def _globre(pat):
                 r"""Convert an extended glob string to a regexp string.
                 The input is scanned one byte at a time; each glob construct is
                 translated and every other byte is regex-escaped.
                 >>> from . import pycompat
                 >>> def bprint(s):
                 ...     print(pycompat.sysstr(s))
                 >>> bprint(_globre(br'?'))
                 .
                 >>> bprint(_globre(br'*'))
                 [^/]*
                 >>> bprint(_globre(br'**'))
                 .*
                 >>> bprint(_globre(br'**/a'))
                 (?:.*/)?a
                 >>> bprint(_globre(br'a/**/b'))
                 a/(?:.*/)?b
                 >>> bprint(_globre(br'[a*?!^][^b][!c]'))
                 [a*?!^][\^b][^c]
                 >>> bprint(_globre(br'{a,b}'))
                 (?:a|b)
                 >>> bprint(_globre(br'.\*\?'))
                 \.\*\?
                 """
                 # i is the read position in pat, n its length.
                 i, n = 0, len(pat)
                 res = b''
                 # Number of '{' groups currently open.
                 group = 0
                 # escape(c, c) looks c up in the escape map and falls back to c itself.
                 escape = util.stringutil.regexbytesescapemap.get
                 def peek():
                     # Next unread byte as a length-1 slice, or False at end of input.
                     return i < n and pat[i : i + 1]
                 while i < n:
                     c = pat[i : i + 1]
                     i += 1
                     if c not in b'*?[{},\\':
                         # Not a glob metacharacter: emit it escaped.
                         res += escape(c, c)
                     elif c == b'*':
                         if peek() == b'*':
                             i += 1
                             if peek() == b'/':
                                 # '**/' may match nothing or any number of directories.
                                 i += 1
                                 res += b'(?:.*/)?'
                             else:
                                 # '**' matches anything, '/' included.
                                 res += b'.*'
                         else:
                             # A single '*' does not cross a '/'.
                             res += b'[^/]*'
                     elif c == b'?':
                         res += b'.'
                     elif c == b'[':
                         # Look for the closing ']'. A '!' or ']' directly after the '['
                         # is skipped first, so it cannot end the class.
                         j = i
                         if j < n and pat[j : j + 1] in b'!]':
                             j += 1
                         while j < n and pat[j : j + 1] != b']':
                             j += 1
                         if j >= n:
                             # No closing bracket: the '[' is a literal. i is left
                             # unchanged, so the following bytes are scanned normally.
                             res += b'\\['
                         else:
                             # Double any backslashes inside the class.
                             stuff = pat[i:j].replace(b'\\', b'\\\\')
                             i = j + 1
                             if stuff[0:1] == b'!':
                                 # Glob negation '!' becomes regex negation '^'.
                                 stuff = b'^' + stuff[1:]
                             elif stuff[0:1] == b'^':
                                 # A leading '^' is literal in a glob; escape it.
                                 stuff = b'\\' + stuff
                             res = b'%s[%s]' % (res, stuff)
                     elif c == b'{':
                         # '{' opens a non-capturing alternation group.
                         group += 1
                         res += b'(?:'
                     elif c == b'}' and group:
                         res += b')'
                         group -= 1
                     elif c == b',' and group:
                         # ',' separates alternatives only while a group is open.
                         res += b'|'
                     elif c == b'\\':
                         p = peek()
                         if p:
                             # Backslash escapes the next byte, which is emitted escaped.
                             i += 1
                             res += escape(p, p)
                         else:
                             # Trailing backslash: emit the backslash itself, escaped.
                             res += escape(c, c)
                     else:
                         # '}' or ',' seen with no group open: treat as a literal.
                         res += escape(c, c)
                 return res
             def _regex(kind, pat, globsuffix):
                 """Convert a (normalized) pattern of any kind into a
                 regular expression.
                 globsuffix is appended to the regexp of globs."""
                 if not pat and kind in (b'glob', b'relpath'):
                     return b''
                 if kind == b're':
                     return pat
                 if kind in (b'path', b'relpath'):
                     if pat == b'.':
                         return b''
                     return util.stringutil.reescape(pat) + b'(?:/|$)'
                 if kind == b'rootfilesin':
                     if pat == b'.':
                         escaped = b''
                     else:
                         # Pattern is a directory name.
                         escaped = util.stringutil.reescape(pat) + b'/'
                     # Anything after the pattern must be a non-directory.
                     return escaped + b'[^/]+$'
                 if kind == b'relglob':
                     globre = _globre(pat)
                     if globre.startswith(b'[^/]*'):
                         # When pat has the form *XYZ (common), make the returned regex more
                         # legible by returning the regex for **XYZ instead of **/*XYZ.
                         return b'.*' + globre[len(b'[^/]*') :] + globsuffix
                     return b'(?:|.*/)' + globre + globsuffix
                 if kind == b'relre':
                     if pat.startswith(b'^'):
                         return pat
                     return b'.*' + pat
                 if kind in (b'glob', b'rootglob'):
                     return _globre(pat) + globsuffix
                 raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))
             def _buildmatch(kindpats, globsuffix, root):
                 """Build a matcher for kindpats.

                 Returns a ``(regex, matchfn)`` pair where ``regex`` is a descriptive
                 bytes string and ``matchfn`` takes a file name and returns a truthy
                 value when it matches. globsuffix is appended to the regexp of globs.
                 """
                 subincludes, kindpats = _expandsubinclude(kindpats, root)
                 candidates = []

                 if subincludes:
                     # Sub-matchers are created on first use and remembered per prefix.
                     cache = {}

                     def matchsubinclude(f):
                         for prefix, matcherargs in subincludes:
                             if not f.startswith(prefix):
                                 continue
                             submatcher = cache.get(prefix)
                             if submatcher is None:
                                 submatcher = match(*matcherargs)
                                 cache[prefix] = submatcher
                             # The sub-matcher sees the name relative to its prefix.
                             if submatcher(f[len(prefix) :]):
                                 return True
                         return False

                     candidates.append(matchsubinclude)

                 regex = b''
                 if kindpats:
                     if all(k == b'rootfilesin' for k, p, s in kindpats):
                         # Only rootfilesin patterns: a set lookup on the parent
                         # directory replaces the regexp entirely.
                         dirs = {p for k, p, s in kindpats}

                         def matchrootfilesin(f):
                             parent, slash, _basename = f.rpartition(b'/')
                             if not slash:
                                 # No slash at all: the file lives in the root.
                                 parent = b'.'
                             return parent in dirs

                         regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
                         candidates.append(matchrootfilesin)
                     else:
                         regex, regexmatch = _buildregexmatch(kindpats, globsuffix)
                         candidates.append(regexmatch)

                 if len(candidates) != 1:
                     return regex, lambda f: any(fn(f) for fn in candidates)
                 return regex, candidates[0]
             # Size limit (in bytes) used by _buildregexmatch(): a single pattern whose
             # regexp is longer than this aborts, and longer pattern lists are split
             # into several regexps so that each group stays within the limit.
             MAX_RE_SIZE = 20000
             def _joinregexes(regexps):
                 """gather multiple regular expressions into a single one"""
                 return b'|'.join(regexps)
             def _buildregexmatch(kindpats, globsuffix):
                 """Build a match function from a list of (kind, pat, source) tuples.

                 Returns the full regexp string and a matcher function. Raises
                 error.Abort when a single pattern is longer than MAX_RE_SIZE or when
                 a pattern does not compile.

                 Test too large input
                 >>> _buildregexmatch([
                 ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
                 ... ], b'$')
                 Traceback (most recent call last):
                 ...
                 Abort: matcher pattern is too long (20009 bytes)
                 """
                 try:
                     pieces = [
                         _regex(kind, pat, globsuffix) for (kind, pat, source) in kindpats
                     ]
                     combined = _joinregexes(pieces)

                     # Walk the pieces, closing a chunk whenever adding the next piece
                     # would push it past MAX_RE_SIZE.
                     chunks = []
                     chunkstart = 0
                     chunksize = 0
                     for pos, piece in enumerate(pieces):
                         size = len(piece)
                         if size > MAX_RE_SIZE:
                             msg = _(b"matcher pattern is too long (%d bytes)") % size
                             raise error.Abort(msg)
                         if chunksize + size > MAX_RE_SIZE:
                             chunks.append(_joinregexes(pieces[chunkstart:pos]))
                             chunkstart = pos
                             chunksize = 0
                         # The extra byte accounts for the '|' separator.
                         chunksize += size + 1

                     if chunkstart:
                         # At least one split happened: flush the trailing chunk and
                         # try every chunk in turn.
                         chunks.append(_joinregexes(pieces[chunkstart:]))
                         matchers = [_rematcher(chunk) for chunk in chunks]

                         def func(s):
                             return any(m(s) for m in matchers)

                     else:
                         # Everything fits in a single regexp.
                         single = _rematcher(combined)

                         def func(s):
                             return bool(single(s))

                     return combined, func
                 except re.error:
                     # Recompile the patterns one at a time to report the culprit.
                     for kind, pat, source in kindpats:
                         try:
                             _rematcher(_regex(kind, pat, globsuffix))
                         except re.error:
                             if not source:
                                 raise error.Abort(
                                     _(b"invalid pattern (%s): %s") % (kind, pat)
                                 )
                             raise error.Abort(
                                 _(b"%s: invalid pattern (%s): %s") % (source, kind, pat)
                             )
                     raise error.Abort(_(b"invalid pattern"))
             def _patternrootsanddirs(kindpats):
                 """Returns roots and directories corresponding to each pattern.
                 This calculates the roots and directories exactly matching the patterns and
                 returns a tuple of (roots, dirs) for each. It does not return other
                 directories which may also need to be considered, like the parent
                 directories.
                 """
                 r = []
                 d = []
                 for kind, pat, source in kindpats:
                     if kind in (b'glob', b'rootglob'):  # find the non-glob prefix
                         root = []
                         for p in pat.split(b'/'):
                             if b'[' in p or b'{' in p or b'*' in p or b'?' in p:
                                 break
                             root.append(p)
                         r.append(b'/'.join(root))
                     elif kind in (b'relpath', b'path'):
                         if pat == b'.':
                             pat = b''
                         r.append(pat)
                     elif kind in (b'rootfilesin',):
                         if pat == b'.':
                             pat = b''
                         d.append(pat)
                     else:  # relglob, re, relre
                         r.append(b'')
                 return r, d
             def _roots(kindpats):
                 '''Returns the root directories to match recursively for the patterns.'''
                 # Only the first element (roots) of the (roots, dirs) pair is wanted.
                 return _patternrootsanddirs(kindpats)[0]
             def _rootsdirsandparents(kindpats):
                 """Returns roots and exact directories from patterns.

                 `roots` are directories to match recursively, `dirs` should
                 be matched non-recursively, and `parents` are the implicitly required
                 directories to walk to items in either roots or dirs.

                 Returns a tuple of (roots, dirs, parents).

                 >>> r = _rootsdirsandparents(
                 ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
                 ...      (b'glob', b'g*', b'')])
                 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
                 (['g/h', 'g/h', ''], []) ['', 'g']
                 >>> r = _rootsdirsandparents(
                 ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
                 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
                 ([], ['g/h', '']) ['', 'g']
                 >>> r = _rootsdirsandparents(
                 ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
                 ...      (b'path', b'', b'')])
                 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
                 (['r', 'p/p', ''], []) ['', 'p']
                 >>> r = _rootsdirsandparents(
                 ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
                 ...      (b'relre', b'rr', b'')])
                 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
                 (['', '', ''], []) ['']
                 """
                 roots, exactdirs = _patternrootsanddirs(kindpats)

                 # The parents are collected as non-recursive/exact directories: they
                 # have to be scanned to reach the roots or the other exact directories.
                 parents = set(pathutil.dirs(exactdirs))
                 parents.update(pathutil.dirs(roots))

                 # FIXME: every caller turns roots and dirs into sets; do that here
                 # before returning.
                 # FIXME: no caller needs entries of 'roots' and 'dirs' to also appear
                 # in 'parents'; consider dropping them before returning.
                 return roots, exactdirs, parents
             def _explicitfiles(kindpats):
                 """Returns the potential explicit filenames from the patterns.

                 >>> _explicitfiles([(b'path', b'foo/bar', b'')])
                 ['foo/bar']
                 >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
                 []
                 """
                 # rootfilesin can only name directories, never files, so those
                 # patterns are left out before computing the roots.
                 candidates = []
                 for kindpat in kindpats:
                     if kindpat[0] != b'rootfilesin':
                         candidates.append(kindpat)
                 return _roots(candidates)
             def _prefix(kindpats):
                 '''Whether all the patterns match a prefix (i.e. recursively)'''
                 for kind, pat, source in kindpats:
                     if kind not in (b'path', b'relpath'):
                         return False
                 return True
             # Cache for the comment-stripping regexp used by readpatternfile(); it is
             # compiled there the first time a line containing '#' is seen.
             _commentre = None
             def readpatternfile(filepath, warn, sourceinfo=False):
                 """Parse a pattern file and return the list of patterns it contains.

                 The returned patterns should be given to compile() to be validated
                 and converted into a match function.

                 File format:

                 - trailing white space is dropped
                 - the escape character is backslash
                 - comments start with #
                 - empty lines are skipped

                 Lines can be of the following formats::

                   syntax: regexp # defaults following lines to non-rooted regexps
                   syntax: glob   # defaults following lines to non-rooted globs
                   re:pattern     # non-rooted regular expression
                   glob:pattern   # non-rooted glob
                   rootglob:pat   # rooted glob (same root as ^ in regexps)
                   pattern        # pattern of the current default type

                 ``filepath`` is the file to read. ``warn``, when truthy, is called
                 with a message for each ``syntax:`` line naming an unknown syntax;
                 such lines are otherwise ignored.

                 If ``sourceinfo`` is set, a list of ``(pattern, lineno, line)`` tuples
                 is returned instead, where ``lineno`` is 1-based and ``line`` is the
                 line with comments, trailing white space and any syntax prefix
                 removed. This is useful to debug ignore patterns.
                 """
                 global _commentre

                 syntaxes = {
                     b're': b'relre:',
                     b'regexp': b'relre:',
                     b'glob': b'relglob:',
                     b'rootglob': b'rootglob:',
                     b'include': b'include',
                     b'subinclude': b'subinclude',
                 }
                 # Syntax applied to lines that carry no explicit prefix.
                 syntax = b'relre:'
                 patterns = []

                 # The context manager guarantees the file is closed even when reading
                 # the file or calling warn() raises.
                 with open(filepath, b'rb') as fp:
                     for lineno, line in enumerate(util.iterfile(fp), start=1):
                         if b"#" in line:
                             if not _commentre:
                                 _commentre = util.re.compile(
                                     br'((?:^|[^\\])(?:\\\\)*)#.*'
                                 )
                             # remove comments prefixed by an even number of escapes
                             m = _commentre.search(line)
                             if m:
                                 line = line[: m.end(1)]
                             # fixup properly escaped comments that survived the above
                             line = line.replace(b"\\#", b"#")
                         line = line.rstrip()
                         if not line:
                             continue

                         if line.startswith(b'syntax:'):
                             # A "syntax:" line only changes the default for the lines
                             # that follow; it never produces a pattern itself.
                             s = line[7:].strip()
                             try:
                                 syntax = syntaxes[s]
                             except KeyError:
                                 if warn:
                                     warn(
                                         _(b"%s: ignoring invalid syntax '%s'\n")
                                         % (filepath, s)
                                     )
                             continue

                         # An explicit per-line prefix overrides the current default.
                         linesyntax = syntax
                         for s, rels in pycompat.iteritems(syntaxes):
                             if line.startswith(rels):
                                 linesyntax = rels
                                 line = line[len(rels) :]
                                 break
                             elif line.startswith(s + b':'):
                                 linesyntax = rels
                                 line = line[len(s) + 1 :]
                                 break

                         if sourceinfo:
                             patterns.append((linesyntax + line, lineno, line))
                         else:
                             patterns.append(linesyntax + line)
                 return patterns